In [1]:
!pip install asyncpraw 



In [2]:
import pandas as pd

In [4]:
# Regex alternation (one capturing group) of phrases that typically show up in
# a first-hand airline review. Phrases are grouped by theme for readability;
# the joined result is a single "(a|b|c|...)" pattern string.
review_keywords = "(" + "|".join([
    # service and staff
    "good service", "bad service", "friendly staff", "rude crew", "helpful", "unhelpful",
    # having actually flown / cabin
    "flew", "fly", "flown", "took a flight", "onboard", "aircraft", "seating", "legroom",
    # punctuality
    "on time", "delayed", "canceled", "missed flight",
    # comfort
    "comfortable", "cramped", "smooth flight", "bumpy", "turbulence",
    # price
    "cheap ticket", "expensive", "affordable", "overpriced",
    # baggage
    "baggage", "lost luggage", "extra charge", "carry-on",
    # overall verdict
    "good experience", "bad experience", "would recommend", "never again", "worst airline",
]) + ")"
# Regex alternation, presumably meant as an exclusion pattern for one thread:
# any text ending in "?" plus a few specific snippets. The "." and "?" inside
# the literal snippets are escaped so they match only that exact text
# (unescaped, "." matches any character and "e?" makes the "e" optional).
url1UnrelatedKeywords = (
    r"(\?$|A\.\.\. Pyramid scheme\?|https://luggagelosers\.com/|Flown September and November 2024|Same experience with AirAsia Japan)"
)

In [15]:
import re
import asyncio
import asyncpraw

async def getAirAsiaReviews(url, unrelatedKeywords):
    """Collect the comments of one Reddit submission, minus excluded ones.

    Every comment and nested reply is visited depth-first. A comment is kept
    when its stripped body has more than one whitespace-separated word and
    none of the exclusion patterns match it (case-insensitive ``re.search``).

    Args:
        url: URL of the Reddit submission to read.
        unrelatedKeywords: Regex patterns that mark a comment as unrelated.
            May be an iterable of patterns, a single pattern string (treated
            as one pattern, not iterated character by character), or ``None``
            for no exclusions.

    Returns:
        A ``(reviews, total)`` tuple: the kept comment bodies in visit order
        and the number of comments visited (kept or not). If fetching or
        filtering fails, the error is printed and ``([], 0)`` is returned.
    """
    # Placeholders: supply real API credentials at runtime and keep them out
    # of the saved notebook.
    reddit = asyncpraw.Reddit(
        client_id="",
        client_secret="",
        user_agent="",
        username="",
        password=""
    )

    try:
        # Normalise the exclusion argument so a lone pattern string is not
        # split into single characters by the loop below.
        if unrelatedKeywords is None:
            keyword_list = []
        elif isinstance(unrelatedKeywords, str):
            keyword_list = [unrelatedKeywords]
        else:
            keyword_list = list(unrelatedKeywords)

        # Compile once per call instead of once per comment. Done inside the
        # try so an invalid pattern is reported like any other failure.
        exclusion_patterns = [re.compile(keyword, re.IGNORECASE) for keyword in keyword_list]

        submission = await reddit.submission(url=url)
        await submission.comments.replace_more(limit=None)  # Load all comments

        airasia_reviews = []
        total_comments = 0  # Counter for all comments & subcomments

        async def process_comment(comment):
            """Recursively process a comment and its replies."""
            nonlocal total_comments
            total_comments += 1  # Count this comment

            comment_text = comment.body.strip()

            # Keep multi-word comments that match none of the exclusions
            is_multiword = len(comment_text.split()) > 1
            if is_multiword and not any(pattern.search(comment_text) for pattern in exclusion_patterns):
                airasia_reviews.append(comment_text)

            # Fetch and process replies
            await comment.replies.replace_more(limit=None)  # Ensure all replies are loaded
            for reply in comment.replies:
                await process_comment(reply)  # Recursively process each reply

        # Process all top-level comments
        for top_comment in submission.comments:
            await process_comment(top_comment)

        return airasia_reviews, total_comments  # Return reviews & total count

    except asyncpraw.exceptions.APIException as e:
        print(f"API Error: {e}")
    except Exception as e:
        print(f"An error occurred: {e.__class__.__name__}: {e}")
    finally:
        # Always release the client's HTTP session, on success and on error.
        await reddit.close()
    return [], 0

In [17]:
async def processUrls(urls_with_keywords):
    """Fetch reviews for several submissions concurrently and print a report.

    Args:
        urls_with_keywords: Iterable of ``(url, keywords)`` pairs; each pair
            is forwarded to ``getAirAsiaReviews``.

    Prints the combined comment count, the number of kept comments, and then
    every kept comment preceded by a dashed separator line. Returns ``None``.
    """
    separator = "-" * 20

    # One coroutine per submission, awaited together.
    fetches = (getAirAsiaReviews(link, excluded) for link, excluded in urls_with_keywords)
    outcomes = await asyncio.gather(*fetches)

    # Flatten the per-URL review lists and add up the per-URL comment counts.
    collected = [text for texts, _ in outcomes for text in texts]
    grand_total = sum(visited for _, visited in outcomes)

    print(f"Total Comments (including subcomments): {grand_total}")
    print(f"Filtered Comments: {len(collected)}")

    for text in collected:
        print(separator)
        print(text)


In [151]:
# Each entry is a (submission URL, exclusion patterns) pair, unpacked as
# `url, keywords` by processUrls. getAirAsiaReviews applies every pattern with
# a case-insensitive re.search, so the entries are regexes, not plain substrings
# (hence the raw strings such as r"\?$").
# Only the last URL is active, with an empty exclusion list; the commented-out
# entries preserve the URLs and exclusion lists written for the other threads.
urls_with_keywords = [
    # ("https://www.reddit.com/r/travel/comments/1fr6bdo/experience_with_airasia/", ["bummer", "Korea", "covid", r"\?$","How can airline be"]),
    # ("https://www.reddit.com/r/Flights/comments/1dy6pc5/is_airasia_good/", ["subreddit", "advertisement",r"3-3","fatal","A330","booking.com","A320","different?","narrowbody"]),
    # ("https://www.reddit.com/r/malaysia/comments/1cbt207/chinese_tourist_gets_scolded_bodoh_by_airasia/", ["european","filter","chinese","MAS","bots","POV",r"\#","jobless","tambang","sanusi","tiong","padan","viral","China","Shanghai","spit","Perchance","apples","character's","ignorant","bat","print","B40","customer service","doomerism","1st","check-in","bikin","content","mainlanders","amoi","flipping","America","downvoted","imaginary","MAVCOM","fitness","transaction","not her","enforcement","one of them","smart","incase","I agree","scenario","Depends",]),
    # ("https://www.reddit.com/r/malaysia/comments/yiauxn/whats_your_opinion_on_airasia/", ["Period",r"\?$","engine problem","generator","murah"]),
    # ("https://www.reddit.com/r/travel/comments/18lvddo/air_asia_is_shit/", ["bullet","purchase..","incentivized","To be fair","Vietjet enters","bro scoot","bus now","stripped-down","plain wrong","plane wrong","settle claims","accommodation","http","Fair enough","Cathay","regional","spot on","advice","NSW","Western","Canberra","nautical","MEL-SYD","detour","Bangkok","misguided","Emirates","actually believe","Was the","flexible","currencies","classic","TSA","vanished","It is","TRS","Curiously","low level","doodoo","weather","dignity","January","Buy travel insurance","it lol","UK","advertisement","roundtrip","compensated","0h  no"]),
    # ("https://www.reddit.com/r/travel/comments/1581h8f/im_having_serious_issues_with_the_airasia_website/", ["out Trip.com","Good to know","Flyertalk","Hahah","Expedia","white","Webjet","Do you remember","How did you","1 year later","dunno","Veit","company in thailand"]),
    ("https://www.reddit.com/r/malaysia/comments/1djh936/i_dont_get_it_whats_the_hate_on_air_asia/", []),
]

In [153]:
# Top-level `await` works here because IPython/Jupyter runs the cell inside an
# event loop. processUrls prints its report and returns nothing.
await processUrls(urls_with_keywords)

Total Comments (including subcomments): 209
Filtered Comments: 208
--------------------
I guess you didn’t know the shits they did during Covid times
--------------------
They still owe us $700.
--------------------
Please enlighten me
--------------------
People had to beg for refunds, for one
--------------------
Most don't even get any refund.
--------------------
I don't get my refund, lost around $450, fuck Air Asia.
--------------------
Oh they gave me my rm300+ in credit but it was during the no flights period and expired before most people can travel
--------------------
man, fuck air france ... and their "customer suppor" ... that indian dude fought against refunding me like his citizenship depended on it!
--------------------
Probably did
--------------------
Yeah, get fired and return back to their own country. We need more than a union.
--------------------
Lost 2.7k ☺️ yes they did give refund voucher, acc is jammed because of their stupid otp issue that they absolutely re