In [1]:
pip install asyncpraw


Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd


In [3]:
# Single regex alternation of review-related phrases, grouped by topic.
# Not referenced by the cells visible in this part of the notebook.
review_keywords = "(" + "|".join((
    # service and staff
    "good service", "bad service", "friendly staff", "rude crew", "helpful", "unhelpful",
    # flying and cabin
    "flew", "fly", "flown", "took a flight", "onboard", "aircraft", "seating", "legroom",
    # punctuality
    "on time", "delayed", "canceled", "missed flight",
    # comfort
    "comfortable", "cramped", "smooth flight", "bumpy", "turbulence",
    # price
    "cheap ticket", "expensive", "affordable", "overpriced",
    # baggage
    "baggage", "lost luggage", "extra charge", "carry-on",
    # overall verdict
    "good experience", "bad experience", "would recommend", "never again", "worst airline",
)) + ")"

# Single regex alternation of phrases marking a comment as off-topic.
# `\?$` matches a question mark at the very end of the text.
unrelated_keywords = "(" + "|".join((
    "Thai Airways", "other airline", "different airline", "another airline", "better airline",
    r"\?$",
    "but people here aren't really shy about checking stuff",
    "Cathay Pacific", "IATA", "hand carry rule",
    "OP", "That’s a great itinerary", "scoot", "Fernandes", "Maayos", "Thanks", "pyramid scheme",
)) + ")"

In [4]:
import os

import asyncpraw

# Reddit API client used by the scraping cells below.
# Credentials come from environment variables so that secrets never have to be typed
# into (and saved with) the notebook. Each value falls back to an empty string when
# the variable is unset, which is what this cell previously hardcoded.
reddit = asyncpraw.Reddit(
    client_id=os.environ.get("REDDIT_CLIENT_ID", ""),
    client_secret=os.environ.get("REDDIT_CLIENT_SECRET", ""),
    user_agent=os.environ.get("REDDIT_USER_AGENT", ""),
    username=os.environ.get("REDDIT_USERNAME", ""),
    password=os.environ.get("REDDIT_PASSWORD", ""),
)
    

In [5]:
import re
import asyncio
import asyncpraw

async def getAirAsiaReviews(url, unrelatedKeywords):
    """Collect the comments of one Reddit submission that pass the filters.

    Every comment in the thread (top-level and nested) is counted. A comment is
    kept unless its stripped body has fewer than two whitespace-separated words
    or matches any exclusion pattern (case-insensitive ``re.search``). Replies
    are always visited, even when their parent was filtered out, so the returned
    count covers the whole thread and a short parent does not hide its replies.

    Args:
        url: URL of the Reddit submission to read.
        unrelatedKeywords: Iterable of regex patterns; a comment matching any of
            them is dropped. A single string is treated as one pattern and
            ``None`` as "no patterns".

    Returns:
        A ``(reviews, total_comments)`` tuple: the kept comment bodies in
        depth-first thread order (parent before its replies) and the number of
        comments visited. If anything fails, the error is printed and
        ``([], 0)`` is returned instead.
    """
    if unrelatedKeywords is None:
        unrelatedKeywords = []
    elif isinstance(unrelatedKeywords, str):
        # Iterating a str would yield single characters, each of which would then
        # be used as its own regex (and "(" alone is not even a valid pattern).
        unrelatedKeywords = [unrelatedKeywords]

    try:
        # Compile once up front so an invalid pattern is reported before any
        # network request is made, and is not re-parsed for every comment.
        excluded_patterns = [re.compile(keyword, re.IGNORECASE) for keyword in unrelatedKeywords]

        submission = await reddit.submission(url=url)
        await submission.comments.replace_more(limit=None)  # Load ALL comments, including nested replies

        airasia_reviews = []
        total_comments = 0  # Counter for all comments & subcomments

        # Explicit stack instead of recursion; children are pushed in reverse so
        # they are popped in their original order (pre-order traversal).
        pending = list(submission.comments)
        pending.reverse()
        while pending:
            comment = pending.pop()
            if not isinstance(comment, asyncpraw.models.Comment):
                continue  # Ignore anything in the tree that is not a Comment

            total_comments += 1
            # Queue the replies before filtering so a rejected parent never
            # removes its subtree from the count or from consideration.
            pending.extend(reversed(list(comment.replies)))

            comment_text = comment.body.strip()

            # Skip empty and single-word comments
            if len(comment_text.split()) < 2:
                continue

            # Skip comments with unrelated keywords
            if any(pattern.search(comment_text) for pattern in excluded_patterns):
                continue

            airasia_reviews.append(comment_text)

        return airasia_reviews, total_comments  # Return reviews & total count

    except asyncpraw.exceptions.APIException as e:
        print(f"API Error: {e}")
    except Exception as e:
        print(f"An error occurred: {type(e).__name__}: {e}")

    return [], 0  # Ensure function always returns something


In [6]:
async def processUrls(urls_with_keywords):
    """Scrape several submissions concurrently and print the combined result.

    Args:
        urls_with_keywords: Iterable of ``(url, keywords)`` pairs; each pair is
            passed straight to ``getAirAsiaReviews``.

    Prints the summed comment count, the number of kept comments, and then each
    kept comment preceded by a line of 20 dashes. Returns nothing.
    """
    # One coroutine per (url, keywords) pair, all awaited together.
    outcomes = await asyncio.gather(
        *[getAirAsiaReviews(link, patterns) for link, patterns in urls_with_keywords]
    )

    # Flatten the per-URL review lists (keeping URL order) and sum the per-URL counts.
    all_reviews = [text for texts, _ in outcomes for text in texts]
    total_comments_count = sum(seen for _, seen in outcomes)

    print(f"Total Comments (including subcomments): {total_comments_count}")
    print(f"Filtered Comments: {len(all_reviews)}")

    divider = "-" * 20
    for text in all_reviews:
        print(divider)
        print(text)


In [7]:
# Threads to scrape. Each entry is a (submission URL, list of regex patterns) pair;
# processUrls passes the pair to getAirAsiaReviews, which drops comments matching
# any of the patterns. Only the last entry is active; the others are commented out.
# NOTE(review): the two commented-out "is_airasia_good" entries share the same URL
# but carry different pattern lists — confirm whether the second one was meant to
# point at a different thread.
urls_with_keywords = [
    # ("https://www.reddit.com/r/travel/comments/1fr6bdo/experience_with_airasia/", ["bummer", "Korea", "covid", r"\?$", "How can airline be"]),
    # ("https://www.reddit.com/r/Flights/comments/1dy6pc5/is_airasia_good/", ["subreddit", "advertisement", r"3-3", "fatal", "A330", "booking.com", "A320", "different?", "narrowbody"]),
    # ("https://www.reddit.com/r/Flights/comments/1dy6pc5/is_airasia_good/", [
    #     "european", "filter", "chinese", "MAS", "bots", "POV", r"\#", "jobless", "tambang", "sanusi", "tiong", 
    #     "padan", "viral", "China", "Shanghai", "spit", "Perchance", "apples", "character's", "ignorant", "bat", 
    #     "print", "B40", "customer service", "doomerism", "1st", "check-in", "bikin", "content", "mainlanders", 
    #     "amoi", "flipping", "America", "downvoted", "imaginary", "MAVCOM", "fitness", "transaction", "not her", 
    #     "enforcement", "one of them", "smart", "incase", "I agree", "scenario", "Depends"
    # ]),
    # ("https://www.reddit.com/r/malaysia/comments/yiauxn/whats_your_opinion_on_airasia/", ["Period", r"\?$", "engine problem", "generator", "murah"]),
    ("https://www.reddit.com/r/travel/comments/18lvddo/air_asia_is_shit/", [])
]


In [8]:
# Run the scrape for the configured threads and print the results.
# This is a top-level `await`, so it relies on the notebook kernel accepting `await`
# directly in a cell; a plain script would need asyncio.run(processUrls(...)) instead.
await processUrls(urls_with_keywords)

Total Comments (including subcomments): 137
Filtered Comments: 133
--------------------
I've flown Air Asia multiple times and never had an issue. What you described could happen to any budget airline to be honest. But I do feel for you, no one should have to go through that horrible experience in the middle of the night.

It's like complaining about Ryanair or Jetstar. You get what you pay for
--------------------
> What you described could happen to any budget airline to be honest.

Well, any airline really.

Flying with a full service carrier isn't some magic bullet.
--------------------
Exactly the only time I've suffered delays like this was with United. Ryanair have saved me from my own tardiness by picking me out of security queues and Air Asia while they might have no frills at all have always got me where I needed to go.
--------------------
AirAsia has really good food too.
--------------------
Food that you have to purchase..
--------------------
Well. To be fair you purchas