In [918]:
!pip install asyncpraw 
!pip install nltk

Collecting nltk
  Downloading nltk-3.9.1-py3-none-any.whl.metadata (2.9 kB)
Collecting regex>=2021.8.3 (from nltk)
  Downloading regex-2024.11.6-cp312-cp312-macosx_11_0_arm64.whl.metadata (40 kB)
Downloading nltk-3.9.1-py3-none-any.whl (1.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading regex-2024.11.6-cp312-cp312-macosx_11_0_arm64.whl (284 kB)
Installing collected packages: regex, nltk
Successfully installed nltk-3.9.1 regex-2024.11.6


In [919]:
import pandas as pd

In [922]:
review_keywords = (
    r"(good service|bad service|friendly staff|rude crew|helpful|unhelpful|"
    r"flew|fly|flown|took a flight|onboard|aircraft|seating|legroom|"
    r"on time|delayed|canceled|missed flight|"
    r"comfortable|cramped|smooth flight|bumpy|turbulence|"
    r"cheap ticket|expensive|affordable|overpriced|"
    r"baggage|lost luggage|extra charge|carry-on|"
    r"good experience|bad experience|would recommend|never again|worst airline)"
)
url1UnrelatedKeywords = (
    r"(\?$|A... Pyramid scheme?|https://luggagelosers.com/|Flown September and November 2024|Same experience with AirAsia Japan)"
)

In [924]:
import re
import asyncio
import asyncpraw

async def getAirAsiaReviews(url, unrelatedKeywords):
    async with asyncpraw.Reddit(
        client_id="bl48I4qG60zo01mntiKP3w", 
        client_secret="0N05VDOg785H3oxead-Jc9i5txuBkg", 
        user_agent="JohnLopsided-Whole-8466", 
        username="Lopsided-Whole-8466",
        password="s0161230"
    )as reddit:

        try:
            submission = await reddit.submission(url=url)
            await submission.comments.replace_more(limit=None)  # Load all comments
    
            airasia_reviews = []
            total_comments = 0  # Counter for all comments & subcomments
    
            async def process_comment(comment):
                """ Recursively process a comment and its replies """
                nonlocal total_comments
                total_comments += 1  # Count this comment
    
                comment_text = comment.body.strip()
    
                # Check comment conditions
                if len(comment_text.split()) > 1 and not any(re.search(keyword, comment_text, re.IGNORECASE) for keyword in unrelatedKeywords):
                    airasia_reviews.append(comment_text)
    
                # Fetch and process replies
                await comment.replies.replace_more(limit=None)  # Ensure all replies are loaded
                for reply in comment.replies:
                    await process_comment(reply)  # Recursively process each reply
    
            # Process all top-level comments
            for top_comment in submission.comments:
                await process_comment(top_comment)
    
            return airasia_reviews, total_comments  # Return reviews & total count
    
        except asyncpraw.exceptions.APIException as e:
            print(f"API Error: {e}")
        except Exception as e:
            print(f"An error occurred: {e.__class__.__name__}: {e}")
        return [], 0

In [944]:
import asyncio
import csv
from nltk.sentiment import SentimentIntensityAnalyzer
import nltk

# Download VADER lexicon
nltk.download('vader_lexicon')

async def processUrls(urls_with_keywords):
    sia = SentimentIntensityAnalyzer()  # Initialize sentiment analyzer
    all_reviews = []
    total_comments_count = 0  

    # Create tasks with corresponding keywords for each URL
    tasks = [getAirAsiaReviews(url, keywords) for url, keywords in urls_with_keywords]
    results = await asyncio.gather(*tasks)  # Get results

    for reviews, count in results:
        for review in reviews:
            # Get sentiment score
            sentiment_score = sia.polarity_scores(review)["compound"]

            # Classify sentiment
            if sentiment_score >= 0.55:
                sentiment = "Positive"
            elif sentiment_score <= -0.55:
                sentiment = "Negative"
            else:
                sentiment = "Neutral"

            all_reviews.append((review, sentiment))  # Store review with label
        total_comments_count += count  

    print(f"Total Comments (including subcomments): {total_comments_count}")  
    print(f"Filtered Comments: {len(all_reviews)}")  

    # Save to CSV
    filename = "AirAsiaComments.csv"
    with open(filename, mode="w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        writer.writerow(["Review", "Sentiment"])  # CSV header

        for review, sentiment in all_reviews:
            writer.writerow([review, sentiment])  # Save each review with label

    print(f"Saved {len(all_reviews)} labeled reviews to {filename}")



[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/geraint/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [946]:
urls_with_keywords = [
    ("https://www.reddit.com/r/travel/comments/1fr6bdo/experience_with_airasia/", ["bummer", "Korea", "covid", r"\?$","How can airline be"]),
    ("https://www.reddit.com/r/Flights/comments/1dy6pc5/is_airasia_good/", ["subreddit", "advertisement",r"3-3","fatal","A330","booking.com","A320","different?","narrowbody"]),
    ("https://www.reddit.com/r/malaysia/comments/1cbt207/chinese_tourist_gets_scolded_bodoh_by_airasia/", ["european","filter","chinese","MAS","bots","POV",r"\#","jobless","tambang","sanusi","tiong","padan","viral","China","Shanghai","spit","Perchance","apples","character's","ignorant","bat","print","B40","customer service","doomerism","1st","check-in","bikin","content","mainlanders","amoi","flipping","America","downvoted","imaginary","MAVCOM","fitness","transaction","not her","enforcement","one of them","smart","incase","I agree","scenario","Depends",]),
    ("https://www.reddit.com/r/malaysia/comments/yiauxn/whats_your_opinion_on_airasia/", ["Period",r"\?$","engine problem","generator","murah"]),
    ("https://www.reddit.com/r/travel/comments/18lvddo/air_asia_is_shit/", ["bullet","purchase..","incentivized","To be fair","Vietjet enters","bro scoot","bus now","stripped-down","plain wrong","plane wrong","settle claims","accommodation","http","Fair enough","Cathay","regional","spot on","advice","NSW","Western","Canberra","nautical","MEL-SYD","detour","Bangkok","misguided","Emirates","actually believe","Was the","flexible","currencies","classic","TSA","vanished","It is","TRS","Curiously","low level","doodoo","weather","dignity","January","Buy travel insurance","it lol","UK","advertisement","roundtrip","compensated","0h  no"]),
    ("https://www.reddit.com/r/travel/comments/1581h8f/im_having_serious_issues_with_the_airasia_website/", ["out Trip.com","Good to know","Flyertalk","Hahah","Expedia","white","Webjet","Do you remember","How did you","1 year later","dunno","Veit","company in thailand"]),
    ("https://www.reddit.com/r/malaysia/comments/1djh936/i_dont_get_it_whats_the_hate_on_air_asia/", ["enlighten me","air france","Probably did","union","Erling","tribunal","trail","hundreds","fuk","Philippine","not illegal","hates from","Enrich","not ANA","EVA","singing FA","KL-KK","BKI-KUL","rotting","apples","ransom","Wizz","mentioned Scoot","ALL were delayed","FTFY","well done bot","LMAOOOOOOOOOO","1.6k","http","ioncare","comparison","price is not","stalking","FTW","mlm","Not sure man","mean comment","yakitori","_not_","owe them money","Vietjet","larh","Puduraya","Heathrow","poker","weighing staff","loathes","are more expensive","from changi","on the same plane","RyanAir CEO","jank","standing","British","time-sensitive","safety concerns","suicide-Ryanair","Germainwings","90s","Maswings","whataboutism","Myairline was good","relatives","Pattaya","Always delay","batik air quite","Lion","Vishnu","holly","creaking","Can share","journey next week","mahal","belum","flight/a","airlines from Indonesia","burner","on the Max"]),
    ("https://www.reddit.com/r/Flights/comments/160p0qh/is_air_asia_good/", ["http", "Thank you","Calories"]),
    ("https://www.reddit.com/r/malaysia/comments/1fo4544/airasia_named_worlds_best_budget_airline_for_15th/", ["Daily Show","and consecutively","bottom of the barrel","Etihad","rm10","better alternatives","SIA","Seoul","Will keep","cost-service-comfort","Exactly","still worth it","ANA","Lionair","Thanks","low cost","zipair","buffet","April","Tony","Most votes win","low bar"]),
    ("https://www.reddit.com/r/malaysia/comments/vh9x7d/fly_with_malaysia_airlines_or_airasia/", ["half price","rm1500","Sabah","infotainment","vent","Jetstar","EK","Singapore airlines","Thai Airways","rescheduled a month","two weeks out","AirAsia stole"]),
    ("https://www.reddit.com/r/askSingapore/comments/w5vap5/are_airasia_flights_from_singapore_safereliable/",["indo","ecommerce","conclusion","flight delays"]),
    ("https://www.reddit.com/r/ThailandTourism/comments/18r7dtl/thai_air_asia_reliable/", ["thanks", r"\?$",]),
    ("https://www.reddit.com/r/Flights/comments/1gkdkuw/how_safe_is_airasia_cambodia/", ["http"]),
    ("https://www.reddit.com/r/AirTravelIndia/comments/1is9nir/requesting_feedback_on_malaysia_airlines_vs/", ["http","benefits of malaysian","Got it","directly","indigo","737 MAX","Couldn't agree more","judgmental","blood","Boeing","equally useless for me","Malaysia Airlines"]),
    ("https://www.reddit.com/r/malaysia/comments/1det4wy/airasia_ak857/", ["fallen","http","sorry","wishing","Officer",r"\?$","dint","pilots","7hrs","Aborted","engineers","dog","her 😂","HVAC","Thanks","tony","V1","V2","Fell off","EasyJet","triggered","disappear","2:57pm"]),
    ("https://www.reddit.com/r/fearofflying/comments/17ybh7d/flying_with_air_asia/", []),
    ("https://www.reddit.com/r/Thailand/comments/13ga3zu/airasia/", ["Thanks","email","balls","wait up","hope you","23","CC","procedure","delay"]),
    ("https://www.reddit.com/r/Flights/comments/1aixa75/airlines_to_use_while_in_asia/?q=airasia&type=comments&cId=abb031b0-a3fd-4c4e-8784-96e2d363360f&iId=5167fe72-f102-45f9-9cf5-25755aface47", [r"^(?!.*\bairasia\b).*"]),
    ("https://www.reddit.com/r/malaysia/comments/191bi55/airasia_group_amongst_safest_lowcost_airlines_for/", []),
    ("https://www.reddit.com/r/travel/comments/1eqdngp/airline_companies_for_sea_travel/?q=airasia&type=comments&cId=c818bf1f-0eea-4f10-9bc5-adc56fdaba2f&iId=888012b0-a7c3-4e9e-b07c-dd293a575a80", [r"^(?!.*\bairasia\b).*"]),
    ("https://www.reddit.com/r/malaysia/comments/xmmb7c/despite_getting_most_complaints_airasia_again/", ["eggs","berpisah","MAVCOM","clap","So u mean","per million passengers","worse budget airlines"]),
    ("https://www.reddit.com/r/malaysia/comments/1hvlkej/airasia_aseanmove_pass_review/", ["Ppl"]),
    ("https://www.reddit.com/r/ThailandTourism/comments/1hvlmbs/airasia_aseanmove_pass_review/", []),
    ("https://www.reddit.com/r/Thailand/comments/108f1mq/do_not_buy_the_air_asia_super_pass/", [r"\?$","damn that","smoke","routes","20,000","Unthinkable","Frontier","Twitter","could not flights","Hawaii","baht","about that","AAirpass","cynic","take the hit","Not anymore","sacrifice","- will","How","400","for sharing","it's a good deal","money and stress"]),
    ("https://www.reddit.com/r/ThailandTourism/comments/1doplka/australia_to_thailand_whats_better_jetstar_or_air/", [r"^(?!.*\bairasia\b).*"]),
    ("https://www.reddit.com/r/Shoestring/comments/14h5xwz/cheapest_airports_to_get_to_asia_from_mainland_us/?q=airasia&type=comments&cId=c818bf1f-0eea-4f10-9bc5-adc56fdaba2f&iId=74872392-fe69-483a-82ee-5c38e568d841", [r"^(?!.*\bairasia\b).*"]),
    ("https://www.reddit.com/r/Flights/comments/1clnaom/peach_and_other_asian_low_cost_carriers/?q=airasia&type=comments&cId=c818bf1f-0eea-4f10-9bc5-adc56fdaba2f&iId=a281fdb8-da9c-4609-863c-b773ebe43fba",[r"^(?!.*\bairasia\b).*"]),
    ("https://www.reddit.com/r/QantasAirways/comments/1d7swy4/australia_to_thailand_whats_a_better_budget/", ["scoot","Ramadan",r"\?$","Airbus","Singapore Airlines","third or fourth city","sale","bot","Australian","JQ","Gold Coast"]),
    ("https://www.reddit.com/r/phtravel/comments/16p1k8m/should_i_book_airasia/", ["thank you", "OP", "PAL","pal", "thanksss", "sorry", "ang", "hindi", "bat","thankss","kung","maayos"]),
    ("https://www.reddit.com/r/AirAsia/comments/1iqokx6/cancelled_flights/", []),
    ("https://www.reddit.com/r/malaysia/comments/1heqoz1/airasia_truly_asia/", ["PAL", "thanks!", "AIR JAPAN", "Narita", "ANA", "beer", "insurance", "merger", "DJ", "motto","http","capsules","observatory","10am","Vindicated","transport ministry","200 to 300",r"\?$"]),
    ("https://www.reddit.com/r/ThailandTourism/comments/yzwxy6/airasia_trustworthy/", ["guy", "Samui", "thank you", "Piggybacking", "Worldwilde", "advtrilys",r"\?$","Technical it's the total weight"]),
    ("https://www.reddit.com/r/Advice/comments/1dy6qfb/is_airasia_good/", []),
    ("https://www.reddit.com/r/ThailandTourism/comments/vn84xm/airasia_experience/", ["Malindo", "VietJet", "I was 🙏", "Thank you", "Visas", "peculiar", "lesson", "BK", "(KUL)", "posting", "lesson"]),
    ("https://www.reddit.com/r/malaysia/comments/157y5mj/current_state_and_reliability_of_air_asia/", ["thanks"]),
    ("https://www.reddit.com/r/Flights/comments/1i0bvg2/airasia_is_crappy_shitty_worst_airline_avoid_them/", ["Notice"]),
    ("https://www.reddit.com/r/Flights/comments/1da1sws/airasia_unlimited_pass_misleading_and_total_scam/", ["dispute"]),
    ("https://www.reddit.com/r/Kazakhstan/comments/1cxve2k/anyone_fly_airasia_to_kuala_lumpur/", ["luggage", "MOVE", "MDAC", "Thanks", "Wait"]),
    ("https://www.reddit.com/r/malaysia/comments/18posov/why_do_people_bash_so_much_airasia_but_still_fly/", ["MAS", "hypocritical", "carrier", "Newsflash", "Haha", "Malindo", "bus", "you pay with your time", "KLIA2", 
                                                                                                             "JB", "festive", "envy", "skinny", "scoot", "chubbier", "monopoly", "sliding", "shallow", "benci", "complain", 
                                                                                                             "charge", "dumb", "kiddo", "vendor", "cheapo", "Tony", "exorbitant", "cool", "messaur", "Vietnam", "beggars", 
                                                                                                             "PAS", "rm22m", "king", "bashing", "which is it", "Qantas", "love-hate","DAP", "OP's", "bash", "shut up", "But..."]),
    ("https://www.reddit.com/r/malaysia/comments/1fo4544/airasia_named_worlds_best_budget_airline_for_15th/", ["Spirit", "barrel", "Japan", "SIA", "KUL", "thanks", "Batik's", 
                                                                                                              "Nov", "ANA", "Thanks!", "aren't", "Tony", "Batik", "perspective", 
                                                                                                              "bar", "Changi", "theatre", "Singapore"]),
    ("https://www.reddit.com/r/ThailandTourism/comments/1irvlq8/any_experience_with_air_asia/", ["emergency", "6'1", "Thailand", "standard", "Windows", "adapter", "depressing", "tech", "gym", "You're", 
                                                                                                "Thai", "plus sized", "overweight", "third", "Bangkok", "electronics", "screwed" ]),
    ("https://www.reddit.com/r/Thailand/comments/zii4a2/airasia_canceling_flights_like_crazy/", ["FYI", "NokAir", "Thailand", "effing", "SQ", "chasing", "shove", "physical", "bugging", 
                                                                                                "puttingoff", "Thai", "Malaysian", "DMK", "companies"]),
    ("https://www.reddit.com/r/Flights/comments/1hrjfuq/a_disappointing_experience_with_air_asia_the/", ["agreeing", "probably", "ULCC"]),
    ("https://www.reddit.com/r/ThailandTourism/comments/1bvllc9/airasia_flight_changes_beyond_48hrs/", ["friends"]),
    ("https://www.reddit.com/r/travel/comments/1ai1bmh/book_with_air_asia_at_your_own_risk/", ["CAD", "discharge", "partner", "LCC", "credit card", "awful"]),
    ("https://www.reddit.com/r/malaysia/comments/1czy382/airasia_doublebooked_my_flight_canceled_the_wrong/", ["contact", "duplicate", "tabole", "OP", "keyword", "intentionally", "says...", "DM"]),
    ("https://www.reddit.com/r/TravelHacks/comments/1dcxssa/how_common_are_air_asia_delays_and_cancellations/", ["OTP", "OP"]),
    ("https://www.reddit.com/r/Thailand/comments/2fbvtq/is_air_asia_legit/", ["OTP", "OP", "Thanks", "remove" "SEA", "Cool!", "Thailand", "dick", "promotion", "shady"]),
    ("https://www.reddit.com/r/malaysia/comments/1b0apll/airasia_move_debuts_with_refreshed_app_look/", []),
    ("https://www.reddit.com/r/newzealand/comments/142bk8l/airasia_flights/", ["thanks"]),
    ("https://www.reddit.com/r/indonesia/comments/1aiw8vj/would_you_fly_air_asia_indo/", ["thanks", "lcc", "lion", "zero", "pedestrians", "setuju", "gua", "garuda", 
                                                                                         "Boeing", "so,", "737", "lol", "matter", "kaum mendang" ]),
    ("https://www.reddit.com/r/malaysia/comments/z8kvqv/air_asia/", ["MAS", "stockholm", "true", "lah", "bank"]),
    ("https://www.reddit.com/r/malaysia/comments/uc4jja/is_airasia_a_zombie_company_they_dont_seem_to_be/", ["Australia", "operation", "credit", "Cattle", "plausible", "live agent", "Ughh", "trashiest", "VTL", 
                                                                                                            "curious", "vtubers", "bf", "Mikazuki", "nah", "God", "Khazanah", "afloat", "VTuber", "Subang", 
                                                                                                            "sorry", "OP", "dumb", "retimed", "zombie"]),
    ("https://www.reddit.com/r/malaysia/comments/126cebq/airasia_brings_back_live_calls_for_customer/", ["vicious", "agent", "waiting time"]),
    ("https://www.reddit.com/r/malaysia/comments/1059ms3/malaysian_couple_stranded_in_australia_for_5_days/", ["credit card", "lol", "edition", "European Regulation", "asb", "lah", "Fr.", "haircut", "huy", "asylum"]),
    ("https://www.reddit.com/r/malaysia/comments/153l71z/got_disembark_from_airashit_flight_after_sitting/", ["thank you", "Tune", "rarely", "MAS", "Firefly", "pitch", "monkey", "hahahah", "monkeys", 
                                                                                                             "cuz", "Cebu", "Golden", "hell", "aerodynamics", "Thanks", "irrational", "honkai", 
                                                                                                             "![gif](giphy|yoJC2BlMMydcdBvgze)", "RM50", "saying", "punish", "Mekdi", "furious", 
                                                                                                             "pilot", "30+", "Japan", "bo", "kurung", "die", "Awww", "konek"]),
    ("https://www.reddit.com/r/travel/comments/13s4xbd/getting_flights_with_airasia/", ["bill", "itinerary"]),
    ("https://www.reddit.com/r/perth/comments/14grq56/air_asia_2023/", ["videos", "a330"]),
    ("https://www.reddit.com/r/singapore/comments/2auu4f/thoughts_and_views_about_airasia_singapore_japan/", ["ya"]),
    ("https://www.reddit.com/r/travel/comments/gnmm39/airasia_refunds_anyone_had_any_response/", ["contest", "company", "claim"]),
    ("https://www.reddit.com/r/ThailandTourism/comments/1goijh3/air_asia_dmk_or_viet_jet_bkk_what_do_you_find_a/", ["inter", "metro","always","Thank","jal","Ngl", "BKK", "DMK", "Ymmv"]),
    ("https://www.reddit.com/r/ThailandTourism/comments/1fl0dwo/is_it_safe_to_book_flight_from_airasia_app/", ["currency", "CNX", "counters", "LCC"]),
    ("https://www.reddit.com/r/Flights/comments/1gklhzx/fly_air_asia_x_or_batik_air/", ["refund", "bot", "assignments"]),
    ("https://www.reddit.com/r/KualaLumpur/comments/1gklh5g/fly_air_asia_or_batik_air_from_australia_to_kuala/", ["MAS", "Coincidence", "MOVE", "comfortable", "Reallllyyy", "shut down"]),
    ("https://www.reddit.com/r/travel/comments/22vkc2/are_flights_around_southeast_asia_cheap/", [r"^(?!.*\bairasia\b).*"]),
    ("https://www.reddit.com/r/Frugal/comments/33c38e/cheap_flights_in_asia/", [r"^(?!.*\bairasia\b).*"]),
    ("https://www.reddit.com/r/malaysia/comments/193ocd1/airasia_vs_mas_vs_malindobatik/", ["Vietjet","gojek","Alamak","Jakarta","Kuching","Thanks","What","Bali airport","check-in","Good point","BaggageSolution","Oof","100","MAS/Batik", "http"]),
    ("https://www.reddit.com/r/fearofflying/comments/1cy4pcq/is_air_asia_indonesia_safe/", ["fair"])
]

In [948]:
await processUrls(urls_with_keywords)

Total Comments (including subcomments): 2264
Filtered Comments: 1001
Saved 1001 labeled reviews to AirAsiaComments.csv
