In [50]:
import pandas as pd
import nltk
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Load data: the first CSV column is used as the DataFrame index
url = 'https://raw.githubusercontent.com/DyanelLancea/Airline-Review-Sentiement-Analysis/refs/heads/master/airlines_review.csv'
df = pd.read_csv(filepath_or_buffer=url, index_col=0)

# VADER analyzer instance used for the sentiment scoring
sid = SentimentIntensityAnalyzer()

# Tokens to strip from the text: every punctuation mark in the string below
# (one list entry per character), followed by the two review-verification tags
removechar = list('!@#$%^&*()-_=+{}[]|\\:;"\'<>,.?/~`') + [
    '✅ Trip Verified',
    'Not Verified',
]

# Remove duplicates: keep only the first occurrence of each identical row
df = df[~df.duplicated()]

# Replace missing values with 'Unknown' (text columns) or "NA" (other columns)
def replace_missing_value(df):
    """Fill the missing values of every column of ``df`` in place.

    Columns with object dtype are filled with ``'Unknown'``; every other
    column is filled with the string ``"NA"``.

    Args:
        df: DataFrame to clean. Its columns are reassigned in place.

    Returns:
        The same DataFrame object, so the result can be reassigned or
        chained (previously the function returned ``None``).
    """
    for col in df.columns:
        # Object dtype is treated as "string column" here.
        placeholder = 'Unknown' if df[col].dtype == 'O' else "NA"
        # NOTE(review): putting the string "NA" into a numeric column that
        # has gaps presumably stops it being numeric — confirm this is wanted.
        df[col] = df[col].fillna(placeholder)
    return df

# Remove special characters from specific columns function
def remove_special_characters(df, removechar, char=''):
    """Strip unwanted tokens from the 'Airlines' and 'Text Content' columns.

    Each entry of ``removechar`` is matched literally (not as a regex), in
    list order. In 'Airlines' a match is replaced by a single space; in
    'Text Content' a match is deleted.

    Args:
        df: DataFrame holding string columns 'Airlines' and 'Text Content'.
            The two columns are reassigned in place.
        removechar: Iterable of literal strings to remove.
        char: Unused. Kept, now with a default, only so that existing calls
            passing ``char=...`` keep working; the original loop variable
            overwrote it immediately.

    Returns:
        The same DataFrame object with both columns cleaned.
    """
    airlines = df['Airlines']
    text_content = df['Text Content']
    for token in removechar:
        airlines = airlines.str.replace(token, ' ', regex=False)
        text_content = text_content.str.replace(token, '', regex=False)
    # Write each column back once instead of on every iteration.
    df['Airlines'] = airlines
    df['Text Content'] = text_content
    return df

# Apply data cleaning functions (both reassign columns of df in place)
replace_missing_value(df)
remove_special_characters(df, removechar, char='')

# Standardize text case
df['Airlines'] = df['Airlines'].str.title()
df['Name'] = df['Name'].str.title()
df['Text Content'] = df['Text Content'].str.lower()

# Sentiment analysis on the first review in 'Text Content'.
# .iloc[0] selects by position, so this does not depend on the label 0
# being present in the index that was loaded from the CSV.
scores = sid.polarity_scores(df['Text Content'].iloc[0])

# Display sentiment scores
score_results = f"Sentiment Analysis Result: {scores}"
print(score_results)

# Display the cleaned DataFrame
df

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Derrick\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


Sentiment Analysis Result: {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}


Unnamed: 0,Airlines,Name,Date Published,Text Content
0,Qatar Airways,Romana Malik,7/9/2025,we choose our seats when booking and they ch...
1,Qatar Airways,J Raiz,2/9/2025,initially i was supposed to be traveling w...
2,Qatar Airways,Iman Yusuf,1/9/2025,i want to sincerely thank qatar airways fo...
3,Qatar Airways,Ronald Zwart,26/8/2025,boarding was efficient friendly personable w...
4,Qatar Airways,Dmitriy Berezin,21/8/2025,when booking the flight wa shown as a qsu...
...,...,...,...,...
14586,Hainan Airlines,J Depaepe,19/1/2010,brupek business class 6 hour delay in brussels...
14587,Hainan Airlines,Pieter D'Hamers,10/1/2010,brupekbru new a330 good and friendly service v...
14588,Hainan Airlines,Y Chen,9/1/2010,beijingguangzhou b737800 in economy despite th...
14589,Hainan Airlines,A Smith,16/12/2009,xiy to ctu on boeing 737800 in economy flight ...
