In [1]:
# Importing some basic libraries

import numpy as np
import pandas as pd

# Importing the dataset

air = pd.read_csv('airline_df_nlped.csv')
air.head()

Unnamed: 0,Country,Airline,Review,Cleaned_Review,Sentiment,Review2,Cleaned_Review2
0,China,Air China,los angeles beijing return food low quality st...,los angeles beijing return food quality staff ...,Negative,los angeles beijing return food low quality st...,los angeles beijing return food quality staff ...
1,China,Air China,round trip from hong kong to munich the main r...,round trip hong kong munich main reason fly ai...,Negative,round trip from hong kong to munich the main r...,round trip hong kong munich main reason fly ai...
2,China,Air China,sydney beijing paris then rome beijing to sydn...,sydney beijing paris rome beijing sydney famil...,Negative,sydney beijing paris then rome beijing to sydn...,sydney beijing paris rome beijing sydney famil...
3,China,Air China,london to sydney return via beijing a cheap fl...,london sydney return beijing cheap flight live...,Negative,london to sydney return via beijing a cheap fl...,london sydney return beijing cheap flight live...
4,China,Air China,beijing to shanghai only one check in desk for...,beijing shanghai check desk standby passenger ...,Positive,beijing to shanghai only one check in desk for...,beijing shanghai check desk standby passenger ...


In [3]:
# Using the Vader Sentiment Analyzer

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Creating an instance of the SentimentIntensityAnalyzer

analyzer = SentimentIntensityAnalyzer()

# Creating a function to get the compound sentiment scores and only classify the sentiment as positive, negative

def get_sentiment_scores(sentence):
    score = analyzer.polarity_scores(sentence)
    return score['compound']

# Creating a new column in the dataset to store the compound sentiment scores

air['sentiment_scores'] = air['Review2'].apply(get_sentiment_scores)

# Creating a new column in the dataset to store the sentiment

air['sentiment'] = air['sentiment_scores'].apply(lambda x: 'positive' if x > 0 else 'negative')

In [5]:
# Using TextBlob Sentiment Analyzer to get the sentiment scores

from textblob import TextBlob

# Creating a function to get the sentiment scores

def get_sentiment_scores2(sentence):
    score = TextBlob(sentence)
    return score.sentiment.polarity

# Creating a new column in the dataset to store the sentiment scores

air['sentiment_scores2'] = air['Review2'].apply(get_sentiment_scores2)

# Creating a new column in the dataset to store the sentiment

air['sentiment2'] = air['sentiment_scores2'].apply(lambda x: 'positive' if x > 0 else 'negative')

In [6]:
air.head()

Unnamed: 0,Country,Airline,Review,Cleaned_Review,Sentiment,Review2,Cleaned_Review2,sentiment_scores,sentiment,sentiment_scores2,sentiment2
0,China,Air China,los angeles beijing return food low quality st...,los angeles beijing return food quality staff ...,Negative,los angeles beijing return food low quality st...,los angeles beijing return food quality staff ...,0.2238,positive,0.211032,positive
1,China,Air China,round trip from hong kong to munich the main r...,round trip hong kong munich main reason fly ai...,Negative,round trip from hong kong to munich the main r...,round trip hong kong munich main reason fly ai...,-0.0945,negative,0.089744,positive
2,China,Air China,sydney beijing paris then rome beijing to sydn...,sydney beijing paris rome beijing sydney famil...,Negative,sydney beijing paris then rome beijing to sydn...,sydney beijing paris rome beijing sydney famil...,-0.6089,negative,-0.248191,negative
3,China,Air China,london to sydney return via beijing a cheap fl...,london sydney return beijing cheap flight live...,Negative,london to sydney return via beijing a cheap fl...,london sydney return beijing cheap flight live...,0.4182,positive,0.152256,positive
4,China,Air China,beijing to shanghai only one check in desk for...,beijing shanghai check desk standby passenger ...,Positive,beijing to shanghai only one check in desk for...,beijing shanghai check desk standby passenger ...,-0.1779,negative,0.09375,positive


In [7]:
# Checking the accuracy of the Vader Sentiment Analyzer

from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Mapping the sentiment to 1 and 0

air['sentiment'] = air['sentiment'].map({'positive': 1, 'negative': 0})
air['Sentiment'] = air['Sentiment'].map({'Positive': 1, 'Negative': 0})

# Checking the accuracy of the Vader Sentiment Analyzer

print(accuracy_score(air['sentiment'], air['Sentiment']))
print(classification_report(air['sentiment'], air['Sentiment']))
print(confusion_matrix(air['sentiment'], air['Sentiment']))

0.8376298829409443
              precision    recall  f1-score   support

           0       0.72      0.87      0.79      5186
           1       0.92      0.82      0.87     10020

    accuracy                           0.84     15206
   macro avg       0.82      0.85      0.83     15206
weighted avg       0.85      0.84      0.84     15206

[[4508  678]
 [1791 8229]]


In [8]:
# Checking the accuracy of the TextBlob Sentiment Analyzer

# Mapping the sentiment to 1 and 0

air['sentiment2'] = air['sentiment2'].map({'positive': 1, 'negative': 0})

# Checking the accuracy of the TextBlob Sentiment Analyzer

print(accuracy_score(air['sentiment'], air['sentiment2']))
print(classification_report(air['sentiment'], air['sentiment2']))
print(confusion_matrix(air['sentiment'], air['sentiment2']))

0.811192950151256
              precision    recall  f1-score   support

           0       0.85      0.54      0.66      5186
           1       0.80      0.95      0.87     10020

    accuracy                           0.81     15206
   macro avg       0.83      0.75      0.77     15206
weighted avg       0.82      0.81      0.80     15206

[[2812 2374]
 [ 497 9523]]
