In [1]:
import pandas as pd
from string import punctuation
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

In [2]:
data = pd.read_csv("/kaggle/input/singapore-airlines-reviews/singapore_airlines_reviews.csv")

In [3]:
data.head()

Unnamed: 0,published_date,published_platform,rating,type,text,title,helpful_votes
0,2024-03-12T14:41:14-04:00,Desktop,3,review,We used this airline to go from Singapore to L...,Ok,0
1,2024-03-11T19:39:13-04:00,Desktop,5,review,The service on Singapore Airlines Suites Class...,The service in Suites Class makes one feel lik...,0
2,2024-03-11T12:20:23-04:00,Desktop,1,review,"Booked, paid and received email confirmation f...",Don’t give them your money,0
3,2024-03-11T07:12:27-04:00,Desktop,5,review,"Best airline in the world, seats, food, servic...",Best Airline in the World,0
4,2024-03-10T05:34:18-04:00,Desktop,2,review,Premium Economy Seating on Singapore Airlines ...,Premium Economy Seating on Singapore Airlines ...,0


In [4]:
data = data[['text','rating']]

In [5]:
data

Unnamed: 0,text,rating
0,We used this airline to go from Singapore to L...,3
1,The service on Singapore Airlines Suites Class...,5
2,"Booked, paid and received email confirmation f...",1
3,"Best airline in the world, seats, food, servic...",5
4,Premium Economy Seating on Singapore Airlines ...,2
...,...,...
9995,First part done with Singapore Airlines - acce...,5
9996,And again a great Flight with Singapore Air. G...,5
9997,"We flew business class from Frankfurt, via Sin...",5
9998,"As always, the A380 aircraft was spotlessly pr...",4


In [6]:
data.isna().sum()

text      0
rating    0
dtype: int64

In [7]:
data['text'].head(15)

0     We used this airline to go from Singapore to L...
1     The service on Singapore Airlines Suites Class...
2     Booked, paid and received email confirmation f...
3     Best airline in the world, seats, food, servic...
4     Premium Economy Seating on Singapore Airlines ...
5     We booked our flights a full 9 months in advan...
6     This was a fascinating experience. As I sat in...
7     Very bad expeirence for flight check in at bai...
8     Respected Faculty, I am Bincy, writing this  t...
9     I would rate Singapore airlines the worse I ha...
10    Singapore Airlines (SQ), may be one of the lar...
11    Yesterday ( 6 March ) my flight was canceled b...
12    What has happened to Singapore Airlines? I use...
13    I am disabled and had a VERY BAD experience wi...
14    Well I have flown to Australia quite a few tim...
Name: text, dtype: object

In [8]:
stop_words = stopwords.words("english")
stem = PorterStemmer()
def text_cleaner(text:str):
    clean_txt = ""
    text = text.lower()
    tokens = word_tokenize(text)
    for tkn in tokens:
        if (tkn not in stop_words) and (tkn not in punctuation):
            stm_tkn = stem.stem(tkn)
            clean_txt += (stm_tkn + " ")
    clean_txt = clean_txt[:-1]
    return clean_txt

In [9]:
data['text'] = data['text'].apply(text_cleaner)

In [10]:
data['text'].head()

0    use airlin go singapor london heathrow issu ti...
1    servic singapor airlin suit class noth excel c...
2    book paid receiv email confirm extra legroom s...
3    best airlin world seat food servic brilliant c...
4    premium economi seat singapor airlin narrow se...
Name: text, dtype: object

In [11]:
tf_idf_vect = TfidfVectorizer(min_df=2)
X = tf_idf_vect.fit_transform(data['text'])

In [12]:
X.shape

(10000, 7496)

In [13]:
y = data['rating']

In [14]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.33)

In [15]:
knn = KNeighborsClassifier()
knn.fit(X_train,y_train)

In [16]:
knn.score(X_train,y_train)

0.7116417910447761

In [17]:
knn.score(X_test , y_test)

0.5966666666666667

In [18]:
svm = SVC(C=0.25)
svm.fit(X_train , y_train)

In [19]:
svm.score(X_train,y_train)

0.593134328358209

In [20]:
svm.score(X_test,y_test)

0.58