In [100]:
import os
import re
import string
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import CountVectorizer,TfidfVectorizer,ENGLISH_STOP_WORDS
from sklearn.linear_model import LogisticRegression

In [58]:
# Install a blanket 'ignore' filter: no category, message or module is given,
# so every warning raised after this cell runs is suppressed.
# NOTE(review): this also hides warnings coming from pandas and scikit-learn
# in the cells below — consider narrowing the filter.
import warnings
warnings.filterwarnings('ignore')


In [59]:
# Read the training data from train.csv (path is relative to the working directory).
df=pd.read_csv('train.csv')

In [60]:
# Show the freshly loaded frame as the cell output.
df

Unnamed: 0,User_ID,Description,Browser_Used,Device_Used,Is_Response
0,id10326,The room was kind of clean but had a VERY stro...,Edge,Mobile,not happy
1,id10327,I stayed at the Crown Plaza April -- - April -...,Internet Explorer,Mobile,not happy
2,id10328,I booked this hotel through Hotwire at the low...,Mozilla,Tablet,not happy
3,id10329,Stayed here with husband and sons on the way t...,InternetExplorer,Desktop,happy
4,id10330,My girlfriends and I stayed here to celebrate ...,Edge,Tablet,not happy
...,...,...,...,...,...
38927,id49253,We arrived late at night and walked in to a ch...,Edge,Desktop,happy
38928,id49254,The only positive impression is location and p...,InternetExplorer,Mobile,not happy
38929,id49255,Traveling with friends for shopping and a show...,Firefox,Mobile,not happy
38930,id49256,The experience was just ok. We paid extra for ...,Chrome,Desktop,not happy


In [61]:
# Keep only the review text and its label.
# .copy() makes the result an independent frame, so adding a column to it in a
# later cell is a plain assignment and does not raise SettingWithCopyWarning.
df = df[['Description', 'Is_Response']].copy()
df

Unnamed: 0,Description,Is_Response
0,The room was kind of clean but had a VERY stro...,not happy
1,I stayed at the Crown Plaza April -- - April -...,not happy
2,I booked this hotel through Hotwire at the low...,not happy
3,Stayed here with husband and sons on the way t...,happy
4,My girlfriends and I stayed here to celebrate ...,not happy
...,...,...
38927,We arrived late at night and walked in to a ch...,happy
38928,The only positive impression is location and p...,not happy
38929,Traveling with friends for shopping and a show...,not happy
38930,The experience was just ok. We paid extra for ...,not happy


In [62]:
# Print the column dtypes, non-null counts and memory usage of the frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 38932 entries, 0 to 38931
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Description  38932 non-null  object
 1   Is_Response  38932 non-null  object
dtypes: object(2)
memory usage: 608.4+ KB


In [63]:
# Count the missing values in each column.
df.isna().sum()

Description    0
Is_Response    0
dtype: int64

In [64]:
# Class balance of the target: absolute counts first, then percentages
# rounded to two decimals.
value_count = df['Is_Response'].value_counts()
print(value_count)
print((df['Is_Response'].value_counts(normalize=True) * 100).round(2))

happy        26521
not happy    12411
Name: Is_Response, dtype: int64
happy        68.12
not happy    31.88
Name: Is_Response, dtype: float64


In [65]:
# Apply first level cleaning

def text_clean(text):
    """Normalise one raw review string before vectorisation.

    The steps run in this order:

    1. lowercase everything;
    2. drop any ``[bracketed]`` span (non-greedy, single line);
    3. strip every ASCII punctuation character;
    4. drop every token that contains a digit;
    5. drop curly quotes and the ellipsis character, which are not part of
       ``string.punctuation``;
    6. turn newlines into spaces.

    Parameters
    ----------
    text : str
        The raw review text.

    Returns
    -------
    str
        The cleaned text. Runs of spaces left behind by removed tokens are
        not collapsed.
    """
    text = text.lower()  # lowercase the whole text
    # Raw strings keep the regex escapes (\[ \w \d) from being read as
    # (invalid) Python string escapes.
    text = re.sub(r'\[.*?\]', '', text)
    text = text.translate(str.maketrans('', '', string.punctuation))
    text = re.sub(r'\w*\d\w*', '', text)
    text = re.sub(r'[‘’“”…]', '', text)
    # Replace newlines with a space rather than nothing, so the last word of
    # one line is not glued to the first word of the next.
    text = text.replace('\n', ' ')
    return text
    
def cleaned(x):
    """Forward a single value to ``text_clean`` and return its result."""
    return text_clean(x)

In [66]:
# Store the cleaned version of every review in a new column.
df['cleaned_desc'] = df['Description'].apply(cleaned)

In [67]:
# Show the frame again, now including the cleaned_desc column.
df

Unnamed: 0,Description,Is_Response,cleaned_desc
0,The room was kind of clean but had a VERY stro...,not happy,the room was kind of clean but had a very stro...
1,I stayed at the Crown Plaza April -- - April -...,not happy,i stayed at the crown plaza april april th...
2,I booked this hotel through Hotwire at the low...,not happy,i booked this hotel through hotwire at the low...
3,Stayed here with husband and sons on the way t...,happy,stayed here with husband and sons on the way t...
4,My girlfriends and I stayed here to celebrate ...,not happy,my girlfriends and i stayed here to celebrate ...
...,...,...,...
38927,We arrived late at night and walked in to a ch...,happy,we arrived late at night and walked in to a ch...
38928,The only positive impression is location and p...,not happy,the only positive impression is location and p...
38929,Traveling with friends for shopping and a show...,not happy,traveling with friends for shopping and a show...
38930,The experience was just ok. We paid extra for ...,not happy,the experience was just ok we paid extra for a...


In [85]:
# Model input is the cleaned review text; the target is the response label.
x, y = df['cleaned_desc'], df['Is_Response']

In [89]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the rows for testing; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.1,
    random_state=101,
)
print(*(part.shape for part in (x_train, x_test, y_train, y_test)))

(35038,) (3894,) (35038,) (3894,)


In [101]:
from sklearn.pipeline import Pipeline

# Building blocks for the first model: TF-IDF features fed into a
# logistic-regression classifier.
tvec = TfidfVectorizer()
clf2 = LogisticRegression(solver="lbfgs")

In [105]:
from sklearn.metrics import confusion_matrix

# Chain vectoriser and classifier, then fit both on the training split
# (fit returns the pipeline itself).
model = Pipeline(
    steps=[
        ('vectorizer', tvec),
        ('classifier', clf2),
    ]
).fit(x_train, y_train)

# Predict the held-out split and show the confusion matrix
# (rows: true labels, columns: predicted labels).
y_pred_test = model.predict(x_test)
confusion_matrix(y_test, y_pred_test)

array([[2539,  178],
       [ 274,  903]], dtype=int64)

In [106]:
from sklearn.metrics import accuracy_score, precision_score, recall_score

# Test-split scores for the logistic-regression pipeline.
# With average='weighted' each class is weighted by its number of true
# samples, so weighted recall comes out equal to accuracy.
print("Accuracy : ", accuracy_score(y_true=y_test, y_pred=y_pred_test))
print(
    "Precision : ",
    precision_score(y_true=y_test, y_pred=y_pred_test, average='weighted'),
)
print(
    "Recall : ",
    recall_score(y_true=y_test, y_pred=y_pred_test, average='weighted'),
)

Accuracy :  0.8839239856189008
Precision :  0.882265866817503
Recall :  0.8839239856189008


In [118]:
# Score a hand-written review with the logistic-regression pipeline.
# The pipeline was fitted on text that had gone through text_clean(), so new
# input is passed through the same cleaning first; otherwise it is tokenised
# differently than the training data (e.g. "I'm" was learned as "im").
example = ["I'm frustrated"]
result = model.predict([text_clean(review) for review in example])

print(result)

['not happy']


In [108]:
from sklearn.naive_bayes import BernoulliNB

# Second model: TF-IDF features fed into Bernoulli Naive Bayes.
# The pipeline gets its own TfidfVectorizer instead of reusing `tvec`:
# Pipeline fits the step objects it is given in place, so sharing one
# vectoriser would make this fit overwrite the state `model` relies on.
nb_cf = BernoulliNB()
model1 = Pipeline([('vectorizer', TfidfVectorizer()), ('naive_bayes', nb_cf)])
model1.fit(x_train, y_train)

Pipeline(steps=[('vectorizer', TfidfVectorizer()),
                ('naive_bayes', BernoulliNB())])

In [109]:
# Naive Bayes predictions for the training split and the held-out split, in that order.
y_pred_train1, y_pred_test1 = map(model1.predict, (x_train, x_test))

In [104]:
# Confusion matrix of the Naive Bayes pipeline on the held-out split
# (first argument: true labels, second argument: predictions).
confusion_matrix(y_test,y_pred_test1)

array([[2384,  333],
       [ 475,  702]], dtype=int64)

In [110]:
# Test-split scores for the Naive Bayes pipeline.
# With average='weighted' each class is weighted by its number of true
# samples, so weighted recall comes out equal to accuracy.
print("Accuracy : ", accuracy_score(y_true=y_test, y_pred=y_pred_test1))
print(
    "Precision : ",
    precision_score(y_true=y_test, y_pred=y_pred_test1, average='weighted'),
)
print(
    "Recall : ",
    recall_score(y_true=y_test, y_pred=y_pred_test1, average='weighted'),
)

Accuracy :  0.7925012840267077
Precision :  0.7868272235672704
Recall :  0.7925012840267077


In [119]:
# Score the same hand-written review with the Naive Bayes pipeline.
# The pipeline was fitted on text that had gone through text_clean(), so new
# input is passed through the same cleaning first; otherwise it is tokenised
# differently than the training data (e.g. "I'm" was learned as "im").
example = ["I'm frustrated"]
result = model1.predict([text_clean(review) for review in example])

print(result)

['happy']
