In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import classification_report

In [9]:
# Load dataset
# Reads the hotel reviews CSV via a path relative to the working directory
# and previews the first five rows.
data = pd.read_csv('Hotel.csv') 
data.head()

Unnamed: 0,Review,Rating
0,nice hotel expensive parking got good deal sta...,4
1,ok nothing special charge diamond member hilto...,2
2,nice rooms not 4* experience hotel monaco seat...,3
3,"unique, great stay, wonderful time hotel monac...",5
4,"great stay great stay, went seahawk game aweso...",5


In [10]:
# Dimensions of the dataset as (rows, columns)
data.shape

(20491, 2)

In [11]:
# Total number of cells in the frame (rows x columns)
data.size

40982

In [12]:
# Column names, non-null counts, dtypes and memory usage
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20491 entries, 0 to 20490
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Review  20491 non-null  object
 1   Rating  20491 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 320.3+ KB


In [13]:
# Number of missing values in each column
data.isnull().sum()

Review    0
Rating    0
dtype: int64

In [14]:
# Number of rows that exactly duplicate an earlier row
data.duplicated().sum()

0

In [15]:
# Derive a binary sentiment label from the star rating:
# ratings of 3, 4 or 5 become 'Positive', every other value 'Negative'.
data['Sentimental'] = (
    data['Rating']
    .isin([3, 4, 5])
    .map({True: 'Positive', False: 'Negative'})
)

In [16]:

# Hold out 20% of the reviews for evaluation; the fixed random_state keeps the
# split reproducible across runs.
# NOTE(review): the split is not stratified. The evaluation report in this
# notebook shows far fewer Negative than Positive samples, so consider passing
# stratify=data['Sentimental'] - confirm before changing recorded results.
x_train, x_test, y_train, y_test = train_test_split(
    data['Review'],
    data['Sentimental'],
    test_size=0.2,
    random_state=42,
)

In [17]:
# Build TF-IDF features, capping the vocabulary at 10,000 terms.
# The vectorizer is fitted on the training reviews only; the test reviews are
# transformed with that same fitted vocabulary, so the test split does not
# influence the learned vocabulary or IDF weights.
tfidf_vectorizer = TfidfVectorizer(max_features=10000)
x_train_tfidf = tfidf_vectorizer.fit_transform(x_train)
x_test_tfidf = tfidf_vectorizer.transform(x_test)

In [20]:
# svc
# Train a support vector classifier with a linear kernel on the TF-IDF
# features and sentiment labels of the training split.
svm_classifier = SVC(kernel='linear')
svm_classifier.fit(x_train_tfidf, y_train)

In [21]:
# Model evaluation
# Predict labels for the held-out test features and print the per-class
# precision, recall, F1 and support together with the overall accuracy.
y_pred = svm_classifier.predict(x_test_tfidf)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

    Negative       0.85      0.72      0.78       625
    Positive       0.95      0.98      0.96      3474

    accuracy                           0.94      4099
   macro avg       0.90      0.85      0.87      4099
weighted avg       0.94      0.94      0.94      4099



In [22]:
# Function to map sentiment to ratings based on text content
def map_sentiment_to_rating(sentimental, text):
    """Refine a predicted sentiment label into a star rating using keywords.

    The text is lower-cased once and searched for fixed phrases (plain
    substring matching, so e.g. 'good' also matches inside 'goods').

    Args:
        sentimental: Predicted label. Exactly 'Positive' selects the positive
            rules; any other value is handled by the negative rules.
        text: The review text. Non-string values (e.g. None or a NaN float
            coming from pandas) are treated as text with no keywords, so the
            default rating for the label is returned instead of raising.

    Returns:
        A new one-element list holding the rating:
        Positive -> [5] for 'excellent' / 'highly recommend',
                    [4] for 'good' / 'satisfied', otherwise [3];
        otherwise -> [2] for 'bad' / 'not good' unless a strong-negative
                    phrase ('poor experience', 'disappointed', 'unsatisfied')
                    is also present, in which case (and by default) [1].
    """
    # Lower-case once instead of on every comparison; fall back to an empty
    # string for non-string input so the keyword checks simply find nothing.
    lowered = text.lower() if isinstance(text, str) else ''

    def mentions(*phrases):
        # True when at least one of the phrases occurs in the review text.
        return any(phrase in lowered for phrase in phrases)

    if sentimental == 'Positive':
        if mentions('excellent', 'highly recommend'):
            return [5]
        if mentions('good', 'satisfied'):
            return [4]
        return [3]  # Default positive rating

    # Strong-negative phrases are checked first so they win over the milder
    # 'bad' / 'not good' wording when both appear in the same text.
    if mentions('poor experience', 'disappointed', 'unsatisfied'):
        return [1]
    if mentions('bad', 'not good'):
        return [2]
    return [1]  # Default negative rating

In [17]:
# Predict sentiment on new data
# The sample texts are vectorized with the already-fitted TF-IDF vectorizer
# (transform only, no refit) and then classified by the trained SVM.
new_texts = ["Great ambience, highly recommend it!", "Poor quality, disappointed with the service.","happy", "bad experience","satisfied"]
new_texts_tfidf = tfidf_vectorizer.transform(new_texts)
predictions = svm_classifier.predict(new_texts_tfidf)

In [18]:
# Map predictions to ratings
# Pair each sample text with its predicted label, pass both to the keyword
# heuristic map_sentiment_to_rating (which returns the rating as a
# one-element list), and print text, label and rating.
for text, prediction in zip(new_texts, predictions):
    predicted_rating = map_sentiment_to_rating(prediction, text)
    print(f"Text: {text}\nPredicted Sentiment: {prediction} Predicted Rating: {predicted_rating}\n")


Text: Great ambience, highly recommend it!
Predicted Sentiment: Positive Predicted Rating: [5]

Text: Poor quality, disappointed with the service.
Predicted Sentiment: Negative Predicted Rating: [1]

Text: happy
Predicted Sentiment: Positive Predicted Rating: [3]

Text: bad experience
Predicted Sentiment: Negative Predicted Rating: [2]

Text: satisfied
Predicted Sentiment: Positive Predicted Rating: [4]

