Step 1: Data Collection

In [None]:
import os

import pandas as pd

# Location of the tab-separated reviews file. The original author's local path
# is kept as the default; set the RESTAURANT_REVIEWS_PATH environment variable
# to run the notebook on another machine without editing this cell.
DEFAULT_REVIEWS_PATH = r'C:\Users\Haris Ali\Restaurant_Reviews.tsv'
reviews_path = os.environ.get('RESTAURANT_REVIEWS_PATH', DEFAULT_REVIEWS_PATH)

# The file is tab-delimited and read as UTF-8.
data = pd.read_csv(reviews_path, sep='\t', encoding='utf-8')
data.head()

Step 2: Data Profiling

In [None]:
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# must not be wrapped in print() (that would emit a stray "None" line).
data.info()

# Summary statistics of the numeric columns.
print(data.describe())

Step 3: Data Quality

In [None]:
# Remove exact duplicate rows first.
data_cleaned = data.drop_duplicates()

# Report missing values per column (after de-duplication) ...
print(data_cleaned.isnull().sum())

# ... and actually act on them: a missing review text or label cannot be
# vectorised / fitted later on, so rows lacking either are dropped here.
data_cleaned = data_cleaned.dropna(subset=['Review', 'Liked'])

Step 4: Data Selection

In [None]:
# Model input is the review text; the target is the 'Liked' column.
# Both are taken from the de-duplicated frame.
X, y = data_cleaned['Review'], data_cleaned['Liked']

Step 5: Exploratory Data Analysis (EDA)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Class balance of the target. Plot the de-duplicated frame (data_cleaned),
# since that is the data X and y are drawn from, rather than the raw frame.
fig, ax = plt.subplots(figsize=(8, 6))
sns.countplot(x='Liked', data=data_cleaned, ax=ax)
ax.set_title('Sentiment Analysis (Liked)')
ax.set_xlabel(' Liked (1 = Positive , 0 = Negative )')
ax.set_ylabel('Count')
plt.show()

Step 6: Selection of Features

In [None]:
# Keep only the review text and its label in `data`.
# NOTE(review): the modelling cells shown here use X and y taken from
# data_cleaned, so this narrowed frame appears to be for inspection only.
selected_columns = ['Review', 'Liked']
data = data.loc[:, selected_columns]

print('\nSelected Data (Liked & Review ):')
print(data.head())

Step 7: Selection of Model

In [None]:
from sklearn.model_selection import train_test_split , GridSearchCV
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

Step 8: Model Training

In [None]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# TF-IDF features: English stop words removed, vocabulary capped at 500 terms.
# The vectorizer is fitted on the training reviews only and then applied
# unchanged to the test reviews.
tfidf = TfidfVectorizer(stop_words='english', max_features=500)
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)

Step 9: Model Tuning / Hyper-parameter Tuning


In [None]:
# Candidate hyper-parameters: only the regularization strength C varies;
# the kernel stays fixed to linear.
param_grid = {
    'C': [0.1, 1, 10, 100],
    'kernel': ['linear'],
}

# Baseline linear SVM with default regularization, fitted directly on the
# training features.
svm_model = SVC(kernel='linear', random_state=42)
svm_model.fit(X_train_tfidf, y_train)

# 5-fold cross-validated grid search over param_grid, scored by accuracy.
grid_search = GridSearchCV(
    estimator=svm_model,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
)
grid_search.fit(X_train_tfidf, y_train)

Step 10: Cross-Validation

In [None]:
# Keep the estimator selected by the cross-validated grid search.
# With GridSearchCV's default refit setting, best_estimator_ is the model
# refitted on the full training data using the best-scoring parameters.
best_model = grid_search.best_estimator_

Step 11: Predict on Test Set

In [None]:
# Predict with the tuned estimator chosen by the grid search (best_model).
# GridSearchCV tunes clones of svm_model, so svm_model itself still carries
# the default C and would ignore the hyper-parameter search entirely.
y_pred = best_model.predict(X_test_tfidf)

# Evaluate the model on the held-out test set
print("Accuracy on Test Set:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))