# Imports

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score, train_test_split, GridSearchCV
from sklearn.metrics import classification_report 

In [2]:
# Reading from processed data
df = pd.read_csv('../data/final_combined_data/final_data_with_bow.csv')

label_to_int = {
        'Pizza': 0,
        'Shawarma': 1,
        'Sushi': 2
    }

df['Label'] = df['Label'].map(label_to_int)


In [3]:
df.head()
summary = df.describe()

In [4]:
# Splitting the data into training and testing sets
X = df.drop('Label', axis=1)  
y = df['Label']  

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [None]:
# Create a model
model = LogisticRegression(multi_class='multinomial')

# Hyperparameter tuning
param_grid = {
    'C': [0.1, 1, 10, 100, 1000],
    'max_iter': [500, 1000, 1500, 2000],
}
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=5,  
    scoring='accuracy',  
)
grid_search.fit(X_train, y_train)

print("Best Parameters:", grid_search.best_params_)
print("Best Accuracy:", grid_search.best_score_)

# Predicting the test set results
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [None]:
# Evaluating the model
accuracy = np.mean(y_pred == y_test)
print("Accuracy:", accuracy)
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=['Pizza', 'Shawarma', 'Sushi']))

Accuracy: 0.8502024291497976
Classification Report:
              precision    recall  f1-score   support

       Pizza       0.84      0.89      0.86       140
    Shawarma       0.89      0.84      0.86       188
       Sushi       0.82      0.83      0.83       166

    accuracy                           0.85       494
   macro avg       0.85      0.85      0.85       494
weighted avg       0.85      0.85      0.85       494



In [12]:
y_pred_full = best_model.predict(X)

accuracy_full = np.mean(y_pred_full == y)
print("Accuracy:", accuracy_full)
print("Classification Report:")
print(classification_report(y, y_pred_full, target_names=['Pizza', 'Shawarma', 'Sushi']))

Accuracy: 0.8497566909975669
Classification Report:
              precision    recall  f1-score   support

       Pizza       0.85      0.89      0.87       548
    Shawarma       0.86      0.84      0.85       548
       Sushi       0.85      0.82      0.84       548

    accuracy                           0.85      1644
   macro avg       0.85      0.85      0.85      1644
weighted avg       0.85      0.85      0.85      1644



In [13]:
y_pred_train = best_model.predict(X_train)

accuracy_train = np.mean(y_pred_train == y_train)
print("Accuracy:", accuracy_train)
print("Classification Report:")
print(classification_report(y_train, y_pred_train, target_names=['Pizza', 'Shawarma', 'Sushi']))

Accuracy: 0.8495652173913043
Classification Report:
              precision    recall  f1-score   support

       Pizza       0.85      0.89      0.87       408
    Shawarma       0.84      0.84      0.84       360
       Sushi       0.86      0.82      0.84       382

    accuracy                           0.85      1150
   macro avg       0.85      0.85      0.85      1150
weighted avg       0.85      0.85      0.85      1150

