In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import mean_squared_error, accuracy_score, classification_report
import joblib

# Remote sources: the restaurant listing (CSV) and the country-code lookup (Excel).
zomato_url = "https://raw.githubusercontent.com/FlipRoboTechnologies/ML_-Datasets/main/Z_Restaurant/zomato.csv"
country_code_url = "https://github.com/FlipRoboTechnologies/ML_-Datasets/blob/main/Z_Restaurant/Country-Code.xlsx?raw=true"

# The CSV is read with an explicit ISO-8859-1 encoding.
zomato = pd.read_csv(zomato_url, encoding='ISO-8859-1')
country_code = pd.read_excel(country_code_url)

# Left join on 'Country Code': every restaurant row is kept, and the columns
# of the lookup table are attached where the code matches.
data = zomato.merge(country_code, how='left', on='Country Code')

# Quick visual sanity check of the merged frame.
print("First few rows of the merged dataset:", data.head(), sep="\n")
print("Dataset columns:", data.columns, sep="\n")

# Fill gaps from the previous row first; anything still missing (i.e. gaps at
# the very top of a column) is then filled from the next available row.
data = data.ffill()
data = data.bfill()

# Replace every object-dtype column with integer codes. The fitted encoder for
# each column is kept in `label_encoders`, keyed by column name.
label_encoders = {}
for column in data.select_dtypes(include=['object']).columns:
    encoder = LabelEncoder()
    data[column] = encoder.fit_transform(data[column])
    label_encoders[column] = encoder

# --- Regression task: predict 'Average Cost for two' ---
# 'Price range' is excluded from the features along with the target itself.
y_regression = data['Average Cost for two']
X_regression = data.drop(columns=['Average Cost for two', 'Price range'])

# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
    X_regression,
    y_regression,
    test_size=0.2,
    random_state=42,
)

# Scaling statistics are learned from the training rows only and then applied
# to both the training and the test rows.
scaler_reg = StandardScaler().fit(X_train_reg)
X_train_reg = scaler_reg.transform(X_train_reg)
X_test_reg = scaler_reg.transform(X_test_reg)

model_reg = LinearRegression().fit(X_train_reg, y_train_reg)
y_pred_reg = model_reg.predict(X_test_reg)

# RMSE is the square root of the mean squared error on the held-out rows.
mse_reg = mean_squared_error(y_test_reg, y_pred_reg)
rmse_reg = mse_reg ** 0.5

print("Regression Model RMSE (Average Cost for two): {:.2f}".format(rmse_reg))

# --- Classification task: predict 'Price range' ---
# NOTE(review): only the target column is dropped here, so
# 'Average Cost for two' remains among the features -- confirm this is intended.
X_classification = data.drop(['Price range'], axis=1)
y_classification = data['Price range']

# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
X_train_cls, X_test_cls, y_train_cls, y_test_cls = train_test_split(
    X_classification, y_classification, test_size=0.2, random_state=42
)

# Scaling statistics are learned from the training rows only and then applied
# to both the training and the test rows.
scaler_cls = StandardScaler()
X_train_cls = scaler_cls.fit_transform(X_train_cls)
X_test_cls = scaler_cls.transform(X_test_cls)

# With the default iteration budget the solver stopped before converging
# ("TOTAL NO. of ITERATIONS REACHED LIMIT"). The features are already
# standardized, so the remaining remedy is a larger max_iter.
model_cls = LogisticRegression(max_iter=1000)
model_cls.fit(X_train_cls, y_train_cls)

y_pred_cls = model_cls.predict(X_test_cls)

# Overall accuracy plus the per-class precision/recall/F1 breakdown.
accuracy_cls = accuracy_score(y_test_cls, y_pred_cls)
report_cls = classification_report(y_test_cls, y_pred_cls)

print("Classification Model Accuracy (Price range): {:.2f}%".format(accuracy_cls * 100))
print("Classification Report:")
print(report_cls)

# Persist everything needed to reproduce predictions later: both models, both
# scalers, and one label encoder per encoded column. Files are written in the
# order listed, into the current working directory.
fitted_artifacts = [
    (model_reg, 'linear_regression_avg_cost_model.pkl'),
    (scaler_reg, 'scaler_reg.pkl'),
    (model_cls, 'logistic_regression_price_range_model.pkl'),
    (scaler_cls, 'scaler_cls.pkl'),
]
fitted_artifacts += [
    (encoder, f'label_encoder_{column}.pkl')
    for column, encoder in label_encoders.items()
]
for artifact, filename in fitted_artifacts:
    joblib.dump(artifact, filename)

print("Models and preprocessing objects saved.")


First few rows of the merged dataset:
   Restaurant ID         Restaurant Name  Country Code              City  \
0        6317637        Le Petit Souffle           162       Makati City   
1        6304287        Izakaya Kikufuji           162       Makati City   
2        6300002  Heat - Edsa Shangri-La           162  Mandaluyong City   
3        6318506                    Ooma           162  Mandaluyong City   
4        6314302             Sambo Kojin           162  Mandaluyong City   

                                             Address  \
0  Third Floor, Century City Mall, Kalayaan Avenu...   
1  Little Tokyo, 2277 Chino Roces Avenue, Legaspi...   
2  Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...   
3  Third Floor, Mega Fashion Hall, SM Megamall, O...   
4  Third Floor, Mega Atrium, SM Megamall, Ortigas...   

                                     Locality  \
0   Century City Mall, Poblacion, Makati City   
1  Little Tokyo, Legaspi Village, Makati City   
2  Edsa Shangri-La, O

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
