In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score


In [None]:
# Remote locations of the two raw inputs; both are downloaded on every run.
ZOMATO_CSV_URL = "https://github.com/dsrscientist/dataset4/raw/main/zomato.csv"
COUNTRY_CODE_XLSX_URL = "https://github.com/dsrscientist/dataset4/raw/main/Country-Code.xlsx"

# Zomato dataset (CSV).
zomato_df = pd.read_csv(ZOMATO_CSV_URL)

# Country Code dataset (Excel workbook); joined onto zomato_df further down.
country_code_df = pd.read_excel(COUNTRY_CODE_XLSX_URL)


In [None]:
# Attach the country lookup columns to every Zomato row. A left join keeps
# all rows of zomato_df even when a code has no match in the lookup table.
ZOMATO_COUNTRY_KEY = "Country Code"
LOOKUP_COUNTRY_KEY = "Country code"

# The two key headers differ only in capitalisation, which makes an exact
# string match brittle. Prefer the exact expected header; otherwise fall back
# to a case- and whitespace-insensitive match so a differently capitalised
# spreadsheet header does not abort the merge with a bare KeyError.
if LOOKUP_COUNTRY_KEY in country_code_df.columns:
    lookup_key = LOOKUP_COUNTRY_KEY
else:
    wanted_header = LOOKUP_COUNTRY_KEY.casefold()
    header_matches = [
        col
        for col in country_code_df.columns
        if str(col).strip().casefold() == wanted_header
    ]
    if not header_matches:
        raise KeyError(
            f"No country-code column found in country_code_df; "
            f"available columns: {list(country_code_df.columns)}"
        )
    lookup_key = header_matches[0]

merged_df = pd.merge(
    zomato_df,
    country_code_df,
    how="left",
    left_on=ZOMATO_COUNTRY_KEY,
    right_on=lookup_key,
)

# Remove the lookup table's redundant copy of the key. When both sides use
# the same header, pandas already emits a single key column, so dropping it
# would delete the key itself -- skip the drop in that case.
# Reassign instead of inplace=True so the step reads as a pure transformation.
if lookup_key != ZOMATO_COUNTRY_KEY:
    merged_df = merged_df.drop(columns=[lookup_key])


In [None]:
# Preview the first rows of the merged frame.
preview_rows = merged_df.head()
print(preview_rows)

# Count the missing entries in each column.
missing_per_column = merged_df.isna().sum()
print(missing_per_column)

# TODO: handle missing values if needed (not implemented yet).

# TODO: encode categorical variables if needed (not implemented yet).


In [None]:
# Predictor columns shared by both regression targets.
feature_columns = ['Aggregate rating', 'Votes', 'Longitude', 'Latitude']
X = merged_df[feature_columns]

# Two targets, each modelled separately from the same predictors.
y_avg_cost_for_two = merged_df['Average Cost for two']
y_price_range = merged_df['Price range']

# A single call splits all three objects together, so the features and both
# targets share the same train/test row assignment (20% held out, fixed seed).
(
    X_train,
    X_test,
    y_train_avg_cost,
    y_test_avg_cost,
    y_train_price_range,
    y_test_price_range,
) = train_test_split(
    X,
    y_avg_cost_for_two,
    y_price_range,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Fit one linear regression per target on the shared training features.
# fit() returns the estimator itself, so construction and fitting are chained.
model_avg_cost = LinearRegression().fit(X_train, y_train_avg_cost)
model_price_range = LinearRegression().fit(X_train, y_train_price_range)

# Average cost for two: predict on the held-out rows and score the result.
y_pred_avg_cost = model_avg_cost.predict(X_test)
mse_avg_cost = mean_squared_error(y_test_avg_cost, y_pred_avg_cost)
r2_avg_cost = r2_score(y_test_avg_cost, y_pred_avg_cost)

# Price range: same procedure with the second model.
y_pred_price_range = model_price_range.predict(X_test)
mse_price_range = mean_squared_error(y_test_price_range, y_pred_price_range)
r2_price_range = r2_score(y_test_price_range, y_pred_price_range)

# Report the four metrics: average cost first, then price range.
for report_line in (
    f"Mean Squared Error (Average Cost for Two): {mse_avg_cost}",
    f"R^2 Score (Average Cost for Two): {r2_avg_cost}",
    f"Mean Squared Error (Price Range): {mse_price_range}",
    f"R^2 Score (Price Range): {r2_price_range}",
):
    print(report_line)
