In [5]:
import pandas as pd

df = pd.read_csv(
    "dataset.csv",
    encoding="latin-1",
    engine="python",
    on_bad_lines="skip"
)

df.head()


Unnamed: 0,url,address,name,online_order,book_table,rate,votes,phone,location,rest_type,dish_liked,cuisines,approx_cost(for two people),reviews_list,menu_item,listed_in(type),listed_in(city)
0,https://www.zomato.com/bangalore/jalsa-banasha...,"942, 21st Main Road, 2nd Stage, Banashankari, ...",Jalsa,Yes,Yes,4.1/5,775,080 42297555\r\n+91 9743772233,Banashankari,Casual Dining,"Pasta, Lunch Buffet, Masala Papad, Paneer Laja...","North Indian, Mughlai, Chinese",800,"[('Rated 4.0', 'RATED\n A beautiful place to ...",[],Buffet,Banashankari
1,https://www.zomato.com/bangalore/spice-elephan...,"2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ...",Spice Elephant,Yes,No,4.1/5,787,080 41714161,Banashankari,Casual Dining,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...","Chinese, North Indian, Thai",800,"[('Rated 4.0', 'RATED\n Had been here for din...",[],Buffet,Banashankari
2,https://www.zomato.com/SanchurroBangalore?cont...,"1112, Next to KIMS Medical College, 17th Cross...",San Churro Cafe,Yes,No,3.8/5,918,+91 9663487993,Banashankari,"Cafe, Casual Dining","Churros, Cannelloni, Minestrone Soup, Hot Choc...","Cafe, Mexican, Italian",800,"[('Rated 3.0', ""RATED\n Ambience is not that ...",[],Buffet,Banashankari
3,https://www.zomato.com/bangalore/addhuri-udupi...,"1st Floor, Annakuteera, 3rd Stage, Banashankar...",Addhuri Udupi Bhojana,No,No,3.7/5,88,+91 9620009302,Banashankari,Quick Bites,Masala Dosa,"South Indian, North Indian",300,"[('Rated 4.0', ""RATED\n Great food and proper...",[],Buffet,Banashankari
4,https://www.zomato.com/bangalore/grand-village...,"10, 3rd Floor, Lakshmi Associates, Gandhi Baza...",Grand Village,No,No,3.8/5,166,+91 8026612447\r\n+91 9901210005,Basavanagudi,Casual Dining,"Panipuri, Gol Gappe","North Indian, Rajasthani",600,"[('Rated 4.0', 'RATED\n Very good restaurant ...",[],Buffet,Banashankari


In [8]:
columns_needed = [
    "rate",
    "votes",
    "approx_cost(for two people)",
    "online_order",
    "book_table",
    "location",
    "cuisines"
]

df = df[columns_needed]
df.head()


Unnamed: 0,rate,votes,approx_cost(for two people),online_order,book_table,location,cuisines
0,4.1/5,775,800,Yes,Yes,Banashankari,"North Indian, Mughlai, Chinese"
1,4.1/5,787,800,Yes,No,Banashankari,"Chinese, North Indian, Thai"
2,3.8/5,918,800,Yes,No,Banashankari,"Cafe, Mexican, Italian"
3,3.7/5,88,300,No,No,Banashankari,"South Indian, North Indian"
4,3.8/5,166,600,No,No,Basavanagudi,"North Indian, Rajasthani"


In [9]:
# Remove rows with missing or invalid ratings
df = df[df["rate"].notna()]
df = df[df["rate"] != "NEW"]
df = df[df["rate"] != "-"]

# Convert "4.1/5" â†’ 4.1
df["rate"] = df["rate"].apply(lambda x: float(x.split("/")[0]))


In [10]:
# Remove commas and convert cost to numeric
df["approx_cost(for two people)"] = df["approx_cost(for two people)"].str.replace(",", "")
df["approx_cost(for two people)"] = df["approx_cost(for two people)"].astype(float)


In [11]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()

df["location"] = le.fit_transform(df["location"])
df["cuisines"] = le.fit_transform(df["cuisines"])
df["online_order"] = le.fit_transform(df["online_order"])
df["book_table"] = le.fit_transform(df["book_table"])


In [12]:
df.isnull().sum()


Unnamed: 0,0
rate,0
votes,0
approx_cost(for two people),147
online_order,0
book_table,0
location,0
cuisines,0


In [13]:
df.dropna(inplace=True)


In [14]:
X = df.drop("rate", axis=1)
y = df["rate"]


In [15]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [16]:
from sklearn.linear_model import LinearRegression

model = LinearRegression()
model.fit(X_train, y_train)


In [17]:
y_pred = model.predict(X_test)


In [18]:
from sklearn.metrics import mean_squared_error, r2_score

mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

mse, r2


(0.12945240950102488, 0.28312325727111387)

In [19]:
import pandas as pd

feature_importance = pd.DataFrame({
    "Feature": X.columns,
    "Coefficient": model.coef_
})

feature_importance.sort_values(by="Coefficient", ascending=False)


Unnamed: 0,Feature,Coefficient
3,book_table,0.26509
2,online_order,0.098217
4,location,0.001321
0,votes,0.000159
1,approx_cost(for two people),0.000128
5,cuisines,-3.8e-05


## Model Interpretation

From the trained Linear Regression model, we observe that the feature **book_table** has the highest positive influence on restaurant ratings. This indicates that restaurants offering table booking tend to receive higher ratings.

The **online_order** feature also shows a positive impact, meaning customers prefer restaurants that provide online ordering facilities. Features such as **location** and **votes** have a smaller but positive effect on ratings, suggesting that popularity and area contribute moderately.

The **average cost for two people** has minimal influence on ratings, while **cuisines** shows a slightly negative coefficient, indicating that cuisine type alone does not strongly affect restaurant ratings.
