Import required libraries

In [65]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

Load dataset

In [84]:
# Read the raw restaurant table from the CSV in the working directory.
dataset = pd.read_csv("restaurant_data.csv")

# Features: every column except the first and the last. Target: the last column.
# NOTE(review): column 0 is presumably an identifier with no predictive value — confirm.
X, y = dataset.iloc[:, 1:-1].values, dataset.iloc[:, -1].values

Check and account for missing values

In [60]:
# Print NumPy arrays in full (no "..." truncation) for the rest of the session.
np.set_printoptions(threshold=np.inf)

# Number of missing values in each column. This must be the LAST expression in
# the cell: Jupyter only renders the value of the final expression, so placing
# it before set_printoptions() (which returns None) displayed nothing.
dataset.isnull().sum()

Encode categorical data -> Location, Cuisine and Parking availability

In [85]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the categorical feature columns (positions 0, 1 and 12 of X)
# and pass every other column through unchanged. ColumnTransformer emits the
# encoded columns first, followed by the passthrough columns.
#
# sparse_threshold=0 forces a dense result. With the default (0.3), a
# sufficiently sparse output is returned as a SciPy sparse matrix, which
# np.array() wraps into a 0-d object array instead of a 2-D feature matrix,
# breaking the column slicing done by later cells.
column_transformer = ColumnTransformer(
    transformers=[("encoder", OneHotEncoder(), [0, 1, 12])],
    remainder="passthrough",
    sparse_threshold=0,
)
X = np.array(column_transformer.fit_transform(X))

Split given data into training and test sets

In [110]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for evaluation. The fixed random_state makes the
# split repeatable from one run to the next.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=1,
)

Apply feature scaling to all the non-categorical features so that they share a common scale

In [111]:
from sklearn.preprocessing import StandardScaler

# Standardise the columns from index 11 onward, in place.
# NOTE(review): this assumes the first 11 columns are the one-hot encoded
# categorical features and everything after them is numeric — confirm.
# The scaler learns its mean/variance from the training rows only; the same
# statistics are then applied to both the training and the test rows.
ss = StandardScaler().fit(X_train[:, 11:])
X_train[:, 11:] = ss.transform(X_train[:, 11:])
X_test[:, 11:] = ss.transform(X_test[:, 11:])

Create a linear regression model and fit it on the training data

In [112]:
from sklearn.linear_model import LinearRegression

# Linear regression with scikit-learn's default settings, fitted on the
# training split. The fit call stays as the cell's last expression so the
# fitted estimator is still shown as the cell output.
lr = LinearRegression()
lr.fit(X=X_train, y=y_train)

Use the fitted linear regressor to predict on the test data and compare the predictions with the known outcomes

In [113]:
# Show array values with two decimal places when printed.
np.set_printoptions(precision=2)

# Predictions for the held-out rows.
y_pred = lr.predict(X_test)

# Two-column array: predicted value on the left, actual value on the right.
val_comparison = np.hstack((y_pred.reshape(-1, 1), y_test.reshape(-1, 1)))

Average error (mean absolute percentage error) in the model predictions

In [114]:
import statistics

# Mean absolute percentage error of the predictions:
#     mean( |predicted - actual| / |actual| ) * 100
# val_comparison holds the predictions in column 0 and the actual values in
# column 1. The computation is vectorised instead of looping row by row, and
# the denominator uses the magnitude of the target so a negative target cannot
# produce a negative percentage that cancels other errors in the mean.
# NOTE: a target of exactly 0 still divides by zero — assumes non-zero targets.
abs_diff = np.abs(val_comparison[:, 0] - val_comparison[:, 1])
errors = (abs_diff / np.abs(y_test) * 100).tolist()
print(f"Error = {round(statistics.mean(errors), 2)}%")

Error = 7.83%
