In [1]:
import pandas as pd
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import r2_score
from sklearn.pipeline import Pipeline
from sklearn.linear_model import Ridge
from scipy.stats import uniform
import warnings
warnings.filterwarnings("ignore")

In [2]:
dataset=pd.read_csv("mall_rental_data.csv")
dataset

Unnamed: 0,Shop_ID,Mall_Name,City,Floor,Shop_Size_sqft,Footfall_per_day,Nearby_Brands,Has_Food_Court,Monthly_Sales,Rent
0,S001,Lulu Mall,Delhi,2,352,6311,2,0,962727,67310
1,S002,Forum Mall,Chennai,4,2961,802,16,0,549722,45221
2,S003,VR Chennai,Mumbai,0,1402,1138,9,0,1783693,3704
3,S004,Forum Mall,Mumbai,4,1849,555,12,0,608924,68503
4,S005,Forum Mall,Bangalore,3,1400,9217,13,1,1834033,85666
...,...,...,...,...,...,...,...,...,...,...
195,S196,VR Chennai,Kochi,4,1430,845,3,1,1352549,44571
196,S197,VR Chennai,Kochi,4,2205,9054,10,0,1381673,65858
197,S198,Phoenix Marketcity,Chennai,4,774,2943,10,1,1475402,41831
198,S199,Forum Mall,Bangalore,4,2040,8283,9,0,1612833,49625


In [3]:
dataset.shape

(200, 10)

In [4]:
dataset=dataset.drop("Shop_ID",axis=1)
dataset=pd.get_dummies(dataset,dtype=int,drop_first=True)
dataset

Unnamed: 0,Floor,Shop_Size_sqft,Footfall_per_day,Nearby_Brands,Has_Food_Court,Monthly_Sales,Rent,Mall_Name_Forum Mall,Mall_Name_Lulu Mall,Mall_Name_Phoenix Marketcity,Mall_Name_VR Chennai,City_Chennai,City_Delhi,City_Kochi,City_Mumbai
0,2,352,6311,2,0,962727,67310,0,1,0,0,0,1,0,0
1,4,2961,802,16,0,549722,45221,1,0,0,0,1,0,0,0
2,0,1402,1138,9,0,1783693,3704,0,0,0,1,0,0,0,1
3,4,1849,555,12,0,608924,68503,1,0,0,0,0,0,0,1
4,3,1400,9217,13,1,1834033,85666,1,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,4,1430,845,3,1,1352549,44571,0,0,0,1,0,0,1,0
196,4,2205,9054,10,0,1381673,65858,0,0,0,1,0,0,1,0
197,4,774,2943,10,1,1475402,41831,0,0,1,0,1,0,0,0
198,4,2040,8283,9,0,1612833,49625,1,0,0,0,0,0,0,0


In [5]:
indep_X=dataset.drop("Rent", axis=1)
dep_Y=dataset["Rent"]

In [6]:
n = 4
test = SelectKBest(score_func=chi2, k=n)
test.fit(indep_X, dep_Y)
selected_features = test.transform(indep_X)

In [7]:
X_train, X_test, Y_train, Y_test = train_test_split(selected_features, dep_Y, test_size=0.30, random_state=0)
pipeline = Pipeline([('scale', StandardScaler()),
    ('ridge', Ridge())
])

In [8]:
param_dist = {'ridge__alpha': uniform(0, 10)}
search = RandomizedSearchCV(pipeline,param_distributions=param_dist,n_iter=10,scoring='r2',cv=3,random_state=42)
search.fit(X_train, Y_train)
best_model = search.best_estimator_

In [9]:
r2 = search.score(X_test, Y_test)

print("Best Ridge Model:", best_model)
print("R² Score:", r2)

Best Ridge Model: Pipeline(steps=[('scale', StandardScaler()),
                ('ridge', Ridge(alpha=7.080725777960454))])
R² Score: 0.24542958521945923


In [10]:
with open('finalized_model_linear_pipeline.sav', 'wb') as f:
    pickle.dump(best_model, f)

In [11]:
loaded_model=pickle.load(open("finalized_model_linear_pipeline.sav",'rb'))
result=loaded_model.predict([[256, 3455, 1, 43355]])
result

array([32849.0979156])