In [25]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error,r2_score

In [4]:
# Synthetic housing data. Every draw below goes through NumPy's global RNG,
# so the seed pins the whole dataset and the order of the draws matters.
np.random.seed(42)

# Four integer feature arrays of 100 samples each; randint's upper bound is
# exclusive. Drawn in the order size, bedrooms, age, distance.
size, bedrooms, age, distance = (
    np.random.randint(low, high, 100)
    for low, high in ((1000, 4000), (1, 5), (1, 50), (1, 30))
)

# Price is a fixed linear combination of the features plus integer noise
# drawn from [-50000, 50000).
noise = np.random.randint(-50000, 50000, 100)
price = 5000 * size + 20000 * bedrooms - 3000 * age - 1500 * distance + noise

In [7]:
# Collect the generated arrays into one frame; dict order fixes column order.
column_data = {
    "Size": size,
    "Bedrooms": bedrooms,
    "Age": age,
    "Distance": distance,
    "Price": price,
}
df = pd.DataFrame(column_data)

In [8]:
# Preview the first five rows of the synthetic dataset.
df.head(n=5)

Unnamed: 0,Size,Bedrooms,Age,Distance,Price
0,1860,4,28,12,9273236
1,2294,4,25,5,11483735
2,2130,4,39,5,10609740
3,2095,3,33,27,10411226
4,2638,3,1,23,13172992


In [9]:
# Split the frame into the predictor columns and the regression target.
feature_columns = ['Size', 'Bedrooms', "Age", "Distance"]
x = df[feature_columns]
y = df["Price"]

In [10]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# shuffle repeatable.
split_parts = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts

In [12]:
# Standardising scaler with its default centring and scaling spelled out.
scaler = StandardScaler(with_mean=True, with_std=True)

In [13]:
# Learn the scaling statistics from the training split only, then apply the
# same fitted transform to both splits.
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [14]:
# Ordinary least-squares model with the default intercept term spelled out.
model = LinearRegression(fit_intercept=True)

In [15]:
# Fit the linear model on the scaled training features.
model.fit(X=X_train_scaled, y=y_train)

In [18]:
# Ridge regression with penalty strength alpha=10, fitted on the same scaled
# training data as the linear model.
ridge_alpha = 10
ridge = Ridge(alpha=ridge_alpha)
ridge.fit(X=X_train_scaled, y=y_train)

In [19]:
# Lasso regression with penalty strength alpha=1000, fitted on the same
# scaled training data as the linear model.
lasso_alpha = 1000
lasso = Lasso(alpha=lasso_alpha)
lasso.fit(X=X_train_scaled, y=y_train)

In [21]:
# One row per feature, one column per fitted model, so the coefficients of
# the three regressors can be compared side by side.
coefficient_columns = {
    "Feature": x.columns,
    "linear": model.coef_,
    "Ridge": ridge.coef_,
    "Lasso": lasso.coef_,
}
coef_df = pd.DataFrame(coefficient_columns)
coef_df

Unnamed: 0,Feature,linear,Ridge,Lasso
0,Size,4309664.0,3830258.0,4308612.0
1,Bedrooms,16652.88,-342.969,15444.27
2,Age,-43823.95,-33763.68,-42641.38


In [16]:
# Predict prices for the scaled test split with the linear model.
y_pred = model.predict(X=X_test_scaled)

In [17]:
# Show the linear model's test-set predictions as the cell output.
y_pred

array([16737838.55059477, 19035063.32049828,  8458010.65433889,
       15980126.57558004, 19461971.58878125, 11262224.61329974,
       10870236.18933947,  7177990.15406724, 15667038.3476648 ,
        9267511.09527047, 17135225.51986077, 17686635.28978138,
       12846383.28352542,  9845172.31182504,  8740658.34399255,
       13225313.49408017, 14939041.64724034,  9889327.24863929,
        5572108.2470052 , 15180410.33710668])

In [26]:
# Score every fitted model on the held-out split. Ridge and Lasso are fitted
# earlier in the notebook, so they are evaluated here alongside the plain
# linear model instead of being left unscored.
fitted_models = {"Linear": model, "Ridge": ridge, "Lasso": lasso}
metric_rows = []
for label, estimator in fitted_models.items():
    predictions = estimator.predict(X_test_scaled)
    metric_rows.append(
        {
            "Model": label,
            # RMSE is the square root of the test-set mean squared error.
            "RMSE": np.sqrt(mean_squared_error(y_test, predictions)),
            "R2": r2_score(y_test, predictions),
        }
    )

# Keep `rmse` and `r2` bound to the linear model's scores, as before, but
# label the printed values so the output is readable on its own.
rmse = metric_rows[0]["RMSE"]
r2 = metric_rows[0]["R2"]
print(f"Linear RMSE: {rmse}")
print(f"Linear R2:   {r2}")

# Side-by-side comparison of the three models, shown as the cell output.
pd.DataFrame(metric_rows).set_index("Model")

35636.882663343116
0.9999218827152694
