<a href="https://colab.research.google.com/github/Korniev/Data-Sciense/blob/main/udemy/Feature_scaling_multiple_LinReg.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_theme()

from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler

In [3]:
# Read the real-estate CSV (price, size, year) into a DataFrame and display it.
# NOTE(review): the absolute '/content/...' path presumably targets the Colab
# runtime filesystem - confirm/adjust before running in another environment.
data = pd.read_csv('/content/real_estate_price_size_year.csv')
data

Unnamed: 0,price,size,year
0,234314.144,643.09,2015
1,228581.528,656.22,2009
2,281626.336,487.29,2018
3,401255.608,1504.75,2015
4,458674.256,1275.46,2009
...,...,...,...
95,252460.400,549.80,2009
96,310522.592,1037.44,2009
97,383635.568,1504.75,2006
98,225145.248,648.29,2015


In [4]:
# Descriptive statistics (count, mean, std, min, quartiles, max) per column.
data.describe()

Unnamed: 0,price,size,year
count,100.0,100.0,100.0
mean,292289.47016,853.0242,2012.6
std,77051.727525,297.941951,4.729021
min,154282.128,479.75,2006.0
25%,234280.148,643.33,2009.0
50%,280590.716,696.405,2015.0
75%,335723.696,1029.3225,2018.0
max,500681.128,1842.51,2018.0


### Linear regression

### Declare variables

In [5]:
# Inputs (independent variables): the 'size' and 'year' columns.
x = data[['size','year']]
# Target (dependent variable): the 'price' column.
y = data['price']

### Scale the inputs

In [6]:
# Learn the per-feature scaling parameters from the inputs, keeping the
# fitted scaler around so new observations can be transformed the same way.
scaler = StandardScaler().fit(x)
# Standardize the training inputs with the fitted scaler.
x_scaled = scaler.transform(x)

In [7]:
# Inspect the standardized inputs (one row per observation, one column per feature).
# NOTE(review): this displays the whole array; a slice such as x_scaled[:5]
# would keep the output short.
x_scaled

array([[-0.70816415,  0.51006137],
       [-0.66387316, -0.76509206],
       [-1.23371919,  1.14763808],
       [ 2.19844528,  0.51006137],
       [ 1.42498884, -0.76509206],
       [-0.937209  , -1.40266877],
       [-0.95171405,  0.51006137],
       [-0.78328682, -1.40266877],
       [-0.57603328,  1.14763808],
       [-0.53467702, -0.76509206],
       [ 0.69939906, -0.76509206],
       [ 3.33780001, -0.76509206],
       [-0.53467702,  0.51006137],
       [ 0.52699137,  1.14763808],
       [ 1.51100715, -1.40266877],
       [ 1.77668568, -1.40266877],
       [-0.54810263,  1.14763808],
       [-0.77276222, -1.40266877],
       [-0.58004747, -1.40266877],
       [ 0.58943055,  1.14763808],
       [-0.78365788,  0.51006137],
       [-1.02322731,  0.51006137],
       [ 1.19557293,  0.51006137],
       [-1.12884431,  0.51006137],
       [-1.10378093, -0.76509206],
       [ 0.84424715,  1.14763808],
       [-0.95171405,  1.14763808],
       [ 1.62279723,  0.51006137],
       [-0.58004747,

### Regression

In [8]:
# Create the linear regression model.
reg = LinearRegression()
# Fit it on the standardized inputs; the coefficients are therefore expressed
# per one standard deviation of each feature, not per original unit.
reg.fit(x_scaled,y)

### Finding intercept and coefficient

In [9]:
# Intercept of the fitted model (inputs were standardized before fitting).
reg.intercept_

292289.4701599997

In [10]:
# Fitted coefficients, in the same order as the columns of x ('size', 'year').
reg.coef_

array([67501.57614152, 13724.39708231])

### Calculate the R-squared

In [12]:
# R-squared of the fitted model, evaluated on the same data it was trained on.
reg.score(x_scaled,y)

0.7764803683276793

### Calculate the Adjusted R-squared

In [13]:
# Helper that turns a model's R-squared into the adjusted R-squared.
def adj_r2(x, y, model=None):
    """Return the adjusted R-squared of a fitted model on ``(x, y)``.

    The adjusted R-squared penalises R-squared for the number of predictors:
    ``1 - (1 - R2) * (n - 1) / (n - p - 1)``, where ``n`` is the number of
    observations and ``p`` the number of predictors.

    Parameters
    ----------
    x : object with a two-element ``shape``
        Inputs; ``x.shape[0]`` is read as ``n`` and ``x.shape[1]`` as ``p``.
    y : array-like
        Targets, passed through to ``model.score``.
    model : object with a ``score(x, y)`` method, optional
        Fitted model whose ``score`` returns R-squared. When omitted, the
        notebook-level ``reg`` is used, which preserves the original
        two-argument call.

    Returns
    -------
    float
        The adjusted R-squared.

    Raises
    ------
    ValueError
        If ``n - p - 1 <= 0``, where the adjustment is undefined.
    """
    if model is None:
        # Backward-compatible default: the regression fitted earlier in the notebook.
        model = reg
    r2 = model.score(x, y)
    n = x.shape[0]
    p = x.shape[1]
    dof = n - p - 1
    if dof <= 0:
        # Without this guard the formula divides by zero or flips sign.
        raise ValueError(
            "adjusted R-squared is undefined when n - p - 1 <= 0 "
            f"(n={n}, p={p})"
        )
    adjusted_r2 = 1 - (1 - r2) * (n - 1) / dof
    return adjusted_r2

In [14]:
# Adjusted R-squared of the fitted model on the standardized training inputs.
adj_r2(x_scaled, y)

0.77187171612825

### Making predictions

Find the predicted price of an apartment that has a size of 750 sq.ft. from 2009.

In [18]:
# Build the new observation (size 750, year 2009) as a DataFrame carrying the
# same column names the scaler was fitted with; passing a bare list makes
# scikit-learn warn that X does not have valid feature names.
new_data = pd.DataFrame([[750, 2009]], columns=x.columns)
# Apply the scaling learned from the training inputs (do not refit).
new_data_scaled = scaler.transform(new_data)



In [19]:
# Predicted price for the standardized new observation.
reg.predict(new_data_scaled)

array([258330.34465995])

### Calculate the univariate p-values of the variables

In [20]:
from sklearn.feature_selection import f_regression

In [21]:
# Univariate F-test per feature; returns a pair (F-statistics, p-values),
# each with one entry per column of the inputs.
f_regression(x_scaled, y)

(array([285.92105192,   0.85525799]), array([8.12763222e-31, 3.57340758e-01]))

In [22]:
# Keep only the p-values (second element of the returned pair). Use the same
# standardized inputs the model was fitted on, for consistency with the rest
# of the notebook; the univariate p-values come out the same on scaled or
# raw inputs.
p_values = f_regression(x_scaled, y)[1]
p_values

array([8.12763222e-31, 3.57340758e-01])

In [23]:
# Round to three decimals for readability; very small p-values display as 0.
p_values.round(3)

array([0.   , 0.357])

### Create a summary table with your findings

In [24]:
# One row per feature: its name, its fitted coefficient (on the standardized
# inputs) and its univariate p-value rounded to three decimals.
reg_summary = pd.DataFrame({
    'Features': x.columns.values,
    'Coefficients': reg.coef_,
    'p-values': p_values.round(3),
})
reg_summary

Unnamed: 0,Features,Coefficients,p-values
0,size,67501.576142,0.0
1,year,13724.397082,0.357


It seems that 'year' is not even significant (its p-value of 0.357 is well above the usual 0.05 threshold), therefore we should remove it from the model.

Note that this dataset is extremely clean and probably artificially created, therefore standardization does not really bring any value to it.