In [1]:
import pandas as pd

In [2]:
df = pd.read_csv("real_estate.csv")

In [3]:
df.head()

Unnamed: 0,No,X1 transaction date,X2 house age,X3 distance to the nearest MRT station,X4 number of convenience stores,X5 latitude,X6 longitude,Y house price of unit area
0,1,2012.917,32.0,84.87882,10,24.98298,121.54024,37.9
1,2,2012.917,19.5,306.5947,9,24.98034,121.53951,42.2
2,3,2013.583,13.3,561.9845,5,24.98746,121.54391,47.3
3,4,2013.5,13.3,561.9845,5,24.98746,121.54391,54.8
4,5,2012.833,5.0,390.5684,5,24.97937,121.54245,43.1


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 414 entries, 0 to 413
Data columns (total 8 columns):
 #   Column                                  Non-Null Count  Dtype  
---  ------                                  --------------  -----  
 0   No                                      414 non-null    int64  
 1   X1 transaction date                     414 non-null    float64
 2   X2 house age                            414 non-null    float64
 3   X3 distance to the nearest MRT station  414 non-null    float64
 4   X4 number of convenience stores         414 non-null    int64  
 5   X5 latitude                             414 non-null    float64
 6   X6 longitude                            414 non-null    float64
 7   Y house price of unit area              414 non-null    float64
dtypes: float64(6), int64(2)
memory usage: 26.0 KB


## Splitting of Data

In [5]:
x = df.drop(['Y house price of unit area'],axis=1)
y = df['Y house price of unit area']

In [6]:
from sklearn.model_selection import train_test_split

In [7]:
x_train ,x_test , y_train , y_test = train_test_split(x,y,test_size=0.2, random_state = 42)

In [8]:
print(x_train.shape ,x_test.shape)

(331, 7) (83, 7)


## Applying Linear Regression

In [9]:
from sklearn.linear_model import LinearRegression

In [10]:
lr  =LinearRegression()

In [11]:
lr.fit(x_train , y_train)

LinearRegression()

In [12]:
pred = lr.predict(x_test)

In [13]:
from sklearn.metrics import r2_score

In [14]:
print(r2_score(pred , y_test))

0.553933126024727


## Applying Ridge Regression

In [15]:
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error

In [80]:
rr = Ridge(alpha = 1e-15,tol = 0.2)

In [81]:
rr.fit(x_train , y_train)

Ridge(alpha=1e-15, tol=0.2)

In [82]:
pred = rr.predict(x_test)

In [83]:
from sklearn.metrics import r2_score

In [84]:
print(r2_score(pred , y_test))

0.5539331260246856


In [85]:
print(mean_squared_error(pred , y_test))

54.60196067337543


**same attributes as they are in lasso**

## Apply Ridge Regression using GridSearchCV

**alpha : hyper parameter<br/><br/>**
**max_iter : no of iteration data should be passed ,It controls how many steps you will take in the gradient descent before giving up<br/><br/>**
**tol : 'tol' controls how close you want to be , the smaller tol , the more accurate your final solution will be , but the longer it will take , a tolerance is a threshold which, if crossed, stops the iterations of a solver.https://www.mathworks.com/help/optim/ug/tolerances-and-stopping-criteria.html
<br/><br/>**
    **solver : auto (choose best solver for data), cholesky , lsqr : least square methods , sparse_cg , sag : stochastic average gradient ,saga :  stochastic average gradient (SAGA) algorithm , lbfgs : Limited-memory Broyden–Fletcher–Goldfarb–Shanno<br/>**

In [22]:
from sklearn.model_selection import GridSearchCV

In [105]:
alpha = {'alpha' : [1e-15 , 1e-10 , 1e-8 , 1e-3 , 1e-2 , 1 , 5 , 10 , 20 , 30 , 35 , 40 , 45 , 50 , 55 , 100],
#          'max_iter' : [100 , 200 ,300, 400 ,500 , 600 ,700 ,800 , 900,1000 ],
         'solver' :['svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga', 'lbfgs']
        }

In [106]:
rr = Ridge()
rrg = GridSearchCV(rr , alpha , scoring = 'r2',cv =5)

In [107]:
rrg.fit(x_train , y_train)

80 fits failed out of a total of 560.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
80 fits failed with the following error:
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\ProgramData\Anaconda3\lib\site-packages\sklearn\linear_model\_ridge.py", line 1011, in fit
    return super().fit(X, y, sample_weight=sample_weight)
  File "C:\ProgramData\Anaconda3\lib\site-packages\sklearn\linear_model\_ridge.py", line 705, in fit
    raise ValueError(
ValueError: 'lbfgs' solver can be used only when positive=True. Please use another solver.

        nan 0.53755763 0.53755763 0.4580

GridSearchCV(cv=5, estimator=Ridge(),
             param_grid={'alpha': [1e-15, 1e-10, 1e-08, 0.001, 0.01, 1, 5, 10,
                                   20, 30, 35, 40, 45, 50, 55, 100],
                         'solver': ['svd', 'cholesky', 'lsqr', 'sparse_cg',
                                    'sag', 'saga', 'lbfgs']},
             scoring='r2')

In [109]:
print(rrg.best_params_)

{'alpha': 0.001, 'solver': 'cholesky'}


In [110]:
print(rrg.best_score_)

0.5378407802055154


In [113]:
pred = rrg.predict(x_test)

In [114]:
print(r2_score(pred , y_test))

0.5534258871787789


## Applying Lasso Regression

In [154]:
from sklearn.linear_model import Lasso

In [155]:
lasso_reg = Lasso(alpha = 0.001,selection = "cyclic")

In [156]:
lasso_reg.fit(x_train , y_train)

Lasso(alpha=0.001)

In [157]:
pred = lasso_reg.predict(x_test)

In [158]:
print(r2_score(pred,y_test))

0.5538119131372168


In [159]:
lasso_reg.dual_gap_

1.1417559303028166e-05

In [160]:
lasso_reg.n_iter_

37

In [161]:
lasso_reg.sparse_coef_

<1x7 sparse matrix of type '<class 'numpy.float64'>'
	with 7 stored elements in Compressed Sparse Row format>

In [162]:
lasso_reg.coef_

array([-5.64831097e-03,  5.39794541e+00, -2.66936339e-01, -4.74631674e-03,
        1.08956754e+00,  2.17316203e+02, -1.90221155e+01])

## Applying Lasso Regression using GridSearchCV

In [50]:
from sklearn.model_selection import GridSearchCV

In [137]:
params = {
    'alpha' : [1e-15 , 1e-10 , 1e-8 , 1e-3 , 1e-2 , 1 , 5 , 10 , 20 , 30 , 35 , 40 , 45 , 50 , 55 , 100],
    'max_iter' : [100 , 200 ,300, 400 ,500 , 600 ,700 ,800 , 900,1000 ],
    'selection' : ['cyclic', 'random']
}

In [149]:
laso_reg =Lasso()
lasso_reg_gscv = GridSearchCV(lasso_reg, params , scoring = 'r2',cv =5)

In [150]:
%%time
lasso_reg_gscv.fit(x_train , y_train)

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Wall time: 6.75 s


GridSearchCV(cv=5, estimator=Lasso(alpha=0.001),
             param_grid={'alpha': [1e-15, 1e-10, 1e-08, 0.001, 0.01, 1, 5, 10,
                                   20, 30, 35, 40, 45, 50, 55, 100],
                         'max_iter': [100, 200, 300, 400, 500, 600, 700, 800,
                                      900, 1000],
                         'selection': ['cyclic', 'random']},
             scoring='r2')

In [151]:
print(lasso_reg_gscv.best_score_)

0.53786813056093


In [152]:
print(lasso_reg_gscv.best_params_)

{'alpha': 0.001, 'max_iter': 100, 'selection': 'random'}
