# Regression Analysis


### Libraries 

#### Pandas, Numpy

In [1]:
import pandas as pd
import numpy as np

#### Scikit learn

In [2]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [3]:
from sklearn.linear_model import LinearRegression, Lasso, ElasticNet
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor

In [4]:
from sklearn.metrics import r2_score

#### Warnings

In [5]:
import warnings
warnings.filterwarnings('ignore')

## Step 1 Data Import

In [6]:
data = pd.read_csv(r"C:\Users\91997\Desktop\Resume Projects\Regression\SecondCar.csv") # Dataframe/table 1 name is 'data'
data

Unnamed: 0,name,year,km_driven,fuel,seller_type,transmission,owner,Rating,ExShowroom Price,selling_price
0,Tata Indica Vista Aqua 1.4 TDI,2010,120000,Diesel,Individual,Manual,Second Owner,9.00,106001,100000
1,Tata Nano Lx BSIV,2012,50000,Petrol,Individual,Manual,Second Owner,9.00,108556,100000
2,Tata Indica Vista Aqua TDI BSIII,2011,70000,Diesel,Individual,Manual,First Owner,11.00,120678,120000
3,Maruti Wagon R LXI Minor,2010,80000,Petrol,Individual,Manual,Second Owner,9.00,122917,100000
4,Chevrolet Beat LT,2010,80000,Petrol,Individual,Manual,Second Owner,10.00,144902,130000
...,...,...,...,...,...,...,...,...,...,...
4335,BMW X5 xDrive 30d xLine,2019,30000,Diesel,Dealer,Automatic,First Owner,11.00,9407487,4950000
4336,BMW 5 Series 520d Luxury Line,2019,12999,Diesel,Dealer,Automatic,First Owner,9.71,9598350,4800000
4337,BMW X5 xDrive 30d xLine,2019,30000,Diesel,Dealer,Automatic,First Owner,11.00,9857238,4950000
4338,Audi RS7 2015-2019 Sportback Performance,2016,13000,Petrol,Dealer,Automatic,First Owner,14.09,14235729,8900000


## Data Preprocessing

#### Steps
    1. Duplicate Removal
    2. Missing Value Treatment
    3. Unique Value Treatment
    4. Feature Engineering
    5. Correlation

## Step 3 Duplicate Val Removal

In [7]:
print("Before Duplicate removal Dataframe : ")
len(data)

Before Duplicate removal Dataframe : 


4340

In [8]:
data = data.drop_duplicates(keep = 'first') # Removes any duplicate or repeated row

In [9]:
print("After Duplicate removal Dataframe : ")
len(data)

After Duplicate removal Dataframe : 


4340

## Step 4 Missing Value Treatment

In [10]:
missing_value_df = pd.DataFrame(data.isna().sum(), columns = ['Missing_Value_Count']) # table 2 created by us
missing_value_df

Unnamed: 0,Missing_Value_Count
name,0
year,0
km_driven,0
fuel,3
seller_type,2
transmission,2
owner,3
Rating,4
ExShowroom Price,0
selling_price,0


In [11]:
print(100*data.isna().sum()/len(data), "%") # Sum of null values from entire df divided by length of entire df 

name                0.000000
year                0.000000
km_driven           0.000000
fuel                0.069124
seller_type         0.046083
transmission        0.046083
owner               0.069124
Rating              0.092166
ExShowroom Price    0.000000
selling_price       0.000000
dtype: float64 %


In [12]:
missing_value_df['Percentage'] = 100*data.isna().sum()/len(data)
missing_value_df

Unnamed: 0,Missing_Value_Count,Percentage
name,0,0.0
year,0,0.0
km_driven,0,0.0
fuel,3,0.069124
seller_type,2,0.046083
transmission,2,0.046083
owner,3,0.069124
Rating,4,0.092166
ExShowroom Price,0,0.0
selling_price,0,0.0


### Note : We check if the missing value is less than 75%. If more than 75% then delete the entrie column.

In [13]:
missing_value_df[missing_value_df['Percentage']>75]

Unnamed: 0,Missing_Value_Count,Percentage


In [14]:
#columns_to_be_deleted = list(missing_value_df[missing_value_df['Percentage']>75].index)
#data.drop(columns = columns_to_be_deleted, inplace = True)

#### Median Mode

In [15]:
data['fuel'].mode()[0]   # Mode : Most repeating/occuring value in the column, for textutal/object type data we use mode

'Diesel'

In [16]:
data['name'].mode()[0]

'Maruti Swift Dzire VDI'

In [17]:
data['km_driven'].median() # Median : For numeric type data

60000.0

#### We fill in the null cells of the dataframe

In [18]:
for col in data.columns: 
    print(col, data[col].dtype) ## The datatype of each columns is printed. int,float are numeric dtype

name object
year int64
km_driven int64
fuel object
seller_type object
transmission object
owner object
Rating float64
ExShowroom Price int64
selling_price int64


### Null values of every column are replaced according to the column's dtype. 
#### For Numeric dtype we replace the empty cell using median function
#### For Object/textual dtype we replace the empty cell using mode function

In [19]:
for col in data.columns:
    if data[col].dtype == 'object': # We access data of each column using for loop and check if dtype is object
        print(col, '--->', data[col].dtype, '--->', data[col].mode([0])) 
        data[col].fillna(data[col].mode()[0], inplace = True)
        # We fill/replace null values using fillna by the mode value obtained from the respective columns

name ---> object ---> 0    Maruti Swift Dzire VDI
Name: name, dtype: object
fuel ---> object ---> 0    Diesel
Name: fuel, dtype: object
seller_type ---> object ---> 0    Individual
Name: seller_type, dtype: object
transmission ---> object ---> 0    Manual
Name: transmission, dtype: object
owner ---> object ---> 0    First Owner
Name: owner, dtype: object


In [20]:
for col in data.columns:
    if data[col].dtype != 'object':  # We access data of each column using for loop and check if dtype is not object
        print(col, '--->', data[col].dtype, '--->', data[col].median())
        data[col].fillna(data[col].median(), inplace = True)
        # We fill/replace null values using fillna by the median value obtained from the respective columns

year ---> int64 ---> 2014.0
km_driven ---> int64 ---> 60000.0
Rating ---> float64 ---> 12.0
ExShowroom Price ---> int64 ---> 596055.0
selling_price ---> int64 ---> 350000.0


#### Now we cross check how many missing/null values we have. Refer ln[10] data.isna().sum()

In [21]:
data.isna().sum() # Step 4 ends

name                0
year                0
km_driven           0
fuel                0
seller_type         0
transmission        0
owner               0
Rating              0
ExShowroom Price    0
selling_price       0
dtype: int64

## Step 5 Unique Value Treatment

#### Adding a dummy column now

In [22]:
data['Dummy'] = 'Sanyukta'
data

Unnamed: 0,name,year,km_driven,fuel,seller_type,transmission,owner,Rating,ExShowroom Price,selling_price,Dummy
0,Tata Indica Vista Aqua 1.4 TDI,2010,120000,Diesel,Individual,Manual,Second Owner,9.00,106001,100000,Sanyukta
1,Tata Nano Lx BSIV,2012,50000,Petrol,Individual,Manual,Second Owner,9.00,108556,100000,Sanyukta
2,Tata Indica Vista Aqua TDI BSIII,2011,70000,Diesel,Individual,Manual,First Owner,11.00,120678,120000,Sanyukta
3,Maruti Wagon R LXI Minor,2010,80000,Petrol,Individual,Manual,Second Owner,9.00,122917,100000,Sanyukta
4,Chevrolet Beat LT,2010,80000,Petrol,Individual,Manual,Second Owner,10.00,144902,130000,Sanyukta
...,...,...,...,...,...,...,...,...,...,...,...
4335,BMW X5 xDrive 30d xLine,2019,30000,Diesel,Dealer,Automatic,First Owner,11.00,9407487,4950000,Sanyukta
4336,BMW 5 Series 520d Luxury Line,2019,12999,Diesel,Dealer,Automatic,First Owner,9.71,9598350,4800000,Sanyukta
4337,BMW X5 xDrive 30d xLine,2019,30000,Diesel,Dealer,Automatic,First Owner,11.00,9857238,4950000,Sanyukta
4338,Audi RS7 2015-2019 Sportback Performance,2016,13000,Petrol,Dealer,Automatic,First Owner,14.09,14235729,8900000,Sanyukta


#### If the entire columns has
         1. Only one value throughout
         2. All values different
         Delete the entire Column

In [23]:
for col in data.columns:
    print(col, data[col].nunique())

name 1491
year 27
km_driven 770
fuel 5
seller_type 3
transmission 2
owner 5
Rating 566
ExShowroom Price 4331
selling_price 445
Dummy 1


#### Dataset size 4340
    1. 1
    2. 4340 & they are object/textual dtype
    delete

In [24]:
for col in data.columns:
    if(data[col].nunique()==1):
        del data[col]
    elif(data[col].nunique()==len(data) & (data[col].dtype == 'object')):
         del data[col]

In [25]:
len(data)

4340

In [26]:
data

Unnamed: 0,name,year,km_driven,fuel,seller_type,transmission,owner,Rating,ExShowroom Price,selling_price
0,Tata Indica Vista Aqua 1.4 TDI,2010,120000,Diesel,Individual,Manual,Second Owner,9.00,106001,100000
1,Tata Nano Lx BSIV,2012,50000,Petrol,Individual,Manual,Second Owner,9.00,108556,100000
2,Tata Indica Vista Aqua TDI BSIII,2011,70000,Diesel,Individual,Manual,First Owner,11.00,120678,120000
3,Maruti Wagon R LXI Minor,2010,80000,Petrol,Individual,Manual,Second Owner,9.00,122917,100000
4,Chevrolet Beat LT,2010,80000,Petrol,Individual,Manual,Second Owner,10.00,144902,130000
...,...,...,...,...,...,...,...,...,...,...
4335,BMW X5 xDrive 30d xLine,2019,30000,Diesel,Dealer,Automatic,First Owner,11.00,9407487,4950000
4336,BMW 5 Series 520d Luxury Line,2019,12999,Diesel,Dealer,Automatic,First Owner,9.71,9598350,4800000
4337,BMW X5 xDrive 30d xLine,2019,30000,Diesel,Dealer,Automatic,First Owner,11.00,9857238,4950000
4338,Audi RS7 2015-2019 Sportback Performance,2016,13000,Petrol,Dealer,Automatic,First Owner,14.09,14235729,8900000


In [27]:
for col in data.columns:
    print(col, data[col].nunique())

name 1491
year 27
km_driven 770
fuel 5
seller_type 3
transmission 2
owner 5
Rating 566
ExShowroom Price 4331
selling_price 445


## Step 6 Feature Engineering - Variety Reduction

In [28]:
data['name'] 

0                          Tata Indica Vista Aqua 1.4 TDI
1                                       Tata Nano Lx BSIV
2                        Tata Indica Vista Aqua TDI BSIII
3                                Maruti Wagon R LXI Minor
4                                       Chevrolet Beat LT
                              ...                        
4335                              BMW X5 xDrive 30d xLine
4336                        BMW 5 Series 520d Luxury Line
4337                              BMW X5 xDrive 30d xLine
4338             Audi RS7 2015-2019 Sportback Performance
4339    Mercedes-Benz S-Class S 350d Connoisseurs Edition
Name: name, Length: 4340, dtype: object

In [29]:
data['name'].str.split(" ").str[0] # Splitting and extracting only the first word/company name

0                Tata
1                Tata
2                Tata
3              Maruti
4           Chevrolet
            ...      
4335              BMW
4336              BMW
4337              BMW
4338             Audi
4339    Mercedes-Benz
Name: name, Length: 4340, dtype: object

In [30]:
data['name_after_split'] = data['name'].str.split(" ").str[0] # Creating a new column in the dataframe where we store
# first name 
data

Unnamed: 0,name,year,km_driven,fuel,seller_type,transmission,owner,Rating,ExShowroom Price,selling_price,name_after_split
0,Tata Indica Vista Aqua 1.4 TDI,2010,120000,Diesel,Individual,Manual,Second Owner,9.00,106001,100000,Tata
1,Tata Nano Lx BSIV,2012,50000,Petrol,Individual,Manual,Second Owner,9.00,108556,100000,Tata
2,Tata Indica Vista Aqua TDI BSIII,2011,70000,Diesel,Individual,Manual,First Owner,11.00,120678,120000,Tata
3,Maruti Wagon R LXI Minor,2010,80000,Petrol,Individual,Manual,Second Owner,9.00,122917,100000,Maruti
4,Chevrolet Beat LT,2010,80000,Petrol,Individual,Manual,Second Owner,10.00,144902,130000,Chevrolet
...,...,...,...,...,...,...,...,...,...,...,...
4335,BMW X5 xDrive 30d xLine,2019,30000,Diesel,Dealer,Automatic,First Owner,11.00,9407487,4950000,BMW
4336,BMW 5 Series 520d Luxury Line,2019,12999,Diesel,Dealer,Automatic,First Owner,9.71,9598350,4800000,BMW
4337,BMW X5 xDrive 30d xLine,2019,30000,Diesel,Dealer,Automatic,First Owner,11.00,9857238,4950000,BMW
4338,Audi RS7 2015-2019 Sportback Performance,2016,13000,Petrol,Dealer,Automatic,First Owner,14.09,14235729,8900000,Audi


In [31]:
for col in data.columns:
    print(col, data[col].nunique())

name 1491
year 27
km_driven 770
fuel 5
seller_type 3
transmission 2
owner 5
Rating 566
ExShowroom Price 4331
selling_price 445
name_after_split 29


In [32]:
round(data.describe(), 2)

Unnamed: 0,year,km_driven,Rating,ExShowroom Price,selling_price
count,4340.0,4340.0,4340.0,4340.0,4340.0
mean,2013.09,66215.78,11.97,845380.89,504127.31
std,4.22,46644.1,1.91,884840.54,578548.74
min,1992.0,1.0,9.0,106001.0,20000.0
25%,2011.0,35000.0,10.02,445389.5,208749.75
50%,2014.0,60000.0,12.0,596055.0,350000.0
75%,2016.0,90000.0,13.76,946243.0,600000.0
max,2020.0,806599.0,15.0,15538153.0,8900000.0


In [33]:
for col in data.columns:
    if data[col].dtype != 'object':
        print("Auto feature Engg -->>", col)

Auto feature Engg -->> year
Auto feature Engg -->> km_driven
Auto feature Engg -->> Rating
Auto feature Engg -->> ExShowroom Price
Auto feature Engg -->> selling_price


In [34]:
for col in data.columns:
    if data[col].dtype != 'object':
        print("Auto feature Engg -->>", col)
        new_col_name = col + '_bin'
        data[new_col_name] = pd.qcut(data[col], 4, labels = ['b1','b2','b3','b4'])

Auto feature Engg -->> year
Auto feature Engg -->> km_driven
Auto feature Engg -->> Rating
Auto feature Engg -->> ExShowroom Price
Auto feature Engg -->> selling_price


In [35]:
data

Unnamed: 0,name,year,km_driven,fuel,seller_type,transmission,owner,Rating,ExShowroom Price,selling_price,name_after_split,year_bin,km_driven_bin,Rating_bin,ExShowroom Price_bin,selling_price_bin
0,Tata Indica Vista Aqua 1.4 TDI,2010,120000,Diesel,Individual,Manual,Second Owner,9.00,106001,100000,Tata,b1,b4,b1,b1,b1
1,Tata Nano Lx BSIV,2012,50000,Petrol,Individual,Manual,Second Owner,9.00,108556,100000,Tata,b2,b2,b1,b1,b1
2,Tata Indica Vista Aqua TDI BSIII,2011,70000,Diesel,Individual,Manual,First Owner,11.00,120678,120000,Tata,b1,b3,b2,b1,b1
3,Maruti Wagon R LXI Minor,2010,80000,Petrol,Individual,Manual,Second Owner,9.00,122917,100000,Maruti,b1,b3,b1,b1,b1
4,Chevrolet Beat LT,2010,80000,Petrol,Individual,Manual,Second Owner,10.00,144902,130000,Chevrolet,b1,b3,b1,b1,b1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4335,BMW X5 xDrive 30d xLine,2019,30000,Diesel,Dealer,Automatic,First Owner,11.00,9407487,4950000,BMW,b4,b1,b2,b4,b4
4336,BMW 5 Series 520d Luxury Line,2019,12999,Diesel,Dealer,Automatic,First Owner,9.71,9598350,4800000,BMW,b4,b1,b1,b4,b4
4337,BMW X5 xDrive 30d xLine,2019,30000,Diesel,Dealer,Automatic,First Owner,11.00,9857238,4950000,BMW,b4,b1,b2,b4,b4
4338,Audi RS7 2015-2019 Sportback Performance,2016,13000,Petrol,Dealer,Automatic,First Owner,14.09,14235729,8900000,Audi,b3,b1,b4,b4,b4


## Step 7 Label Encoding

In [36]:
LN = LabelEncoder() # We convert all the values in terms of meaningful digits which are assorted using bins

In [37]:
for col in data.columns:
    if ( data[col].dtype == 'object') | hasattr(data[col], 'cat'):
        data[col] = LN.fit_transform(data[col])

In [38]:
data

Unnamed: 0,name,year,km_driven,fuel,seller_type,transmission,owner,Rating,ExShowroom Price,selling_price,name_after_split,year_bin,km_driven_bin,Rating_bin,ExShowroom Price_bin,selling_price_bin
0,1228,2010,120000,1,1,1,2,9.00,106001,100000,25,0,3,0,0,0
1,1283,2012,50000,4,1,1,2,9.00,108556,100000,25,1,1,0,0,0
2,1229,2011,70000,1,1,1,0,11.00,120678,120000,25,0,2,1,0,0
3,1041,2010,80000,4,1,1,2,9.00,122917,100000,18,0,2,0,0,0
4,56,2010,80000,4,1,1,2,10.00,144902,130000,3,0,2,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4335,39,2019,30000,1,0,0,0,11.00,9407487,4950000,2,3,0,1,3,3
4336,30,2019,12999,1,0,0,0,9.71,9598350,4800000,2,3,0,0,3,3
4337,39,2019,30000,1,0,0,0,11.00,9857238,4950000,2,3,0,1,3,3
4338,25,2016,13000,4,0,0,0,14.09,14235729,8900000,1,2,0,3,3,3


## Step 8 Correlation

In [39]:
corr = 100*data.corr()
corr.style.background_gradient(cmap = 'coolwarm')

Unnamed: 0,name,year,km_driven,fuel,seller_type,transmission,owner,Rating,ExShowroom Price,selling_price,name_after_split,year_bin,km_driven_bin,Rating_bin,ExShowroom Price_bin,selling_price_bin
name,100.0,-5.190221,12.620297,-8.385037,14.080245,8.778524,3.281915,1.876041,-6.840192,-7.759752,97.451337,-5.671639,11.630633,2.075166,-2.85603,-2.11076
year,-5.190221,100.0,-41.968815,-12.052816,-9.835151,-14.380043,-41.470517,-0.341515,30.487065,41.392168,-3.972402,89.078567,-48.037822,-0.020174,41.136106,66.112946
km_driven,12.620297,-41.968815,100.0,-28.563429,11.368942,12.022623,29.711504,-0.39265,-16.510516,-19.228863,13.146088,-48.163649,84.092423,-0.841211,-17.905059,-25.070004
fuel,-8.385037,-12.052816,-28.563429,100.0,3.838666,4.044484,-1.030093,-1.210894,-23.467424,-26.977883,-10.939373,-3.959058,-29.716702,-0.648921,-29.419206,-33.900059
seller_type,14.080245,-9.835151,11.368942,3.838666,100.0,17.492495,16.568072,-0.527834,-13.274456,-15.155423,14.421752,-7.662485,12.685058,-1.092437,-10.965633,-14.708225
transmission,8.778524,-14.380043,12.022623,4.044484,17.492495,100.0,7.889262,2.51395,-51.667679,-53.020514,11.069899,-16.829368,13.422679,2.323975,-30.819882,-30.933432
owner,3.281915,-41.470517,29.711504,-1.030093,16.568072,7.889262,100.0,0.751234,-16.772565,-20.784034,3.645168,-42.10963,34.241653,0.764166,-23.543955,-32.145235
Rating,1.876041,-0.341515,-0.39265,-1.210894,-0.527834,2.51395,0.751234,100.0,-0.109851,-2.246754,2.165585,-1.193808,-0.804309,96.617375,4.854796,-0.098509
ExShowroom Price,-6.840192,30.487065,-16.510516,-23.467424,-13.274456,-51.667679,-16.772565,-0.109851,100.0,96.062867,-8.755945,34.384808,-19.946381,0.336778,57.968207,54.676162
selling_price,-7.759752,41.392168,-19.228863,-26.977883,-15.155423,-53.020514,-20.784034,-2.246754,96.062867,100.0,-9.685807,42.254564,-22.965846,-1.622573,57.374834,62.504903


In [40]:
data.drop(columns = ['name', 'Rating', 'ExShowroom Price'], inplace = True)

In [41]:
corr = 100*data.corr()
corr.style.background_gradient(cmap = 'coolwarm')

Unnamed: 0,year,km_driven,fuel,seller_type,transmission,owner,selling_price,name_after_split,year_bin,km_driven_bin,Rating_bin,ExShowroom Price_bin,selling_price_bin
year,100.0,-41.968815,-12.052816,-9.835151,-14.380043,-41.470517,41.392168,-3.972402,89.078567,-48.037822,-0.020174,41.136106,66.112946
km_driven,-41.968815,100.0,-28.563429,11.368942,12.022623,29.711504,-19.228863,13.146088,-48.163649,84.092423,-0.841211,-17.905059,-25.070004
fuel,-12.052816,-28.563429,100.0,3.838666,4.044484,-1.030093,-26.977883,-10.939373,-3.959058,-29.716702,-0.648921,-29.419206,-33.900059
seller_type,-9.835151,11.368942,3.838666,100.0,17.492495,16.568072,-15.155423,14.421752,-7.662485,12.685058,-1.092437,-10.965633,-14.708225
transmission,-14.380043,12.022623,4.044484,17.492495,100.0,7.889262,-53.020514,11.069899,-16.829368,13.422679,2.323975,-30.819882,-30.933432
owner,-41.470517,29.711504,-1.030093,16.568072,7.889262,100.0,-20.784034,3.645168,-42.10963,34.241653,0.764166,-23.543955,-32.145235
selling_price,41.392168,-19.228863,-26.977883,-15.155423,-53.020514,-20.784034,100.0,-9.685807,42.254564,-22.965846,-1.622573,57.374834,62.504903
name_after_split,-3.972402,13.146088,-10.939373,14.421752,11.069899,3.645168,-9.685807,100.0,-4.079412,11.946879,2.320414,-3.121876,-2.5116
year_bin,89.078567,-48.163649,-3.959058,-7.662485,-16.829368,-42.10963,42.254564,-4.079412,100.0,-54.804322,-0.59327,48.265465,64.786231
km_driven_bin,-48.037822,84.092423,-29.716702,12.685058,13.422679,34.241653,-22.965846,11.946879,-54.804322,100.0,-1.114484,-23.448379,-30.692995


## Step 9 Train Test Split

In [42]:
x = data.drop(columns = 'selling_price')
y = data['selling_price']

In [43]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.3)

In [48]:
len(y_train)

3038

In [50]:
len(y_test)

1302

In [51]:
len(x_test)

1302

In [52]:
len(x_train)

3038

## Step 10 Model Development

In [53]:
model = LinearRegression()

In [54]:
model.fit(x_train, y_train)

## Step 11 Predictions

In [55]:
y_pred = model.predict(x_test)

In [56]:
y_pred

array([222092.22059131, 838497.19401764, 183257.54426816, ...,
       541445.7718401 , 228944.69820593, 195825.73106053])

## Step 12 Accuracy

In [57]:
accuracy = 100*r2_score(y_test, y_pred)
accuracy

57.81778584459413

In [58]:
models = [LinearRegression(), Lasso(), ElasticNet(), KNeighborsRegressor(),
         DecisionTreeRegressor(), RandomForestRegressor(), AdaBoostRegressor(),
          GradientBoostingRegressor()]

In [59]:
acc_dic = {} 

In [60]:
for model in models:
    model.fit(x_train,y_train)
    y_pred = model.predict(x_test)
    accuracy = 100* r2_score(y_test, y_pred)
    acc_dic[model] = round(accuracy,2)

In [61]:
acc_dic

{LinearRegression(): 57.82,
 Lasso(): 57.82,
 ElasticNet(): 47.21,
 KNeighborsRegressor(): 39.11,
 DecisionTreeRegressor(): 79.79,
 RandomForestRegressor(): 85.48,
 AdaBoostRegressor(): 25.76,
 GradientBoostingRegressor(): 86.71}