# Regression Analysis

## Import Packages and Load Data

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

pd.set_option('display.max_columns', None)

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import OneHotEncoder, PolynomialFeatures, PowerTransformer
from sklearn.preprocessing import SplineTransformer, StandardScaler, RobustScaler
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression, RidgeCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
import sklearn.metrics as metrics

from sklearn import set_config
set_config(transform_output = 'pandas')

In [3]:
data = pd.read_parquet('../data/full_data.parquet')
data

Unnamed: 0,Ship Mode,Customer ID,Segment,Country/Region,City,State,Postal Code,Region,Product ID,Category,Sub-Category,Sales,Quantity,Discount,Profit,Regional Manager,Returned,Order Cost,Order_Quarter,Order_WeekOfYear,Order_WeekOfMonth,Order_DayOfYear,Order_DayOfMonth,Order_DayOfWeek,Ship_Quarter,Ship_WeekOfYear,Ship_WeekOfMonth,Ship_DayOfYear,Ship_DayOfMonth,Ship_DayOfWeek
0,Second Class,CG-12520,Consumer,United States,Henderson,Kentucky,42420,South,FUR-BO-10001798,Furniture,Bookcases,261.9600,2,0.00,41.9136,Fred Suzuki,0,220.0464,4,45,2,312,8,0,4,45,2,315,11,3
1,Second Class,CG-12520,Consumer,United States,Henderson,Kentucky,42420,South,FUR-CH-10000454,Furniture,Chairs,731.9400,3,0.00,219.5820,Fred Suzuki,0,512.3580,4,45,2,312,8,0,4,45,2,315,11,3
2,Second Class,DV-13045,Corporate,United States,Los Angeles,California,90036,West,OFF-LA-10000240,Office Supplies,Labels,14.6200,2,0.00,6.8714,Sadie Pawthorne,0,7.7486,2,23,2,163,12,5,2,24,3,167,16,2
3,Standard Class,SO-20335,Consumer,United States,Fort Lauderdale,Florida,33311,South,FUR-TA-10000577,Furniture,Tables,957.5775,5,0.45,-383.0310,Fred Suzuki,0,1340.6085,4,41,2,285,11,6,4,42,3,292,18,6
4,Standard Class,SO-20335,Consumer,United States,Fort Lauderdale,Florida,33311,South,OFF-ST-10000760,Office Supplies,Storage,22.3680,2,0.20,2.5164,Fred Suzuki,0,19.8516,4,41,2,285,11,6,4,42,3,292,18,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9989,Second Class,TB-21400,Consumer,United States,Miami,Florida,33180,South,FUR-FU-10001889,Furniture,Furnishings,25.2480,3,0.20,4.1028,Fred Suzuki,0,21.1452,1,4,3,21,21,0,1,4,4,23,23,2
9990,Standard Class,DB-13060,Consumer,United States,Costa Mesa,California,92627,West,FUR-FU-10000747,Furniture,Furnishings,91.9600,2,0.00,15.6332,Sadie Pawthorne,1,76.3268,1,8,4,57,26,5,1,9,1,62,3,3
9991,Standard Class,DB-13060,Consumer,United States,Costa Mesa,California,92627,West,TEC-PH-10003645,Technology,Phones,258.5760,2,0.20,19.3932,Sadie Pawthorne,1,239.1828,1,8,4,57,26,5,1,9,1,62,3,3
9992,Standard Class,DB-13060,Consumer,United States,Costa Mesa,California,92627,West,OFF-PA-10004041,Office Supplies,Paper,29.6000,4,0.00,13.3200,Sadie Pawthorne,1,16.2800,1,8,4,57,26,5,1,9,1,62,3,3


In [5]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9994 entries, 0 to 9993
Data columns (total 30 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Ship Mode          9994 non-null   object 
 1   Customer ID        9994 non-null   object 
 2   Segment            9994 non-null   object 
 3   Country/Region     9994 non-null   object 
 4   City               9994 non-null   object 
 5   State              9994 non-null   object 
 6   Postal Code        9994 non-null   int64  
 7   Region             9994 non-null   object 
 8   Product ID         9994 non-null   object 
 9   Category           9994 non-null   object 
 10  Sub-Category       9994 non-null   object 
 11  Sales              9994 non-null   float64
 12  Quantity           9994 non-null   int64  
 13  Discount           9994 non-null   float64
 14  Profit             9994 non-null   float64
 15  Regional Manager   9994 non-null   object 
 16  Returned           9994 

## Perform Train/Test Split

In [7]:
X = data.drop(columns='Profit')
y = data['Profit']

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

## Construct Preprocessing Pipeline

In [19]:
## Categorical Pipeline

cat_features = X.select_dtypes(include='object').columns
cont_features = X.select_dtypes(include='number').columns

cat_pipe = Pipeline([('onehot', OneHotEncoder(sparse_output=False,
                                              handle_unknown='infrequent_if_exist',
                                              max_categories=25))
                     ])

cont_pipe = Pipeline([('impute', SimpleImputer(strategy='mean')),
                      ('splineTF', SplineTransformer()),
                    #   ('poly', PolynomialFeatures(degree=2)),
                      ('power', PowerTransformer(method='yeo-johnson')),
                      ('scaler', RobustScaler())
                      ])

# Combine the pipelines into a ColumnTransformer
preprocessor = ColumnTransformer([
    ('cat', cat_pipe, cat_features),
    ('cont', cont_pipe, cont_features)
    ])

In [22]:
preprocessor.fit_transform(X_train)

Unnamed: 0,cat__Ship Mode_First Class,cat__Ship Mode_Same Day,cat__Ship Mode_Second Class,cat__Ship Mode_Standard Class,cat__Customer ID_AH-10690,cat__Customer ID_AP-10915,cat__Customer ID_CK-12205,cat__Customer ID_CK-12595,cat__Customer ID_CL-12565,cat__Customer ID_EA-14035,cat__Customer ID_EH-13765,cat__Customer ID_EP-13915,cat__Customer ID_GG-14650,cat__Customer ID_GT-14710,cat__Customer ID_JD-15895,cat__Customer ID_JL-15835,cat__Customer ID_KL-16555,cat__Customer ID_KL-16645,cat__Customer ID_LA-16780,cat__Customer ID_LC-16870,cat__Customer ID_MA-17560,cat__Customer ID_PG-18820,cat__Customer ID_PP-18955,cat__Customer ID_RL-19615,cat__Customer ID_SC-20725,cat__Customer ID_SV-20365,cat__Customer ID_WB-21850,cat__Customer ID_XP-21865,cat__Customer ID_infrequent_sklearn,cat__Segment_Consumer,cat__Segment_Corporate,cat__Segment_Home Office,cat__Country/Region_United States,cat__City_Arlington,cat__City_Aurora,cat__City_Chicago,cat__City_Columbia,cat__City_Columbus,cat__City_Dallas,cat__City_Detroit,cat__City_Henderson,cat__City_Houston,cat__City_Jackson,cat__City_Jacksonville,cat__City_Long Beach,cat__City_Los Angeles,cat__City_Louisville,cat__City_New York City,cat__City_Newark,cat__City_Philadelphia,cat__City_Phoenix,cat__City_Richmond,cat__City_San Antonio,cat__City_San Diego,cat__City_San Francisco,cat__City_Seattle,cat__City_Springfield,cat__City_infrequent_sklearn,cat__State_Arizona,cat__State_California,cat__State_Colorado,cat__State_Connecticut,cat__State_Delaware,cat__State_Florida,cat__State_Georgia,cat__State_Illinois,cat__State_Indiana,cat__State_Kentucky,cat__State_Maryland,cat__State_Massachusetts,cat__State_Michigan,cat__State_New Jersey,cat__State_New York,cat__State_North Carolina,cat__State_Ohio,cat__State_Oregon,cat__State_Pennsylvania,cat__State_Tennessee,cat__State_Texas,cat__State_Virginia,cat__State_Washington,cat__State_Wisconsin,cat__State_infrequent_sklearn,cat__Region_Central,cat__Region_East,cat__Region_South,cat__Region_West,cat__Product ID_FUR-CH-10001146,cat__Product ID_FUR-CH-10002880,cat__Product ID_FUR-CH-10003774,cat__Product ID_FUR-FU-10001473,cat__Product ID_FUR-FU-10004270,cat__Product ID_FUR-FU-10004973,cat__Product ID_FUR-TA-10001095,cat__Product ID_OFF-BI-10000145,cat__Product ID_OFF-BI-10000301,cat__Product ID_OFF-BI-10000343,cat__Product ID_OFF-BI-10000977,cat__Product ID_OFF-BI-10001524,cat__Product ID_OFF-BI-10002026,cat__Product ID_OFF-BI-10004632,cat__Product ID_OFF-LA-10001613,cat__Product ID_OFF-PA-10001970,cat__Product ID_OFF-ST-10000563,cat__Product ID_OFF-ST-10000736,cat__Product ID_OFF-ST-10001490,cat__Product ID_OFF-ST-10004950,cat__Product ID_TEC-AC-10002049,cat__Product ID_TEC-AC-10003038,cat__Product ID_TEC-AC-10003628,cat__Product ID_TEC-AC-10003832,cat__Product ID_infrequent_sklearn,cat__Category_Furniture,cat__Category_Office Supplies,cat__Category_Technology,cat__Sub-Category_Accessories,cat__Sub-Category_Appliances,cat__Sub-Category_Art,cat__Sub-Category_Binders,cat__Sub-Category_Bookcases,cat__Sub-Category_Chairs,cat__Sub-Category_Copiers,cat__Sub-Category_Envelopes,cat__Sub-Category_Fasteners,cat__Sub-Category_Furnishings,cat__Sub-Category_Labels,cat__Sub-Category_Machines,cat__Sub-Category_Paper,cat__Sub-Category_Phones,cat__Sub-Category_Storage,cat__Sub-Category_Supplies,cat__Sub-Category_Tables,cat__Regional Manager_Chuck Magee,cat__Regional Manager_Fred Suzuki,cat__Regional Manager_Roxanne Rodriguez,cat__Regional Manager_Sadie Pawthorne,cont__Postal Code_sp_0,cont__Postal Code_sp_1,cont__Postal Code_sp_2,cont__Postal Code_sp_3,cont__Postal Code_sp_4,cont__Postal Code_sp_5,cont__Postal Code_sp_6,cont__Sales_sp_0,cont__Sales_sp_1,cont__Sales_sp_2,cont__Sales_sp_3,cont__Sales_sp_4,cont__Sales_sp_5,cont__Sales_sp_6,cont__Quantity_sp_0,cont__Quantity_sp_1,cont__Quantity_sp_2,cont__Quantity_sp_3,cont__Quantity_sp_4,cont__Quantity_sp_5,cont__Quantity_sp_6,cont__Discount_sp_0,cont__Discount_sp_1,cont__Discount_sp_2,cont__Discount_sp_3,cont__Discount_sp_4,cont__Discount_sp_5,cont__Discount_sp_6,cont__Returned_sp_0,cont__Returned_sp_1,cont__Returned_sp_2,cont__Returned_sp_3,cont__Returned_sp_4,cont__Returned_sp_5,cont__Returned_sp_6,cont__Order Cost_sp_0,cont__Order Cost_sp_1,cont__Order Cost_sp_2,cont__Order Cost_sp_3,cont__Order Cost_sp_4,cont__Order Cost_sp_5,cont__Order Cost_sp_6,cont__Order_Quarter_sp_0,cont__Order_Quarter_sp_1,cont__Order_Quarter_sp_2,cont__Order_Quarter_sp_3,cont__Order_Quarter_sp_4,cont__Order_Quarter_sp_5,cont__Order_Quarter_sp_6,cont__Order_WeekOfYear_sp_0,cont__Order_WeekOfYear_sp_1,cont__Order_WeekOfYear_sp_2,cont__Order_WeekOfYear_sp_3,cont__Order_WeekOfYear_sp_4,cont__Order_WeekOfYear_sp_5,cont__Order_WeekOfYear_sp_6,cont__Order_WeekOfMonth_sp_0,cont__Order_WeekOfMonth_sp_1,cont__Order_WeekOfMonth_sp_2,cont__Order_WeekOfMonth_sp_3,cont__Order_WeekOfMonth_sp_4,cont__Order_WeekOfMonth_sp_5,cont__Order_WeekOfMonth_sp_6,cont__Order_DayOfYear_sp_0,cont__Order_DayOfYear_sp_1,cont__Order_DayOfYear_sp_2,cont__Order_DayOfYear_sp_3,cont__Order_DayOfYear_sp_4,cont__Order_DayOfYear_sp_5,cont__Order_DayOfYear_sp_6,cont__Order_DayOfMonth_sp_0,cont__Order_DayOfMonth_sp_1,cont__Order_DayOfMonth_sp_2,cont__Order_DayOfMonth_sp_3,cont__Order_DayOfMonth_sp_4,cont__Order_DayOfMonth_sp_5,cont__Order_DayOfMonth_sp_6,cont__Order_DayOfWeek_sp_0,cont__Order_DayOfWeek_sp_1,cont__Order_DayOfWeek_sp_2,cont__Order_DayOfWeek_sp_3,cont__Order_DayOfWeek_sp_4,cont__Order_DayOfWeek_sp_5,cont__Order_DayOfWeek_sp_6,cont__Ship_Quarter_sp_0,cont__Ship_Quarter_sp_1,cont__Ship_Quarter_sp_2,cont__Ship_Quarter_sp_3,cont__Ship_Quarter_sp_4,cont__Ship_Quarter_sp_5,cont__Ship_Quarter_sp_6,cont__Ship_WeekOfYear_sp_0,cont__Ship_WeekOfYear_sp_1,cont__Ship_WeekOfYear_sp_2,cont__Ship_WeekOfYear_sp_3,cont__Ship_WeekOfYear_sp_4,cont__Ship_WeekOfYear_sp_5,cont__Ship_WeekOfYear_sp_6,cont__Ship_WeekOfMonth_sp_0,cont__Ship_WeekOfMonth_sp_1,cont__Ship_WeekOfMonth_sp_2,cont__Ship_WeekOfMonth_sp_3,cont__Ship_WeekOfMonth_sp_4,cont__Ship_WeekOfMonth_sp_5,cont__Ship_WeekOfMonth_sp_6,cont__Ship_DayOfYear_sp_0,cont__Ship_DayOfYear_sp_1,cont__Ship_DayOfYear_sp_2,cont__Ship_DayOfYear_sp_3,cont__Ship_DayOfYear_sp_4,cont__Ship_DayOfYear_sp_5,cont__Ship_DayOfYear_sp_6,cont__Ship_DayOfMonth_sp_0,cont__Ship_DayOfMonth_sp_1,cont__Ship_DayOfMonth_sp_2,cont__Ship_DayOfMonth_sp_3,cont__Ship_DayOfMonth_sp_4,cont__Ship_DayOfMonth_sp_5,cont__Ship_DayOfMonth_sp_6,cont__Ship_DayOfWeek_sp_0,cont__Ship_DayOfWeek_sp_1,cont__Ship_DayOfWeek_sp_2,cont__Ship_DayOfWeek_sp_3,cont__Ship_DayOfWeek_sp_4,cont__Ship_DayOfWeek_sp_5,cont__Ship_DayOfWeek_sp_6
9531,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.000000,0.000000,-0.162068,0.335226,0.741625,0.512661,0.000062,-0.002919,-0.001207,0.003908,0.000616,0.0,0.0,0.0,0.718650,0.363280,-0.487631,-0.216457,0.0,0.0,0.0,0.000000,0.000000,1.000000,1.000000,0.00000,0.00000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.063326,-0.025082,0.052245,0.008605,0.0,0.0,0.0,0.000000,0.000000,-0.061817,0.0,0.000000,0.826253,1.0,0.000000,0.000000,-0.055727,-0.279987,0.560215,0.687701,0.267697,0.000000,0.00000,-1.00000,0.000000,0.905513,1.000000,0.000000,0.000000,0.000000,-0.034541,-0.288592,0.516414,0.653658,0.319347,0.000000,-0.003776,-0.463134,-0.440833,0.739919,1.401603,1.404281,0.0,1.0,1.590052,0.799511,-0.41115,-0.249222,0.000000,0.000000,0.000000,-0.061236,0.0,0.000000,0.82336,1.0,0.000000,0.000000,-0.056086,-0.355796,0.493641,0.753537,0.577974,0.000000,0.000000,-1.000000,-1.000000,0.000000,1.323828,3.662729,0.00000,0.000000,-0.038241,-0.338193,0.477116,0.707918,0.525979,0.000000,0.000000,-0.470990,-0.466558,0.000000,1.300741,236.502724,4.43043,1.130212,0.000000,-0.207043,-0.505238,0.000000,0.000000
7639,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,13.570213,1.117997,1.098005,0.029650,-0.591705,-0.038456,0.000000,-1.173300,-3.304620,1.773068,8.990752,0.0,0.0,0.0,0.718650,0.363280,-0.487631,-0.216457,0.0,0.0,0.0,1.000000,1.000000,0.000000,0.000000,0.00000,0.00000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.100254,-2.718725,1.569791,6.315242,0.0,0.0,0.0,2.911628,2.423803,0.938183,0.0,-0.481794,-0.173747,0.0,3.032413,3.197864,1.101368,-0.415546,-0.597177,-0.194774,0.000000,2.355873,1.85632,0.00000,-1.000000,-1.000000,0.000000,0.000000,2.930547,4.340278,1.114573,-0.355248,-0.628130,-0.234633,0.000000,308.515953,1.345083,0.154142,-0.622232,-0.334334,0.000000,0.000000,0.0,1.0,1.590052,0.799511,-0.41115,-0.249222,0.000000,2.840937,2.497152,0.938764,0.0,-0.486889,-0.17664,0.0,1.947829,3.244373,1.118835,-0.355796,-0.606902,-0.191789,0.000000,0.000000,1.000000,1.120252,0.000000,-1.000000,0.000000,0.000000,1.58968,3.860787,1.135669,-0.299385,-0.625846,-0.225394,0.000000,1.000000,1.000000,0.683821,0.000000,-0.469214,0.000000,0.000000,1.00000,1.000000,0.541407,0.000000,-0.505238,0.000000,0.000000
2724,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.000000,0.000000,-0.162068,-0.363576,0.188799,1.007826,1.235157,-0.243758,-0.149170,0.218663,0.076555,0.0,0.0,0.0,0.718650,0.363280,-0.487631,-0.216457,0.0,0.0,0.0,0.000000,0.000000,1.000000,1.000000,0.00000,0.00000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.167759,-0.080005,0.142594,0.032349,0.0,0.0,0.0,0.000000,0.000000,-0.061817,0.0,0.000000,0.826253,1.0,0.000000,0.000000,-0.055727,0.000000,0.638965,0.432707,0.000000,2.355873,1.85632,0.00000,-1.000000,-1.000000,0.000000,0.000000,0.000000,0.000000,-0.034541,-0.014328,0.616806,0.396870,0.000277,278.595093,1.317559,0.431830,-0.582205,-0.334334,0.000000,0.000000,0.0,0.0,0.000000,0.799511,0.58885,0.000000,0.000000,0.000000,0.000000,-0.061236,0.0,0.000000,0.82336,1.0,0.000000,0.000000,-0.056086,-0.102010,0.613662,0.524245,0.010300,0.000000,1.000000,1.120252,0.000000,-1.000000,0.000000,0.000000,0.00000,0.000000,-0.038241,-0.070237,0.607397,0.459836,0.005890,1.000000,1.000000,0.683821,0.000000,-0.469214,0.000000,0.000000,1.00000,1.000000,0.541407,0.000000,-0.505238,0.000000,0.000000
9570,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.000000,0.036128,0.750475,0.906176,-0.370784,-0.038456,0.000000,0.151429,0.047636,-0.117487,-0.013770,0.0,0.0,0.0,0.718650,0.363280,-0.487631,-0.216457,0.0,0.0,0.0,0.000000,0.000000,1.000000,1.000000,0.00000,0.00000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.081449,0.024545,-0.062985,-0.006647,0.0,0.0,0.0,0.000000,0.000000,0.000000,1.0,0.541191,0.000000,0.0,0.000000,0.000000,-0.055270,0.106861,0.629510,0.337030,0.000000,0.000000,0.00000,-1.00000,0.000000,0.905513,1.000000,0.000000,0.000000,0.000000,-0.034131,0.117716,0.609148,0.276603,0.000000,0.000000,-0.003776,-0.463134,-0.172389,0.815545,1.194465,0.014949,0.0,0.0,0.943019,1.152353,0.00000,-0.249222,0.000000,0.000000,0.000000,0.000000,1.0,0.536484,0.00000,0.0,0.000000,0.000000,-0.055625,0.106733,0.613662,0.334585,0.000000,0.000000,0.000000,-1.000000,0.000000,0.979871,1.000000,0.000000,0.00000,0.000000,-0.037829,0.117203,0.606240,0.287083,0.000000,0.000000,0.000000,-0.470990,0.000000,0.687162,1.000000,1.000000,0.00000,0.000000,0.000000,1.120398,0.000000,0.000000,0.000000
118,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.000000,0.197650,0.962210,0.829712,-0.517833,-0.038456,0.000000,-0.536144,-0.516278,0.537498,0.407335,0.0,0.0,0.0,1.054659,0.525235,-1.199177,-0.251285,0.0,0.0,0.0,0.000000,-0.361370,-0.569628,0.197152,3.10621,3.53276,3.806477,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.987486,-1.947248,1.274033,3.338400,0.0,0.0,0.0,0.000000,1.000000,1.439145,1.0,-0.458809,-0.173747,0.0,0.000000,2.875222,1.155592,0.000000,-0.597177,-0.194774,0.000000,2.355873,1.85632,0.00000,-1.000000,-1.000000,0.000000,0.000000,0.000000,3.852860,1.153480,0.073176,-0.628105,-0.234633,0.000000,206.366211,1.285286,0.537035,-0.527119,-0.334334,0.000000,0.000000,0.0,0.0,-0.056981,-0.200489,0.00000,0.955392,5.640718,0.000000,1.000000,1.454892,1.0,-0.463516,-0.17664,0.0,0.000000,2.711345,1.149925,0.106733,-0.606736,-0.191789,0.000000,0.000000,1.000000,1.120252,0.000000,-1.000000,0.000000,0.000000,0.00000,3.185732,1.158377,0.146679,-0.625514,-0.225394,0.000000,0.000000,0.611082,0.657084,0.348825,-0.464566,0.000000,0.000000,0.00000,0.000000,-0.458593,0.792957,0.494762,1.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5734,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,64.016163,1.252015,0.678810,-0.359517,-0.591705,-0.038456,0.000000,0.214875,0.059927,-0.164360,-0.016092,0.0,0.0,0.0,0.718650,0.363280,-0.487631,-0.216457,0.0,0.0,0.0,1.000000,1.000000,0.000000,0.000000,0.00000,0.00000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.216707,0.049289,-0.161731,-0.011136,0.0,0.0,0.0,0.000000,1.000000,1.439145,1.0,-0.458809,-0.173747,0.0,0.000000,0.003534,0.713259,0.810427,-0.329416,-0.194774,0.000000,0.000000,0.00000,-1.00000,-1.000000,0.000000,1.251777,3.890819,0.000000,0.000639,0.683361,0.800469,-0.318848,-0.234633,0.000000,0.000000,-0.003776,-0.463134,-0.611346,0.441524,1.474942,3.106234,0.0,1.0,1.590052,0.799511,-0.41115,-0.249222,0.000000,0.000000,0.000000,0.000000,1.0,0.536484,0.00000,0.0,0.000000,0.003518,0.714502,0.797158,-0.331510,-0.191789,0.000000,2.403578,1.791791,0.000000,-1.000000,-1.000000,0.000000,0.000000,0.00000,0.000002,0.642572,0.787313,-0.275295,-0.225394,0.000000,277.875685,1.310935,0.153966,-0.464917,-0.469214,0.000000,0.000000,0.00000,0.000000,-0.481560,0.000000,0.494762,2.917461,0.820562
5191,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.000000,0.000000,-0.162068,-0.361021,0.213165,1.004553,1.222944,0.230361,0.062330,-0.175557,-0.016462,0.0,0.0,0.0,0.718650,0.363280,-0.487631,-0.216457,0.0,0.0,0.0,1.000000,1.000000,0.000000,0.000000,0.00000,0.00000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.264106,0.053963,-0.194553,-0.011607,0.0,0.0,0.0,0.000000,1.000000,1.439145,1.0,-0.458809,-0.173747,0.0,0.000000,1.742992,1.121734,0.409850,-0.586980,-0.194774,0.000000,2.355873,1.85632,0.00000,-1.000000,-1.000000,0.000000,0.000000,0.000000,2.131180,1.110435,0.469601,-0.611454,-0.234633,0.000000,206.366211,1.285286,0.537035,-0.527119,-0.334334,0.000000,0.000000,0.0,0.0,0.943019,1.152353,0.00000,-0.249222,0.000000,0.000000,1.000000,1.454892,1.0,-0.463516,-0.17664,0.0,0.000000,1.367974,1.098847,0.490589,-0.586247,-0.191789,0.000000,0.000000,1.000000,1.120252,0.000000,-1.000000,0.000000,0.000000,0.00000,1.548401,1.104480,0.507729,-0.601325,-0.225394,0.000000,0.000000,0.819512,0.683821,0.173659,-0.469042,0.000000,0.000000,4.43043,1.130212,0.000000,-0.207043,-0.505238,0.000000,0.000000
5390,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.000000,0.000000,-0.162068,-0.371236,0.000000,1.022573,1.278523,0.205718,0.058400,-0.157695,-0.015842,0.0,0.0,0.0,0.000000,0.000000,0.512369,0.000000,0.0,0.0,0.0,0.000000,0.000000,1.000000,1.000000,0.00000,0.00000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.214909,0.049074,-0.160468,-0.011110,0.0,0.0,0.0,0.000000,0.000000,-0.061817,0.0,0.000000,0.826253,1.0,0.000000,0.000000,-0.055727,-0.415546,0.437145,0.805226,1.000000,0.000000,1.00000,1.16197,0.000000,-1.000000,0.000000,0.000000,0.000000,0.000000,-0.034541,-0.417636,0.373870,0.771832,1.055761,0.000000,0.595810,0.670805,0.176794,-0.329560,0.000000,0.000000,0.0,0.0,0.943019,1.152353,0.00000,-0.249222,0.000000,0.000000,0.000000,-0.061236,0.0,0.000000,0.82336,1.0,0.000000,0.000000,-0.056086,-0.418612,0.428713,0.808211,1.000000,0.000000,1.000000,1.120252,0.000000,-1.000000,0.000000,0.000000,0.00000,0.000000,-0.038241,-0.432573,0.358469,0.793320,1.168216,0.000000,0.226889,0.529010,0.634698,-0.411444,0.000000,0.000000,0.00000,0.000000,-0.481560,0.000000,0.494762,2.917461,0.820562
860,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.000000,0.000000,-0.162068,-0.361021,0.213165,1.004553,1.222944,0.358282,0.074215,-0.264642,-0.017707,0.0,0.0,0.0,0.718650,0.363280,-0.487631,-0.216457,0.0,0.0,0.0,1.000000,1.000000,0.000000,0.000000,0.00000,0.00000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.306539,0.056621,-0.223217,-0.011783,0.0,0.0,0.0,0.000000,1.000000,1.439145,1.0,-0.458809,-0.173747,0.0,0.000000,0.218024,0.944273,0.734313,-0.482350,-0.194774,0.000000,0.000000,1.00000,1.16197,0.000000,-1.000000,0.000000,0.000000,0.000000,0.189094,0.914807,0.747132,-0.482865,-0.234633,0.000000,0.000000,0.807712,0.698969,0.000000,-0.334157,0.000000,0.000000,0.0,0.0,-0.056981,-0.200489,0.00000,0.955392,5.640718,0.000000,1.000000,1.454892,1.0,-0.463516,-0.17664,0.0,0.000000,0.093596,0.879525,0.757568,-0.444957,-0.191789,0.000000,0.000000,0.000000,0.000000,1.114095,0.000000,0.000000,0.000000,0.00000,0.055194,0.847193,0.760961,-0.425218,-0.225394,0.000000,0.000000,0.000000,0.000000,0.864322,0.000000,0.000000,0.000000,0.00000,0.000000,-0.481560,-0.207043,0.000000,3.131273,3.084294


## Instantiate and Train Model: RandomForestRegressor

In [24]:
model = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor(n_jobs = -1))
])

model.fit(X_train, y_train)

y_pred = model.predict(X_test)

In [25]:
metrics.mean_squared_error(y_test, y_pred, squared=False)



167.59472546130803