- Identify Categorical Features & Get Dummy
- Outlier Detection & Processing
- Standardization of Numerical Values

In [21]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
color = sns.color_palette()

%matplotlib inline

pd.options.mode.chained_assignment = None
pd.options.display.max_columns = 999
pd.options.display.max_rows = 100

In [22]:
model=pd.read_csv("merged_16_clean.csv")

In [23]:
model.dtypes

logerror                        float64
airconditioningtypeid           float64
bathroomcnt                     float64
bedroomcnt                      float64
buildingqualitytypeid           float64
calculatedfinishedsquarefeet    float64
fips                            float64
fireplacecnt                    float64
garagecarcnt                    float64
garagetotalsqft                 float64
heatingorsystemtypeid           float64
latitude                        float64
longitude                       float64
lotsizesquarefeet               float64
poolcnt                         float64
pooltypeid10                    float64
pooltypeid2                     float64
pooltypeid7                     float64
propertycountylandusecode        object
propertylandusetypeid           float64
regionidzip                     float64
roomcnt                         float64
threequarterbathnbr             float64
unitcnt                         float64
yearbuilt                       float64


# Identify Categorical Features & Get Dummy

We need to be careful when we get dummy for categorical features, or we will encounter "curse of dimensionality" !
 So, first, let's see how many levels we will get from each categorical features.

In [27]:
print('airconditioningtypeid',len(model['airconditioningtypeid'].unique()))
print("buildingqualitytypeid",len(model["buildingqualitytypeid"].unique()))
print("fips",len(model["fips"].unique()))
print("heatingorsystemtypeid",len(model["heatingorsystemtypeid"].unique()))
print("pooltypeid10",len(model["pooltypeid10"].unique()))
print("pooltypeid7",len(model["pooltypeid7"].unique()))
print("pooltypeid2",len(model["pooltypeid2"].unique()))
print("propertylandusetypeid",len(model["propertylandusetypeid"].unique()))
print("assessmentyear",len(model["assessmentyear"].unique()))
print("taxdelinquencyflag",len(model["taxdelinquencyflag"].unique()))
print("taxdelinquencyyear",len(model["taxdelinquencyyear"].unique()))
print("yearbuilt",len(model["yearbuilt"].unique()))
print("regionidzip",len(model["regionidzip"].unique()))


airconditioningtypeid 7
buildingqualitytypeid 28
fips 3
heatingorsystemtypeid 13
pooltypeid10 2
pooltypeid7 2
pooltypeid2 2
propertylandusetypeid 14
assessmentyear 1
taxdelinquencyflag 2
taxdelinquencyyear 12
yearbuilt 28
regionidzip 389


In [35]:
# To reduce the level that "regionidzip" has
model["regionidzip"] = model["regionidzip"].apply(lambda x:str(x)[:-3])
print("regionidzip",len(model["regionidzip"].unique()))

regionidzip 78


In [37]:
model=pd.get_dummies(model, columns=['airconditioningtypeid','buildingqualitytypeid',"fips",\
                           'heatingorsystemtypeid','pooltypeid10','pooltypeid2','pooltypeid7','propertylandusetypeid',\
                              "regionidzip","taxdelinquencyflag","taxdelinquencyyear","yearbuilt"])

In [38]:
model

Unnamed: 0,logerror,bathroomcnt,bedroomcnt,calculatedfinishedsquarefeet,fireplacecnt,garagecarcnt,garagetotalsqft,latitude,longitude,lotsizesquarefeet,poolcnt,propertycountylandusecode,roomcnt,threequarterbathnbr,unitcnt,numberofstories,structuretaxvaluedollarcnt,taxvaluedollarcnt,assessmentyear,landtaxvaluedollarcnt,airconditioningtypeid_0.0,airconditioningtypeid_1.0,airconditioningtypeid_3.0,airconditioningtypeid_5.0,airconditioningtypeid_9.0,airconditioningtypeid_11.0,airconditioningtypeid_13.0,buildingqualitytypeid_0.0,buildingqualitytypeid_1885.0,buildingqualitytypeid_1890.0,buildingqualitytypeid_1895.0,buildingqualitytypeid_1900.0,buildingqualitytypeid_1905.0,buildingqualitytypeid_1910.0,buildingqualitytypeid_1915.0,buildingqualitytypeid_1920.0,buildingqualitytypeid_1925.0,buildingqualitytypeid_1930.0,buildingqualitytypeid_1935.0,buildingqualitytypeid_1940.0,buildingqualitytypeid_1945.0,buildingqualitytypeid_1950.0,buildingqualitytypeid_1955.0,buildingqualitytypeid_1960.0,buildingqualitytypeid_1965.0,buildingqualitytypeid_1970.0,buildingqualitytypeid_1975.0,buildingqualitytypeid_1980.0,buildingqualitytypeid_1985.0,buildingqualitytypeid_1990.0,buildingqualitytypeid_1995.0,buildingqualitytypeid_2000.0,buildingqualitytypeid_2005.0,buildingqualitytypeid_2010.0,buildingqualitytypeid_2015.0,fips_6037.0,fips_6059.0,fips_6111.0,heatingorsystemtypeid_0.0,heatingorsystemtypeid_1.0,heatingorsystemtypeid_2.0,heatingorsystemtypeid_6.0,heatingorsystemtypeid_7.0,heatingorsystemtypeid_10.0,heatingorsystemtypeid_11.0,heatingorsystemtypeid_12.0,heatingorsystemtypeid_13.0,heatingorsystemtypeid_14.0,heatingorsystemtypeid_18.0,heatingorsystemtypeid_20.0,heatingorsystemtypeid_24.0,pooltypeid10_0.0,pooltypeid10_1.0,pooltypeid2_0.0,pooltypeid2_1.0,pooltypeid7_0.0,pooltypeid7_1.0,propertylandusetypeid_31.0,propertylandusetypeid_47.0,propertylandusetypeid_246.0,propertylandusetypeid_247.0,propertylandusetypeid_248.0,propertylandusetypeid_260.0,propertylandusetypeid_261.0,propertylandusetypeid_263.0,propertylandusetypeid_264.0,propertylandusetypeid_265.0,propertylandusetypeid_266.0,propertylandusetypeid_267.0,propertylandusetypeid_269.0,propertylandusetypeid_275.0,regionidzip_,regionidzip_39967,regionidzip_9598,regionidzip_9599,regionidzip_9600,regionidzip_9601,regionidzip_9602,regionidzip_9603,regionidzip_9604,regionidzip_9605,regionidzip_9607,regionidzip_9608,regionidzip_9609,regionidzip_9610,regionidzip_9611,regionidzip_9612,regionidzip_9613,regionidzip_9614,regionidzip_9615,regionidzip_9616,regionidzip_9617,regionidzip_9618,regionidzip_9619,regionidzip_9620,regionidzip_9621,regionidzip_9622,regionidzip_9623,regionidzip_9624,regionidzip_9626,regionidzip_9627,regionidzip_9628,regionidzip_9629,regionidzip_9632,regionidzip_9633,regionidzip_9634,regionidzip_9635,regionidzip_9636,regionidzip_9637,regionidzip_9638,regionidzip_9639,regionidzip_9640,regionidzip_9641,regionidzip_9642,regionidzip_9643,regionidzip_9644,regionidzip_9645,regionidzip_9646,regionidzip_9647,regionidzip_9648,regionidzip_9649,regionidzip_9650,regionidzip_9651,regionidzip_9652,regionidzip_9653,regionidzip_9693,regionidzip_9694,regionidzip_9695,regionidzip_9696,regionidzip_9697,regionidzip_9698,regionidzip_9699,regionidzip_9700,regionidzip_9701,regionidzip_9702,regionidzip_9703,regionidzip_9704,regionidzip_9705,regionidzip_9706,regionidzip_9707,regionidzip_9708,regionidzip_9709,regionidzip_9710,regionidzip_9711,regionidzip_9729,regionidzip_9731,regionidzip_9732,regionidzip_9733,regionidzip_9734,taxdelinquencyflag_Y,taxdelinquencyflag_none,taxdelinquencyyear_0.0,taxdelinquencyyear_6.0,taxdelinquencyyear_7.0,taxdelinquencyyear_8.0,taxdelinquencyyear_9.0,taxdelinquencyyear_10.0,taxdelinquencyyear_11.0,taxdelinquencyyear_12.0,taxdelinquencyyear_13.0,taxdelinquencyyear_14.0,taxdelinquencyyear_15.0,taxdelinquencyyear_99.0,yearbuilt_0.0,yearbuilt_1885.0,yearbuilt_1890.0,yearbuilt_1895.0,yearbuilt_1900.0,yearbuilt_1905.0,yearbuilt_1910.0,yearbuilt_1915.0,yearbuilt_1920.0,yearbuilt_1925.0,yearbuilt_1930.0,yearbuilt_1935.0,yearbuilt_1940.0,yearbuilt_1945.0,yearbuilt_1950.0,yearbuilt_1955.0,yearbuilt_1960.0,yearbuilt_1965.0,yearbuilt_1970.0,yearbuilt_1975.0,yearbuilt_1980.0,yearbuilt_1985.0,yearbuilt_1990.0,yearbuilt_1995.0,yearbuilt_2000.0,yearbuilt_2005.0,yearbuilt_2010.0,yearbuilt_2015.0
0,0.0276,2.0,3.0,1684.0,0.0,2.0,433.0,34280990.0,-118488536.0,7528.0,0.0,0100,0.0,1.0,1.0,1.0,122754.0,360170.0,2015.0,237416.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
1,-0.1684,3.5,4.0,2263.0,0.0,2.0,468.0,33668120.0,-117677556.0,3643.0,0.0,1,0.0,1.0,1.0,1.0,346458.0,585529.0,2015.0,239071.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
2,-0.0040,3.0,2.0,2217.0,0.0,2.0,433.0,34136312.0,-118175032.0,11423.0,0.0,0100,0.0,1.0,1.0,1.0,61994.0,119906.0,2015.0,57912.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0.0218,2.0,2.0,839.0,0.0,2.0,433.0,33755800.0,-118309000.0,70859.0,0.0,010C,0.0,1.0,1.0,1.0,171518.0,244880.0,2015.0,73362.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
4,-0.0050,2.5,4.0,2283.0,0.0,2.0,598.0,33485643.0,-117700234.0,6000.0,1.0,122,8.0,1.0,1.0,2.0,169574.0,434551.0,2015.0,264977.0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90270,-0.0356,1.0,1.0,653.0,0.0,2.0,433.0,34203000.0,-118562000.0,93676.0,1.0,010C,0.0,1.0,1.0,1.0,43800.0,191000.0,2015.0,147200.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
90271,0.0070,3.0,3.0,2856.0,0.0,2.0,433.0,34198744.0,-118224849.0,9343.0,0.0,0100,0.0,1.0,1.0,1.0,117893.0,161111.0,2015.0,43218.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
90272,-0.2679,2.0,4.0,2617.0,0.0,2.0,433.0,34075655.0,-118025537.0,6865.0,0.0,0200,0.0,1.0,2.0,1.0,22008.0,38096.0,2015.0,16088.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
90273,0.0602,2.0,2.0,1034.0,0.0,2.0,433.0,33963900.0,-118367000.0,20033.0,0.0,010C,0.0,1.0,1.0,1.0,132991.0,165869.0,2015.0,32878.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0


In [39]:
model.dtypes

logerror                        float64
bathroomcnt                     float64
bedroomcnt                      float64
calculatedfinishedsquarefeet    float64
fireplacecnt                    float64
                                 ...   
yearbuilt_1995.0                  uint8
yearbuilt_2000.0                  uint8
yearbuilt_2005.0                  uint8
yearbuilt_2010.0                  uint8
yearbuilt_2015.0                  uint8
Length: 211, dtype: object

# Outlier Detection & Processing

Outliers in the y column have high influence of the model, and outliers in the x columns have high leverage. So we need to remove those outliers.

In [None]:
# get numerical features

def get_numerical_features(df):
    
    """
    input: dataframe
    return: numerical feature list
    """
    res = []
    for f in df:
        if np.issubdtype(df[f].dtype, np.number):
#             print (df[f].dtype)
            res.append(f)
    
    return res

numerical_features = get_numerical_features(model)


In [42]:
# test
len(numerical_features)

210

In [43]:
# detect outlier using IQR

def detect_outlier(feature_values, feature_name):
    
    """
    input: numerical data, Array
    return: new data without outlier
    """
    tmp = feature_values.copy()
    
    q1, q3 = np.percentile(tmp, [25, 75])
    iqr = q3 - q1
    lower_bound = q1 - (iqr * 1.5)
    upper_bound = q3 + (iqr * 1.5)
    
    has_outlier = (np.max(tmp) > upper_bound) or (np.min(tmp) < lower_bound)
    
    if not has_outlier:
        text = feature_name + ": No outlier, yes."
        print (text.rjust(60))
        return tmp
    
    upper_idx = np.where(tmp > upper_bound)
    lower_idx = np.where(tmp < lower_bound)
    tmp[upper_idx] = upper_bound
    tmp[lower_idx] = lower_bound
    
    if np.max(tmp) <= upper_bound and np.min(tmp) >= lower_bound:
        text = feature_name + ": Done with handling outliers."
        print (text.rjust(60), " lower bound: ", lower_bound, ", upper bound:", upper_bound)
    else:
        print (feature_name,": Warning!! Fail handling outliers")
    
    return tmp

In [45]:
for f in numerical_features:
    model[f] = detect_outlier(model[f].values,f)

                      logerror: Done with handling outliers.  lower bound:  -0.12205 , upper bound: 0.13595000000000002
                   bathroomcnt: Done with handling outliers.  lower bound:  0.5 , upper bound: 4.5
                    bedroomcnt: Done with handling outliers.  lower bound:  -1.0 , upper bound: 7.0
  calculatedfinishedsquarefeet: Done with handling outliers.  lower bound:  -166.0 , upper bound: 3442.0
                  fireplacecnt: Done with handling outliers.  lower bound:  0.0 , upper bound: 0.0
                  garagecarcnt: Done with handling outliers.  lower bound:  2.0 , upper bound: 2.0
               garagetotalsqft: Done with handling outliers.  lower bound:  433.0 , upper bound: 433.0
                      latitude: Done with handling outliers.  lower bound:  33269733.25 , upper bound: 34714547.25
                     longitude: Done with handling outliers.  lower bound:  -119146848.5 , upper bound: -117186432.5
             lotsizesquarefeet: Done with h

             regionidzip_39967: Done with handling outliers.  lower bound:  0.0 , upper bound: 0.0
              regionidzip_9598: Done with handling outliers.  lower bound:  0.0 , upper bound: 0.0
              regionidzip_9599: Done with handling outliers.  lower bound:  0.0 , upper bound: 0.0
              regionidzip_9600: Done with handling outliers.  lower bound:  0.0 , upper bound: 0.0
              regionidzip_9601: Done with handling outliers.  lower bound:  0.0 , upper bound: 0.0
              regionidzip_9602: Done with handling outliers.  lower bound:  0.0 , upper bound: 0.0
              regionidzip_9603: Done with handling outliers.  lower bound:  0.0 , upper bound: 0.0
              regionidzip_9604: Done with handling outliers.  lower bound:  0.0 , upper bound: 0.0
              regionidzip_9605: Done with handling outliers.  lower bound:  0.0 , upper bound: 0.0
              regionidzip_9607: Done with handling outliers.  lower bound:  0.0 , upper bound: 0.0
          

# Standardization of Numerical Values

Since the scale of numerical values will affect the model estimation, we need to standardize all the numerical features.

In [47]:
# save list,直接用
from sklearn.preprocessing import StandardScaler

# initialize a scaler object
scaler = StandardScaler()
d=pd.DataFrame(scaler.fit_transform(model[numerical_features]))
d.columns= numerical_features
model.drop(numerical_features, axis=1, inplace=True)
model_new=pd.concat([d,model],axis=1)

In [48]:
model_new

Unnamed: 0,logerror,bathroomcnt,bedroomcnt,calculatedfinishedsquarefeet,fireplacecnt,garagecarcnt,garagetotalsqft,latitude,longitude,lotsizesquarefeet,poolcnt,roomcnt,threequarterbathnbr,unitcnt,numberofstories,structuretaxvaluedollarcnt,taxvaluedollarcnt,assessmentyear,landtaxvaluedollarcnt,airconditioningtypeid_0.0,airconditioningtypeid_1.0,airconditioningtypeid_3.0,airconditioningtypeid_5.0,airconditioningtypeid_9.0,airconditioningtypeid_11.0,airconditioningtypeid_13.0,buildingqualitytypeid_0.0,buildingqualitytypeid_1885.0,buildingqualitytypeid_1890.0,buildingqualitytypeid_1895.0,buildingqualitytypeid_1900.0,buildingqualitytypeid_1905.0,buildingqualitytypeid_1910.0,buildingqualitytypeid_1915.0,buildingqualitytypeid_1920.0,buildingqualitytypeid_1925.0,buildingqualitytypeid_1930.0,buildingqualitytypeid_1935.0,buildingqualitytypeid_1940.0,buildingqualitytypeid_1945.0,buildingqualitytypeid_1950.0,buildingqualitytypeid_1955.0,buildingqualitytypeid_1960.0,buildingqualitytypeid_1965.0,buildingqualitytypeid_1970.0,buildingqualitytypeid_1975.0,buildingqualitytypeid_1980.0,buildingqualitytypeid_1985.0,buildingqualitytypeid_1990.0,buildingqualitytypeid_1995.0,buildingqualitytypeid_2000.0,buildingqualitytypeid_2005.0,buildingqualitytypeid_2010.0,buildingqualitytypeid_2015.0,fips_6037.0,fips_6059.0,fips_6111.0,heatingorsystemtypeid_0.0,heatingorsystemtypeid_1.0,heatingorsystemtypeid_2.0,heatingorsystemtypeid_6.0,heatingorsystemtypeid_7.0,heatingorsystemtypeid_10.0,heatingorsystemtypeid_11.0,heatingorsystemtypeid_12.0,heatingorsystemtypeid_13.0,heatingorsystemtypeid_14.0,heatingorsystemtypeid_18.0,heatingorsystemtypeid_20.0,heatingorsystemtypeid_24.0,pooltypeid10_0.0,pooltypeid10_1.0,pooltypeid2_0.0,pooltypeid2_1.0,pooltypeid7_0.0,pooltypeid7_1.0,propertylandusetypeid_31.0,propertylandusetypeid_47.0,propertylandusetypeid_246.0,propertylandusetypeid_247.0,propertylandusetypeid_248.0,propertylandusetypeid_260.0,propertylandusetypeid_261.0,propertylandusetypeid_263.0,propertylandusetypeid_264.0,propertylandusetypeid_265.0,propertylandusetypeid_266.0,propertylandusetypeid_267.0,propertylandusetypeid_269.0,propertylandusetypeid_275.0,regionidzip_,regionidzip_39967,regionidzip_9598,regionidzip_9599,regionidzip_9600,regionidzip_9601,regionidzip_9602,regionidzip_9603,regionidzip_9604,regionidzip_9605,regionidzip_9607,regionidzip_9608,regionidzip_9609,regionidzip_9610,regionidzip_9611,regionidzip_9612,regionidzip_9613,regionidzip_9614,regionidzip_9615,regionidzip_9616,regionidzip_9617,regionidzip_9618,regionidzip_9619,regionidzip_9620,regionidzip_9621,regionidzip_9622,regionidzip_9623,regionidzip_9624,regionidzip_9626,regionidzip_9627,regionidzip_9628,regionidzip_9629,regionidzip_9632,regionidzip_9633,regionidzip_9634,regionidzip_9635,regionidzip_9636,regionidzip_9637,regionidzip_9638,regionidzip_9639,regionidzip_9640,regionidzip_9641,regionidzip_9642,regionidzip_9643,regionidzip_9644,regionidzip_9645,regionidzip_9646,regionidzip_9647,regionidzip_9648,regionidzip_9649,regionidzip_9650,regionidzip_9651,regionidzip_9652,regionidzip_9653,regionidzip_9693,regionidzip_9694,regionidzip_9695,regionidzip_9696,regionidzip_9697,regionidzip_9698,regionidzip_9699,regionidzip_9700,regionidzip_9701,regionidzip_9702,regionidzip_9703,regionidzip_9704,regionidzip_9705,regionidzip_9706,regionidzip_9707,regionidzip_9708,regionidzip_9709,regionidzip_9710,regionidzip_9711,regionidzip_9729,regionidzip_9731,regionidzip_9732,regionidzip_9733,regionidzip_9734,taxdelinquencyflag_Y,taxdelinquencyflag_none,taxdelinquencyyear_0.0,taxdelinquencyyear_6.0,taxdelinquencyyear_7.0,taxdelinquencyyear_8.0,taxdelinquencyyear_9.0,taxdelinquencyyear_10.0,taxdelinquencyyear_11.0,taxdelinquencyyear_12.0,taxdelinquencyyear_13.0,taxdelinquencyyear_14.0,taxdelinquencyyear_15.0,taxdelinquencyyear_99.0,yearbuilt_0.0,yearbuilt_1885.0,yearbuilt_1890.0,yearbuilt_1895.0,yearbuilt_1900.0,yearbuilt_1905.0,yearbuilt_1910.0,yearbuilt_1915.0,yearbuilt_1920.0,yearbuilt_1925.0,yearbuilt_1930.0,yearbuilt_1935.0,yearbuilt_1940.0,yearbuilt_1945.0,yearbuilt_1950.0,yearbuilt_1955.0,yearbuilt_1960.0,yearbuilt_1965.0,yearbuilt_1970.0,yearbuilt_1975.0,yearbuilt_1980.0,yearbuilt_1985.0,yearbuilt_1990.0,yearbuilt_1995.0,yearbuilt_2000.0,yearbuilt_2005.0,yearbuilt_2010.0,yearbuilt_2015.0,propertycountylandusecode
0,0.328685,-0.289369,-0.020738,-0.047657,0.0,0.0,0.0,1.040557,-0.820193,-0.258111,0.0,0.0,0.0,0.0,0.0,-0.348683,-0.160999,0.0,-0.030868,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.735672,-0.610398,0.0,0.0,0.0,0.990947,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.699126,0.0,0.0,0.0,-0.581550,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0100
1,-2.075183,1.408128,0.876312,0.751400,0.0,0.0,0.0,-1.273292,1.461462,-1.125062,0.0,0.0,0.0,0.0,0.0,1.794055,0.659336,0.0,-0.022647,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.359302,1.638274,0.0,0.0,0.0,-1.009136,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.699126,0.0,0.0,0.0,-0.581550,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
2,-0.178914,0.842296,-0.917789,0.687917,0.0,0.0,0.0,0.494335,0.061836,0.611072,0.0,0.0,0.0,0.0,0.0,-0.930670,-1.035589,0.0,-0.922558,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.735672,-0.610398,0.0,0.0,0.0,0.990947,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.699126,0.0,0.0,0.0,-0.581550,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0100
3,0.235518,-0.289369,-0.917789,-1.213810,0.0,0.0,0.0,-0.942262,-0.315077,1.866366,0.0,0.0,0.0,0.0,0.0,0.118400,-0.580669,0.0,-0.845810,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.735672,-0.610398,0.0,0.0,0.0,0.990947,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.430357,0.0,0.0,0.0,1.719543,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,010C
4,-0.194977,0.276463,0.876312,0.779001,0.0,0.0,0.0,-1.962221,1.397658,-0.599089,0.0,0.0,0.0,0.0,0.0,0.099780,0.109758,0.0,0.106042,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.359302,1.638274,0.0,0.0,0.0,-1.009136,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.699126,0.0,0.0,0.0,-0.581550,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,122
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90270,-0.686513,-1.421033,-1.814839,-1.470502,0.0,0.0,0.0,0.746111,-1.026881,1.866366,0.0,0.0,0.0,0.0,0.0,-1.104941,-0.776798,0.0,-0.479018,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.735672,-0.610398,0.0,0.0,0.0,0.990947,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.430357,0.0,0.0,0.0,1.719543,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,010C
90271,-0.002218,0.842296,-0.020738,1.569777,0.0,0.0,0.0,0.730043,-0.078322,0.146913,0.0,0.0,0.0,0.0,0.0,-0.395244,-0.885598,0.0,-0.995551,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.735672,-0.610398,0.0,0.0,0.0,0.990947,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.699126,0.0,0.0,0.0,-0.581550,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0100
90272,-2.075183,-0.289369,0.876312,1.239942,0.0,0.0,0.0,0.265329,0.482433,-0.406061,0.0,0.0,0.0,0.0,0.0,-1.313675,-1.333388,0.0,-1.130319,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.735672,-0.610398,0.0,0.0,0.0,-1.009136,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.430357,0.0,0.0,0.0,-0.581550,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0200
90273,0.852348,-0.289369,-0.917789,-0.944698,0.0,0.0,0.0,-0.156594,-0.478257,1.866366,0.0,0.0,0.0,0.0,0.0,-0.250629,-0.868278,0.0,-1.046915,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.735672,-0.610398,0.0,0.0,0.0,0.990947,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.430357,0.0,0.0,0.0,1.719543,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,010C


In [49]:
model_new.to_csv('ready_to_modeling.csv', encoding='utf-8', index=False)