# Import packages

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.model_selection import RandomizedSearchCV
from sklearn.linear_model import LogisticRegression,LogisticRegressionCV
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import NMF
from sklearn import preprocessing, metrics
from sklearn.pipeline import Pipeline, FeatureUnion
import plotly
import plotly.graph_objs as go
from sklearn.base import TransformerMixin, BaseEstimator
import matplotlib.pyplot as plt
import nltk.stem
from datetime import datetime as dt
#np.set_printoptions(threshold=np.nan)

  from numpy.core.umath_tests import inner1d


In [2]:
train = pd.read_csv('ADAS_File_5_updated.csv',encoding='latin-1',error_bad_lines=False)

In [3]:
test=pd.read_csv('data_unlabelled_xing.csv',encoding='latin-1',error_bad_lines=False)
test=test[test['batch']==2]
test = test.loc[(test['FAILDATE'] >= 20120101) & (test['FAILDATE'] <= 20190000)]


Columns (28,36,44) have mixed types. Specify dtype option on import or set low_memory=False.



In [4]:
ntrain = train.shape[0]
ntest = test.shape[0]
data = pd.concat((train, test)).reset_index(drop=True)
print("data size is : {}".format(data.shape))


Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.





data size is : (157602, 70)


In [5]:
train.shape

(2496, 59)

In [6]:
test.shape

(155106, 60)

In [7]:
adas1=train['ADAS1']
aeb1=train['AutomaticEmergencyBraking1']
acc1=train['AdaptiveCruiseControl1']

In [8]:
data['FAILDATE'] = pd.to_datetime(data['FAILDATE'], format = "%Y%m%d")
data['AGE'] = data['FAILDATE'].apply(lambda x: int(str(x)[:4])) - data['YEARTXT']
data['AGE'] = data['AGE'].apply(lambda x: 0 if x < 0 else x)
data['MODEL_YEAR'] = data['MFR_NAME'] + data['MAKETXT'] + data['YEARTXT'].apply(lambda x : str(x))
data['MFR_NAME'] = data['MFR_NAME'] + data['MAKETXT']
## Same model name can be used in different makes
data['MODELTXT']=data["MAKETXT"].map(str) +' '+ data["MODELTXT"]
data['CITY'] = data['CITY'] + data['STATE']
data['FAILDATE'] = pd.to_datetime(data['FAILDATE'], format = "%Y%m%d")
data['FAILMONTH'] = data['FAILDATE'].apply(lambda x : x.month)
data['FAILWEEKDAY'] = data['FAILDATE'].dt.dayofweek

In [9]:
cols_use = ['MFR_NAME', 'MAKETXT','MODELTXT','MODEL_YEAR', 'YEARTXT', 'CRASH', 'FIRE', 'INJURED', 'DEATHS', 'COMPDESC', 'CITY', 'STATE', 'MILES',
            'OCCURENCES', 'CDESCR', 'ANTI_BRAKES_YN', 'CRUISE_CONT_YN', 'VEH_SPEED', 'AGE', 'FAILMONTH', 'FAILWEEKDAY']

data = data[cols_use]

In [10]:
data["CDESCR"] = data["CDESCR"].apply(lambda x : str(x).lower())
data["COMPDESC"] = data["COMPDESC"].apply(lambda x : x.lower())

# Combine the brand and the car type to make sure the type is unique
- We find that since different car brands may have the same name of their car types, we cannot uniquely identify the model of the car without combineing the car's brand and the car's model. Therefore, we combine the brand (`MAKETXT`) and the car type (`MODELTXT`) to make sure the car's model is unique

# Brief description about the numeric variable
- After looking carefully of the numeric variable, we think that in the true dataset, there may be outliers at `INJURED` (number of people injured in the accident), `DEATHS` (number of people dead in the accident), `OCCURENCES` (number of the mulfunction condition occured), `VEH_SPEED` (the speed of the car when the accident happened). 

In [11]:
data.describe()
### outlier in INJURED, DEATHS, OCCURENCES(number of occurence), VEH_SPEED

Unnamed: 0,YEARTXT,INJURED,DEATHS,MILES,OCCURENCES,VEH_SPEED,AGE,FAILMONTH,FAILWEEKDAY
count,157602.0,56182.0,53547.0,129135.0,43247.0,104146.0,157602.0,157602.0,157602.0
mean,2013.748493,0.183404,0.008927,34832.97,1.190348,30.761642,1.958091,6.37917,2.667942
std,1.559546,0.735022,0.494464,85329.86,1.22066,29.439233,1.715441,3.321202,1.918884
min,2012.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
25%,2012.0,0.0,0.0,5000.0,1.0,5.0,0.0,4.0,1.0
50%,2013.0,0.0,0.0,23000.0,1.0,30.0,2.0,6.0,3.0
75%,2015.0,0.0,0.0,51628.0,1.0,55.0,3.0,9.0,4.0
max,2018.0,99.0,99.0,9000443.0,100.0,999.0,6.0,12.0,6.0


In [12]:
data.head()

Unnamed: 0,MFR_NAME,MAKETXT,MODELTXT,MODEL_YEAR,YEARTXT,CRASH,FIRE,INJURED,DEATHS,COMPDESC,...,STATE,MILES,OCCURENCES,CDESCR,ANTI_BRAKES_YN,CRUISE_CONT_YN,VEH_SPEED,AGE,FAILMONTH,FAILWEEKDAY
0,Chrysler (FCA US LLC)CHRYSLER,CHRYSLER,CHRYSLER 200,Chrysler (FCA US LLC)CHRYSLER2015,2015,Y,N,1.0,0.0,seat belts,...,SC,78000.0,1.0,tl* the contact owned a 2015 chrysler 200. the...,N,N,60.0,2,9,1
1,Hyundai Motor AmericaHYUNDAI,HYUNDAI,HYUNDAI SANTA FE,Hyundai Motor AmericaHYUNDAI2015,2015,N,N,,,electrical system,...,IN,21500.0,1.0,similar to incident 10885095; blind spot detec...,N,N,70.0,1,11,3
2,Toyota Motor CorporationTOYOTA,TOYOTA,TOYOTA LAND CRUISER,Toyota Motor CorporationTOYOTA2016,2016,N,N,,,electrical system,...,CA,6000.0,,purchased vehicle in early 2016. may/june 201...,N,N,0.0,1,3,0
3,"Tesla Motors, Inc.TESLA",TESLA,TESLA MODEL S,"Tesla Motors, Inc.TESLA2016",2016,N,N,,,suspension,...,CA,7000.0,,here is a tesla in the salvage yard. https://...,N,N,30.0,0,11,1
4,Ford Motor CompanyFORD,FORD,FORD ESCAPE,Ford Motor CompanyFORD2014,2014,N,Y,,,unknown or other,...,PA,58205.0,,driving down the road (4 lane highway) and my ...,N,N,70.0,3,1,0


In [13]:
cv=StratifiedKFold(n_splits=5,shuffle=True,random_state=1)

In [14]:
# another split method: base on time series, we use the preivious data to predict the future data

#cv=TimeSeriesSplit(n_splits=5)    
#data=data.sort_values(by=['DATEA'])
#data['DATEA'].head()

# Handle outliers
- For `INJURED` and `DEATHS`, a maximum accomdation for a car/minivan is 8 people. If the number of injured or dead people is over 10, we adjust it to 8.

- For the `OCCURENCES`, our boundary is qualtile 95th. If the number of occurence is over the number at 95th qualtile, we adjust it to the number at 95th qualtile.

- For the `VEH_SPEED`, if the speed of the car exceed 160 km/h, we adjust it to 160.

In [15]:
data["INJURED"]=data["INJURED"].clip(upper=8)

In [16]:
data["DEATHS"]=data["DEATHS"].clip(upper=8)

In [17]:
data["OCCURENCES"]=data["OCCURENCES"].clip(upper=data["OCCURENCES"].quantile(q=0.95))

In [18]:
data["VEH_SPEED"]=data["VEH_SPEED"].clip(upper=160)

# Handle "NA"
- For `INJURED` and `DEATHS`, we use 0 to fill the null value.
- For `MILES` (the miles of the car), we fill the number that is the median of the `MILES` add a random number form the normal distribution ranged from 0 to 3000.
- For `OCCURENCES`, we use 1 to fill the null value.
- For `VEH_SPEED`, we fill the number that is the median of the `VEH_SPEED` add a random number form the normal distribution ranged from 0 to 200.
- For `MEDICAL_ATTN` (Was medical attention required) and `VEHICLES_TOWED_YN` (Was vehicle towed), since they are binary of "Y" (yes) and "N" (no), we decide to use "N" to fill the null value.

In [19]:
## fill na
data['INJURED'] = data['INJURED'].fillna(0)
data['DEATHS'] = data['DEATHS'].fillna(0)
data['MILES'] = data['MILES'].fillna(data[:ntrain]['MILES'].median()+np.random.normal(0,3000))
data['VEH_SPEED'] = data['VEH_SPEED'].fillna(data[:ntrain]['VEH_SPEED'].median()+np.random.normal(0,200))
data['OCCURENCES'] = data['OCCURENCES'].fillna(1)
data['ANTI_BRAKES_YN'] = data['ANTI_BRAKES_YN'].fillna('N')
data['CRUISE_CONT_YN'] = data['CRUISE_CONT_YN'].fillna('N')

In [20]:
data.head()

Unnamed: 0,MFR_NAME,MAKETXT,MODELTXT,MODEL_YEAR,YEARTXT,CRASH,FIRE,INJURED,DEATHS,COMPDESC,...,STATE,MILES,OCCURENCES,CDESCR,ANTI_BRAKES_YN,CRUISE_CONT_YN,VEH_SPEED,AGE,FAILMONTH,FAILWEEKDAY
0,Chrysler (FCA US LLC)CHRYSLER,CHRYSLER,CHRYSLER 200,Chrysler (FCA US LLC)CHRYSLER2015,2015,Y,N,1.0,0.0,seat belts,...,SC,78000.0,1.0,tl* the contact owned a 2015 chrysler 200. the...,N,N,60.0,2,9,1
1,Hyundai Motor AmericaHYUNDAI,HYUNDAI,HYUNDAI SANTA FE,Hyundai Motor AmericaHYUNDAI2015,2015,N,N,0.0,0.0,electrical system,...,IN,21500.0,1.0,similar to incident 10885095; blind spot detec...,N,N,70.0,1,11,3
2,Toyota Motor CorporationTOYOTA,TOYOTA,TOYOTA LAND CRUISER,Toyota Motor CorporationTOYOTA2016,2016,N,N,0.0,0.0,electrical system,...,CA,6000.0,1.0,purchased vehicle in early 2016. may/june 201...,N,N,0.0,1,3,0
3,"Tesla Motors, Inc.TESLA",TESLA,TESLA MODEL S,"Tesla Motors, Inc.TESLA2016",2016,N,N,0.0,0.0,suspension,...,CA,7000.0,1.0,here is a tesla in the salvage yard. https://...,N,N,30.0,0,11,1
4,Ford Motor CompanyFORD,FORD,FORD ESCAPE,Ford Motor CompanyFORD2014,2014,N,Y,0.0,0.0,unknown or other,...,PA,58205.0,1.0,driving down the road (4 lane highway) and my ...,N,N,70.0,3,1,0


We continue to encode the data, for further model building process in sklearn.

In [21]:
# we can also try OneHotEncoder() or CategoricalDummifier() within pipeline later
#data = pd.get_dummies(data, columns=data.columns.difference(['CDESCR']))

In [22]:
train = data[:ntrain]
test = data[ntrain:]

In [23]:
np.random.seed(1)

In [24]:
label_feats=['YEARTXT','FAILMONTH','FAILWEEKDAY']
text_feat= ['CDESCR']
cat_feats = set(data.dtypes[data.dtypes == "object"].index) | set(label_feats) - set(text_feat)
num_feats = set(data.dtypes[data.dtypes != "object"].index) - set(label_feats)

make sure all numeric variables are in the same scale

In [25]:
scaler = StandardScaler()

In [26]:
class FactorExtractor(TransformerMixin, BaseEstimator):
    """
    In: pd.DataFrame
        Column in that Frame
    Out: pd.Series
    
    In: pd.DataFrame
        list of Columns in that Frame
    Out: pd.DataFrame
    """

    def __init__(self, factor):
        self.factor = factor

    def transform(self, data):
        return data.loc[:,self.factor]

    def fit(self, *_):
        return self


In [27]:
! pip install category_encoders



In [28]:
import category_encoders as ce
encoder = ce.OneHotEncoder(cols = cat_feats,drop_invariant=True,use_cat_names=True,handle_unknown='ignore')

In [29]:
nmf = NMF(init='random', random_state=0)

In [30]:
# allow stemming in Sklearn TfidfVectorizer
en_stemmer = nltk.stem.SnowballStemmer('english')
class StemmedTfidfVectorizer(TfidfVectorizer):
    def build_analyzer(self):
        analyzer = super(StemmedTfidfVectorizer, self).build_analyzer()
        return lambda doc: ([en_stemmer.stem(w) for w in analyzer(doc)])

tfidf_vectorizer= StemmedTfidfVectorizer(analyzer="word", stop_words='english')

In [31]:
gbm = GradientBoostingClassifier(learning_rate=0.05)

In [32]:
catpipe=Pipeline([ ('cat_extractor',FactorExtractor(cat_feats)),
                   ('encode', encoder)
                  ])

numpipe=Pipeline([ ('feat_extractor',FactorExtractor(num_feats)),
                   ('Std',scaler)
                  ])

# extract the value in the list, so we can pass a pd series to the vectorizer, instead of a 1-column dataframe
textpipe=Pipeline([('text_extractor', FactorExtractor(text_feat[0])),  
                   ('tfidf',tfidf_vectorizer)#,
                   #('count',vectorizer)
                   #('nmf',nmf)
                  ])

feat_union=FeatureUnion([('text',textpipe),
                         ('num',numpipe),
                         ('cat',catpipe)
                        ],n_jobs=-1)


pipe=Pipeline([ #('imputer',imp),
                ('union',feat_union),
                #('Std',scaler),
                ('clf',gbm)])

In [33]:
param_dist=dict(union__text__tfidf__max_df=[0.7,0.8,0.9],
                union__text__tfidf__min_df=range(1,15),
                union__text__tfidf__binary=[True,False],
                #union__text__nmf__n_components=range(5,100,2), 
                clf__loss=['deviance','exponential'],
                clf__n_estimators=[100,200],
                clf__subsample=[s/float(100) for s in range(50, 101, 2)],
                clf__max_features=[s/float(100) for s in range(1, 90, 3)],
                clf__max_depth=range(2,5),
                clf__min_samples_leaf=range(5,30,3),
                clf__min_samples_split=range(5,30),
                clf__random_state =range(1,10))

model=RandomizedSearchCV(pipe,param_dist,cv=cv,n_iter=30,n_jobs=-1,random_state=1,scoring="roc_auc")


In [34]:
model.fit(train,adas1)


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed p


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed p


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed p


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed p


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed p


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed p


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed parallel loops cannot be nested, setting n_jobs=1


Multiprocessing-backed p

RandomizedSearchCV(cv=StratifiedKFold(n_splits=5, random_state=1, shuffle=True),
          error_score='raise',
          estimator=Pipeline(memory=None,
     steps=[('union', FeatureUnion(n_jobs=-1,
       transformer_list=[('text', Pipeline(memory=None,
     steps=[('text_extractor', FactorExtractor(factor='CDESCR')), ('tfidf', StemmedTfidfVectorizer(analyzer='word', binary=False, decode_error='strict',
            dtype=<class 'numpy.int64'>, encoding=...      presort='auto', random_state=None, subsample=1.0, verbose=0,
              warm_start=False))]),
          fit_params=None, iid=True, n_iter=30, n_jobs=-1,
          param_distributions={'union__text__tfidf__max_df': [0.7, 0.8, 0.9], 'union__text__tfidf__min_df': range(1, 5), 'union__text__tfidf__binary': [True, False], 'clf__loss': ['deviance', 'exponential'], 'clf__n_estimators': [100, 200], 'clf__subsample': [0.5, 0.52, 0.54, 0.56, 0.58, 0.6, 0.62, 0.64, 0.66...s_leaf': range(5, 30, 3), 'clf__min_samples_split': range(5, 30

In [35]:
model.grid_scores_


The grid_scores_ attribute was deprecated in version 0.18 in favor of the more elaborate cv_results_ attribute. The grid_scores_ attribute will not be available from 0.20



[mean: 0.92311, std: 0.01501, params: {'union__text__tfidf__min_df': 2, 'union__text__tfidf__max_df': 0.7, 'union__text__tfidf__binary': False, 'clf__subsample': 0.88, 'clf__random_state': 3, 'clf__n_estimators': 200, 'clf__min_samples_split': 23, 'clf__min_samples_leaf': 14, 'clf__max_features': 0.34, 'clf__max_depth': 4, 'clf__loss': 'deviance'},
 mean: 0.92456, std: 0.01214, params: {'union__text__tfidf__min_df': 1, 'union__text__tfidf__max_df': 0.9, 'union__text__tfidf__binary': False, 'clf__subsample': 0.84, 'clf__random_state': 6, 'clf__n_estimators': 200, 'clf__min_samples_split': 5, 'clf__min_samples_leaf': 5, 'clf__max_features': 0.37, 'clf__max_depth': 4, 'clf__loss': 'exponential'},
 mean: 0.88187, std: 0.02374, params: {'union__text__tfidf__min_df': 1, 'union__text__tfidf__max_df': 0.7, 'union__text__tfidf__binary': True, 'clf__subsample': 0.52, 'clf__random_state': 4, 'clf__n_estimators': 100, 'clf__min_samples_split': 14, 'clf__min_samples_leaf': 26, 'clf__max_features': 

In [36]:
model.best_score_

0.9257505156966767

In [37]:
model.best_params_

{'clf__loss': 'exponential',
 'clf__max_depth': 4,
 'clf__max_features': 0.37,
 'clf__min_samples_leaf': 14,
 'clf__min_samples_split': 21,
 'clf__n_estimators': 100,
 'clf__random_state': 3,
 'clf__subsample': 0.84,
 'union__text__tfidf__binary': False,
 'union__text__tfidf__max_df': 0.9,
 'union__text__tfidf__min_df': 2}

In [38]:
# test set is too big, it raise error on prediction
test1=test[:50000]
test2=test[50000:100000]
test3=test[100000:]

In [41]:
t=test.head(2)
t

Unnamed: 0,MFR_NAME,MAKETXT,MODELTXT,MODEL_YEAR,YEARTXT,CRASH,FIRE,INJURED,DEATHS,COMPDESC,...,STATE,MILES,OCCURENCES,CDESCR,ANTI_BRAKES_YN,CRUISE_CONT_YN,VEH_SPEED,AGE,FAILMONTH,FAILWEEKDAY
2496,Chrysler (FCA US LLC)RAM,RAM,RAM 3500,Chrysler (FCA US LLC)RAM2012,2012,N,N,0.0,0.0,unknown or other,...,TX,8692.210559,1.0,2012 dodge ram 3500. consumer writes in regard...,N,N,-72.859131,6,2,4
2497,FORD MOTOR COMPANYFORD,FORD,FORD FUSION,FORD MOTOR COMPANYFORD2012,2012,N,N,0.0,0.0,electrical system,...,CA,990.0,2.0,"on january 2nd, 2012 on 2-separate occasions t...",N,N,0.0,0,1,0


In [75]:
list(cat_feats)

['MODEL_YEAR',
 'FAILMONTH',
 'MAKETXT',
 'FIRE',
 'YEARTXT',
 'CITY',
 'CRUISE_CONT_YN',
 'CRASH',
 'STATE',
 'FAILWEEKDAY',
 'MODELTXT',
 'CDESCR',
 'MFR_NAME',
 'ANTI_BRAKES_YN',
 'COMPDESC']

In [76]:
model.best_estimator_.steps[0][1].transformer_list[2][1].steps[1][1].get_dummies(t[list(cat_feats)])

Unnamed: 0,MODEL_YEAR_Chrysler (FCA US LLC)CHRYSLER2015,MODEL_YEAR_Hyundai Motor AmericaHYUNDAI2015,MODEL_YEAR_Toyota Motor CorporationTOYOTA2016,"MODEL_YEAR_Tesla Motors, Inc.TESLA2016",MODEL_YEAR_Ford Motor CompanyFORD2014,MODEL_YEAR_Keystone RV CompanyKEYSTONE2017,"MODEL_YEAR_Tesla Motors, Inc.TESLA2015",MODEL_YEAR_Chrysler (FCA US LLC)DODGE2016,MODEL_YEAR_Chrysler (FCA US LLC)DODGE2014,"MODEL_YEAR_Nissan North America, Inc.INFINITI2015",...,COMPDESC_parking brake,COMPDESC_lane departure: lane keep: automatic steering,"COMPDESC_service brakes, hydraulic:switches:brake warning",COMPDESC_structure:body:hatchback/liftgate,COMPDESC_parking brake:conventional:mechanical:linkage and cable,COMPDESC_back over prevention: camera system,"COMPDESC_service brakes, hydraulic:foundation components:disc:caliper",COMPDESC_lane departure,COMPDESC_back over prevention: sensing system: camera,COMPDESC_interior lighting
2496,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2497,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
get_feature_names()

In [79]:
model.best_estimator_.steps[0][1].transformer_list[0][1].steps[1][1].get_feature_names()

['00',
 '000',
 '000km',
 '002',
 '01',
 '02',
 '03',
 '04',
 '05',
 '06',
 '07',
 '08',
 '09',
 '093',
 '10',
 '100',
 '1000',
 '105',
 '10k',
 '10mph',
 '10th',
 '11',
 '110',
 '1100',
 '112',
 '11234',
 '114',
 '115',
 '12',
 '120',
 '1200',
 '12000',
 '125',
 '1250',
 '126',
 '12k',
 '12th',
 '13',
 '130',
 '1300',
 '13553',
 '13mm',
 '14',
 '140',
 '1400',
 '14559',
 '14th',
 '14v',
 '14v154000',
 '14v391000',
 '14v634000',
 '14v643000',
 '15',
 '150',
 '1500',
 '157',
 '15k',
 '15mph',
 '15v',
 '15v041000',
 '15v393000',
 '15v431000',
 '15v461000',
 '15v470000',
 '15v509000',
 '15v541000',
 '15v614000',
 '15v675000',
 '15v728000',
 '15v800000',
 '16',
 '160',
 '1600',
 '165',
 '16s19',
 '16th',
 '16v',
 '16v029000',
 '16v061000',
 '16v078000',
 '16v240000',
 '16v244000',
 '16v245000',
 '16v461000',
 '16v621000',
 '16v628000',
 '16v643000',
 '16v668000',
 '16v755000',
 '16v875000',
 '17',
 '17276',
 '175',
 '17th',
 '17v114000',
 '17v285000',
 '17v627000',
 '18',
 '180',
 '1800',


In [50]:
textpipe

Pipeline(memory=None,
     steps=[('text_extractor', FactorExtractor(factor='CDESCR')), ('tfidf', StemmedTfidfVectorizer(analyzer='word', binary=False, decode_error='strict',
            dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',
            lowercase=True, max_df=1.0, max_features=None, min_df=1,
       ...lse, token_pattern='(?u)\\b\\w\\w+\\b',
            tokenizer=None, use_idf=True, vocabulary=None))])

In [39]:
prediction1=model.predict(test1)
prediction2=model.predict(test2)
prediction3=model.predict(test3)

MaybeEncodingError: Error sending result: '[        MODEL_YEAR_Chrysler (FCA US LLC)CHRYSLER2015  \
102496                                             0   
102497                                             0   
102498                                             0   
102499                                             0   
102500                                             0   
102501                                             0   
102502                                             1   
102503                                             0   
102504                                             0   
102505                                             0   
102506                                             0   
102507                                             0   
102508                                             0   
102509                                             0   
102510                                             0   
102511                                             0   
102512                                             0   
102513                                             0   
102514                                             0   
102515                                             0   
102516                                             0   
102517                                             0   
102518                                             0   
102519                                             0   
102520                                             0   
102521                                             0   
102522                                             0   
102523                                             0   
102524                                             0   
102525                                             0   
...                                              ...   
157572                                             0   
157573                                             0   
157574                                             0   
157575                                             0   
157576                                             0   
157577                                             0   
157578                                             0   
157579                                             0   
157580                                             0   
157581                                             0   
157582                                             0   
157583                                             0   
157584                                             0   
157585                                             0   
157586                                             0   
157587                                             0   
157588                                             0   
157589                                             0   
157590                                             0   
157591                                             0   
157592                                             0   
157593                                             0   
157594                                             0   
157595                                             0   
157596                                             0   
157597                                             0   
157598                                             0   
157599                                             0   
157600                                             0   
157601                                             0   

        MODEL_YEAR_Hyundai Motor AmericaHYUNDAI2015  \
102496                                            0   
102497                                            0   
102498                                            0   
102499                                            0   
102500                                            0   
102501                                            0   
102502                                            0   
102503                                            0   
102504                                            0   
102505                                            0   
102506                                            0   
102507                                            0   
102508                                            0   
102509                                            0   
102510                                            0   
102511                                            0   
102512                                            0   
102513                                            0   
102514                                            0   
102515                                            0   
102516                                            0   
102517                                            0   
102518                                            0   
102519                                            0   
102520                                            0   
102521                                            0   
102522                                            0   
102523                                            0   
102524                                            0   
102525                                            0   
...                                             ...   
157572                                            0   
157573                                            0   
157574                                            0   
157575                                            0   
157576                                            0   
157577                                            0   
157578                                            0   
157579                                            0   
157580                                            0   
157581                                            0   
157582                                            0   
157583                                            0   
157584                                            0   
157585                                            0   
157586                                            0   
157587                                            0   
157588                                            0   
157589                                            0   
157590                                            0   
157591                                            0   
157592                                            0   
157593                                            0   
157594                                            0   
157595                                            0   
157596                                            0   
157597                                            0   
157598                                            0   
157599                                            0   
157600                                            0   
157601                                            0   

        MODEL_YEAR_Toyota Motor CorporationTOYOTA2016  \
102496                                              0   
102497                                              0   
102498                                              0   
102499                                              0   
102500                                              0   
102501                                              0   
102502                                              0   
102503                                              0   
102504                                              0   
102505                                              0   
102506                                              0   
102507                                              0   
102508                                              0   
102509                                              0   
102510                                              0   
102511                                              0   
102512                                              0   
102513                                              0   
102514                                              0   
102515                                              0   
102516                                              0   
102517                                              0   
102518                                              0   
102519                                              0   
102520                                              0   
102521                                              0   
102522                                              0   
102523                                              0   
102524                                              0   
102525                                              0   
...                                               ...   
157572                                              0   
157573                                              0   
157574                                              0   
157575                                              0   
157576                                              0   
157577                                              0   
157578                                              0   
157579                                              0   
157580                                              0   
157581                                              0   
157582                                              0   
157583                                              0   
157584                                              0   
157585                                              0   
157586                                              0   
157587                                              0   
157588                                              0   
157589                                              0   
157590                                              0   
157591                                              0   
157592                                              0   
157593                                              0   
157594                                              0   
157595                                              0   
157596                                              0   
157597                                              0   
157598                                              0   
157599                                              0   
157600                                              0   
157601                                              0   

        MODEL_YEAR_Tesla Motors, Inc.TESLA2016  \
102496                                       0   
102497                                       0   
102498                                       0   
102499                                       0   
102500                                       0   
102501                                       0   
102502                                       0   
102503                                       0   
102504                                       0   
102505                                       0   
102506                                       0   
102507                                       0   
102508                                       0   
102509                                       0   
102510                                       0   
102511                                       0   
102512                                       0   
102513                                       0   
102514                                       0   
102515                                       0   
102516                                       0   
102517                                       0   
102518                                       0   
102519                                       0   
102520                                       0   
102521                                       0   
102522                                       0   
102523                                       0   
102524                                       0   
102525                                       0   
...                                        ...   
157572                                       0   
157573                                       0   
157574                                       0   
157575                                       0   
157576                                       0   
157577                                       0   
157578                                       0   
157579                                       0   
157580                                       0   
157581                                       0   
157582                                       0   
157583                                       0   
157584                                       0   
157585                                       0   
157586                                       0   
157587                                       0   
157588                                       0   
157589                                       0   
157590                                       0   
157591                                       0   
157592                                       0   
157593                                       0   
157594                                       0   
157595                                       0   
157596                                       0   
157597                                       0   
157598                                       0   
157599                                       0   
157600                                       0   
157601                                       0   

        MODEL_YEAR_Ford Motor CompanyFORD2014  \
102496                                      0   
102497                                      0   
102498                                      0   
102499                                      0   
102500                                      0   
102501                                      0   
102502                                      0   
102503                                      0   
102504                                      0   
102505                                      0   
102506                                      0   
102507                                      0   
102508                                      0   
102509                                      0   
102510                                      0   
102511                                      0   
102512                                      0   
102513                                      0   
102514                                      0   
102515                                      1   
102516                                      1   
102517                                      0   
102518                                      0   
102519                                      0   
102520                                      0   
102521                                      0   
102522                                      0   
102523                                      1   
102524                                      1   
102525                                      0   
...                                       ...   
157572                                      0   
157573                                      0   
157574                                      0   
157575                                      0   
157576                                      0   
157577                                      0   
157578                                      0   
157579                                      0   
157580                                      0   
157581                                      0   
157582                                      0   
157583                                      0   
157584                                      0   
157585                                      0   
157586                                      0   
157587                                      0   
157588                                      0   
157589                                      0   
157590                                      0   
157591                                      0   
157592                                      0   
157593                                      0   
157594                                      0   
157595                                      0   
157596                                      0   
157597                                      0   
157598                                      0   
157599                                      0   
157600                                      0   
157601                                      0   

        MODEL_YEAR_Keystone RV CompanyKEYSTONE2017  \
102496                                           0   
102497                                           0   
102498                                           0   
102499                                           0   
102500                                           0   
102501                                           0   
102502                                           0   
102503                                           0   
102504                                           0   
102505                                           0   
102506                                           0   
102507                                           0   
102508                                           0   
102509                                           0   
102510                                           0   
102511                                           0   
102512                                           0   
102513                                           0   
102514                                           0   
102515                                           0   
102516                                           0   
102517                                           0   
102518                                           0   
102519                                           0   
102520                                           0   
102521                                           0   
102522                                           0   
102523                                           0   
102524                                           0   
102525                                           0   
...                                            ...   
157572                                           0   
157573                                           0   
157574                                           0   
157575                                           0   
157576                                           0   
157577                                           0   
157578                                           0   
157579                                           0   
157580                                           0   
157581                                           0   
157582                                           0   
157583                                           0   
157584                                           0   
157585                                           0   
157586                                           0   
157587                                           0   
157588                                           0   
157589                                           0   
157590                                           0   
157591                                           0   
157592                                           0   
157593                                           0   
157594                                           0   
157595                                           0   
157596                                           0   
157597                                           0   
157598                                           0   
157599                                           0   
157600                                           0   
157601                                           0   

        MODEL_YEAR_Tesla Motors, Inc.TESLA2015  \
102496                                       0   
102497                                       0   
102498                                       0   
102499                                       0   
102500                                       0   
102501                                       0   
102502                                       0   
102503                                       0   
102504                                       0   
102505                                       0   
102506                                       0   
102507                                       0   
102508                                       0   
102509                                       0   
102510                                       0   
102511                                       0   
102512                                       0   
102513                                       0   
102514                                       0   
102515                                       0   
102516                                       0   
102517                                       0   
102518                                       0   
102519                                       0   
102520                                       0   
102521                                       0   
102522                                       0   
102523                                       0   
102524                                       0   
102525                                       0   
...                                        ...   
157572                                       0   
157573                                       0   
157574                                       0   
157575                                       0   
157576                                       0   
157577                                       0   
157578                                       0   
157579                                       0   
157580                                       0   
157581                                       0   
157582                                       0   
157583                                       0   
157584                                       0   
157585                                       0   
157586                                       0   
157587                                       0   
157588                                       0   
157589                                       0   
157590                                       0   
157591                                       0   
157592                                       0   
157593                                       0   
157594                                       0   
157595                                       0   
157596                                       0   
157597                                       0   
157598                                       0   
157599                                       0   
157600                                       0   
157601                                       0   

        MODEL_YEAR_Chrysler (FCA US LLC)DODGE2016  \
102496                                          0   
102497                                          0   
102498                                          0   
102499                                          0   
102500                                          0   
102501                                          0   
102502                                          0   
102503                                          0   
102504                                          0   
102505                                          0   
102506                                          0   
102507                                          0   
102508                                          0   
102509                                          0   
102510                                          0   
102511                                          0   
102512                                          0   
102513                                          0   
102514                                          0   
102515                                          0   
102516                                          0   
102517                                          0   
102518                                          0   
102519                                          0   
102520                                          0   
102521                                          0   
102522                                          0   
102523                                          0   
102524                                          0   
102525                                          0   
...                                           ...   
157572                                          0   
157573                                          0   
157574                                          0   
157575                                          0   
157576                                          0   
157577                                          0   
157578                                          0   
157579                                          0   
157580                                          0   
157581                                          0   
157582                                          0   
157583                                          0   
157584                                          0   
157585                                          0   
157586                                          1   
157587                                          1   
157588                                          0   
157589                                          0   
157590                                          0   
157591                                          0   
157592                                          0   
157593                                          0   
157594                                          0   
157595                                          0   
157596                                          0   
157597                                          0   
157598                                          0   
157599                                          0   
157600                                          0   
157601                                          0   

        MODEL_YEAR_Chrysler (FCA US LLC)DODGE2014  \
102496                                          0   
102497                                          0   
102498                                          0   
102499                                          0   
102500                                          0   
102501                                          0   
102502                                          0   
102503                                          0   
102504                                          0   
102505                                          0   
102506                                          0   
102507                                          0   
102508                                          0   
102509                                          0   
102510                                          0   
102511                                          0   
102512                                          0   
102513                                          0   
102514                                          0   
102515                                          0   
102516                                          0   
102517                                          0   
102518                                          0   
102519                                          0   
102520                                          0   
102521                                          0   
102522                                          0   
102523                                          0   
102524                                          0   
102525                                          0   
...                                           ...   
157572                                          0   
157573                                          0   
157574                                          0   
157575                                          0   
157576                                          0   
157577                                          0   
157578                                          0   
157579                                          0   
157580                                          0   
157581                                          0   
157582                                          0   
157583                                          0   
157584                                          0   
157585                                          0   
157586                                          0   
157587                                          0   
157588                                          0   
157589                                          0   
157590                                          0   
157591                                          0   
157592                                          0   
157593                                          0   
157594                                          0   
157595                                          0   
157596                                          0   
157597                                          0   
157598                                          0   
157599                                          0   
157600                                          0   
157601                                          0   

        MODEL_YEAR_Nissan North America, Inc.INFINITI2015  \
102496                                                  0   
102497                                                  0   
102498                                                  0   
102499                                                  0   
102500                                                  0   
102501                                                  0   
102502                                                  0   
102503                                                  0   
102504                                                  0   
102505                                                  0   
102506                                                  0   
102507                                                  0   
102508                                                  0   
102509                                                  0   
102510                                                  0   
102511                                                  0   
102512                                                  0   
102513                                                  0   
102514                                                  0   
102515                                                  0   
102516                                                  0   
102517                                                  0   
102518                                                  0   
102519                                                  0   
102520                                                  0   
102521                                                  0   
102522                                                  0   
102523                                                  0   
102524                                                  0   
102525                                                  0   
...                                                   ...   
157572                                                  0   
157573                                                  0   
157574                                                  0   
157575                                                  0   
157576                                                  0   
157577                                                  0   
157578                                                  0   
157579                                                  0   
157580                                                  0   
157581                                                  0   
157582                                                  0   
157583                                                  0   
157584                                                  0   
157585                                                  0   
157586                                                  0   
157587                                                  0   
157588                                                  0   
157589                                                  0   
157590                                                  0   
157591                                                  0   
157592                                                  0   
157593                                                  0   
157594                                                  0   
157595                                                  0   
157596                                                  0   
157597                                                  0   
157598                                                  0   
157599                                                  0   
157600                                                  0   
157601                                                  0   

                   ...              COMPDESC_parking brake  \
102496             ...                                   0   
102497             ...                                   0   
102498             ...                                   0   
102499             ...                                   0   
102500             ...                                   0   
102501             ...                                   0   
102502             ...                                   0   
102503             ...                                   0   
102504             ...                                   0   
102505             ...                                   0   
102506             ...                                   0   
102507             ...                                   0   
102508             ...                                   0   
102509             ...                                   0   
102510             ...                                   0   
102511             ...                                   0   
102512             ...                                   0   
102513             ...                                   0   
102514             ...                                   0   
102515             ...                                   0   
102516             ...                                   0   
102517             ...                                   0   
102518             ...                                   0   
102519             ...                                   0   
102520             ...                                   0   
102521             ...                                   0   
102522             ...                                   0   
102523             ...                                   0   
102524             ...                                   0   
102525             ...                                   0   
...                ...                                 ...   
157572             ...                                   0   
157573             ...                                   0   
157574             ...                                   0   
157575             ...                                   0   
157576             ...                                   0   
157577             ...                                   0   
157578             ...                                   0   
157579             ...                                   0   
157580             ...                                   0   
157581             ...                                   0   
157582             ...                                   0   
157583             ...                                   0   
157584             ...                                   0   
157585             ...                                   0   
157586             ...                                   0   
157587             ...                                   0   
157588             ...                                   0   
157589             ...                                   0   
157590             ...                                   0   
157591             ...                                   0   
157592             ...                                   0   
157593             ...                                   0   
157594             ...                                   0   
157595             ...                                   0   
157596             ...                                   0   
157597             ...                                   0   
157598             ...                                   0   
157599             ...                                   0   
157600             ...                                   0   
157601             ...                                   0   

        COMPDESC_lane departure: lane keep: automatic steering  \
102496                                                  0        
102497                                                  0        
102498                                                  0        
102499                                                  0        
102500                                                  0        
102501                                                  0        
102502                                                  0        
102503                                                  0        
102504                                                  0        
102505                                                  0        
102506                                                  0        
102507                                                  0        
102508                                                  0        
102509                                                  0        
102510                                                  0        
102511                                                  0        
102512                                                  0        
102513                                                  0        
102514                                                  0        
102515                                                  0        
102516                                                  0        
102517                                                  0        
102518                                                  0        
102519                                                  0        
102520                                                  0        
102521                                                  0        
102522                                                  0        
102523                                                  0        
102524                                                  0        
102525                                                  0        
...                                                   ...        
157572                                                  0        
157573                                                  0        
157574                                                  0        
157575                                                  0        
157576                                                  0        
157577                                                  0        
157578                                                  0        
157579                                                  0        
157580                                                  0        
157581                                                  0        
157582                                                  0        
157583                                                  0        
157584                                                  0        
157585                                                  0        
157586                                                  0        
157587                                                  0        
157588                                                  0        
157589                                                  0        
157590                                                  0        
157591                                                  0        
157592                                                  0        
157593                                                  0        
157594                                                  0        
157595                                                  0        
157596                                                  0        
157597                                                  0        
157598                                                  0        
157599                                                  0        
157600                                                  0        
157601                                                  0        

        COMPDESC_service brakes, hydraulic:switches:brake warning  \
102496                                                  0           
102497                                                  0           
102498                                                  0           
102499                                                  0           
102500                                                  0           
102501                                                  0           
102502                                                  0           
102503                                                  0           
102504                                                  0           
102505                                                  0           
102506                                                  0           
102507                                                  0           
102508                                                  0           
102509                                                  0           
102510                                                  0           
102511                                                  0           
102512                                                  0           
102513                                                  0           
102514                                                  0           
102515                                                  0           
102516                                                  0           
102517                                                  0           
102518                                                  0           
102519                                                  0           
102520                                                  0           
102521                                                  0           
102522                                                  0           
102523                                                  0           
102524                                                  0           
102525                                                  0           
...                                                   ...           
157572                                                  0           
157573                                                  0           
157574                                                  0           
157575                                                  0           
157576                                                  0           
157577                                                  0           
157578                                                  0           
157579                                                  0           
157580                                                  0           
157581                                                  0           
157582                                                  0           
157583                                                  0           
157584                                                  0           
157585                                                  0           
157586                                                  0           
157587                                                  0           
157588                                                  0           
157589                                                  0           
157590                                                  0           
157591                                                  0           
157592                                                  0           
157593                                                  0           
157594                                                  0           
157595                                                  0           
157596                                                  0           
157597                                                  0           
157598                                                  0           
157599                                                  0           
157600                                                  0           
157601                                                  0           

        COMPDESC_structure:body:hatchback/liftgate  \
102496                                           0   
102497                                           0   
102498                                           0   
102499                                           0   
102500                                           0   
102501                                           0   
102502                                           0   
102503                                           0   
102504                                           0   
102505                                           0   
102506                                           0   
102507                                           0   
102508                                           0   
102509                                           0   
102510                                           0   
102511                                           0   
102512                                           0   
102513                                           0   
102514                                           0   
102515                                           0   
102516                                           0   
102517                                           0   
102518                                           0   
102519                                           0   
102520                                           0   
102521                                           0   
102522                                           0   
102523                                           0   
102524                                           0   
102525                                           0   
...                                            ...   
157572                                           0   
157573                                           0   
157574                                           0   
157575                                           0   
157576                                           0   
157577                                           0   
157578                                           0   
157579                                           0   
157580                                           0   
157581                                           0   
157582                                           0   
157583                                           0   
157584                                           0   
157585                                           0   
157586                                           0   
157587                                           0   
157588                                           0   
157589                                           0   
157590                                           0   
157591                                           0   
157592                                           0   
157593                                           0   
157594                                           0   
157595                                           0   
157596                                           0   
157597                                           0   
157598                                           0   
157599                                           0   
157600                                           0   
157601                                           0   

        COMPDESC_parking brake:conventional:mechanical:linkage and cable  \
102496                                                  0                  
102497                                                  0                  
102498                                                  0                  
102499                                                  0                  
102500                                                  0                  
102501                                                  0                  
102502                                                  0                  
102503                                                  0                  
102504                                                  0                  
102505                                                  0                  
102506                                                  0                  
102507                                                  0                  
102508                                                  0                  
102509                                                  0                  
102510                                                  0                  
102511                                                  0                  
102512                                                  0                  
102513                                                  0                  
102514                                                  0                  
102515                                                  0                  
102516                                                  0                  
102517                                                  0                  
102518                                                  0                  
102519                                                  0                  
102520                                                  0                  
102521                                                  0                  
102522                                                  0                  
102523                                                  0                  
102524                                                  0                  
102525                                                  0                  
...                                                   ...                  
157572                                                  0                  
157573                                                  0                  
157574                                                  0                  
157575                                                  0                  
157576                                                  0                  
157577                                                  0                  
157578                                                  0                  
157579                                                  0                  
157580                                                  0                  
157581                                                  0                  
157582                                                  0                  
157583                                                  0                  
157584                                                  0                  
157585                                                  0                  
157586                                                  0                  
157587                                                  0                  
157588                                                  0                  
157589                                                  0                  
157590                                                  0                  
157591                                                  0                  
157592                                                  0                  
157593                                                  0                  
157594                                                  0                  
157595                                                  0                  
157596                                                  0                  
157597                                                  0                  
157598                                                  0                  
157599                                                  0                  
157600                                                  0                  
157601                                                  0                  

        COMPDESC_back over prevention: camera system  \
102496                                             0   
102497                                             0   
102498                                             0   
102499                                             0   
102500                                             0   
102501                                             0   
102502                                             0   
102503                                             0   
102504                                             0   
102505                                             0   
102506                                             0   
102507                                             0   
102508                                             0   
102509                                             0   
102510                                             0   
102511                                             0   
102512                                             0   
102513                                             0   
102514                                             0   
102515                                             0   
102516                                             0   
102517                                             0   
102518                                             0   
102519                                             0   
102520                                             0   
102521                                             0   
102522                                             0   
102523                                             0   
102524                                             0   
102525                                             0   
...                                              ...   
157572                                             0   
157573                                             0   
157574                                             0   
157575                                             0   
157576                                             0   
157577                                             0   
157578                                             0   
157579                                             0   
157580                                             0   
157581                                             0   
157582                                             0   
157583                                             0   
157584                                             0   
157585                                             0   
157586                                             0   
157587                                             0   
157588                                             0   
157589                                             0   
157590                                             0   
157591                                             0   
157592                                             0   
157593                                             0   
157594                                             0   
157595                                             0   
157596                                             0   
157597                                             0   
157598                                             0   
157599                                             0   
157600                                             0   
157601                                             0   

        COMPDESC_service brakes, hydraulic:foundation components:disc:caliper  \
102496                                                  0                       
102497                                                  0                       
102498                                                  0                       
102499                                                  0                       
102500                                                  0                       
102501                                                  0                       
102502                                                  0                       
102503                                                  0                       
102504                                                  0                       
102505                                                  0                       
102506                                                  0                       
102507                                                  0                       
102508                                                  0                       
102509                                                  0                       
102510                                                  0                       
102511                                                  0                       
102512                                                  0                       
102513                                                  0                       
102514                                                  0                       
102515                                                  0                       
102516                                                  0                       
102517                                                  0                       
102518                                                  0                       
102519                                                  0                       
102520                                                  0                       
102521                                                  0                       
102522                                                  0                       
102523                                                  0                       
102524                                                  0                       
102525                                                  0                       
...                                                   ...                       
157572                                                  0                       
157573                                                  0                       
157574                                                  0                       
157575                                                  0                       
157576                                                  0                       
157577                                                  0                       
157578                                                  0                       
157579                                                  0                       
157580                                                  0                       
157581                                                  0                       
157582                                                  0                       
157583                                                  0                       
157584                                                  0                       
157585                                                  0                       
157586                                                  0                       
157587                                                  0                       
157588                                                  0                       
157589                                                  0                       
157590                                                  0                       
157591                                                  0                       
157592                                                  0                       
157593                                                  0                       
157594                                                  0                       
157595                                                  0                       
157596                                                  0                       
157597                                                  0                       
157598                                                  0                       
157599                                                  0                       
157600                                                  0                       
157601                                                  0                       

        COMPDESC_lane departure  \
102496                        0   
102497                        0   
102498                        0   
102499                        0   
102500                        0   
102501                        0   
102502                        0   
102503                        0   
102504                        0   
102505                        0   
102506                        0   
102507                        0   
102508                        0   
102509                        0   
102510                        0   
102511                        0   
102512                        0   
102513                        0   
102514                        0   
102515                        0   
102516                        0   
102517                        0   
102518                        0   
102519                        0   
102520                        0   
102521                        0   
102522                        0   
102523                        0   
102524                        0   
102525                        0   
...                         ...   
157572                        0   
157573                        0   
157574                        0   
157575                        0   
157576                        0   
157577                        0   
157578                        0   
157579                        0   
157580                        0   
157581                        0   
157582                        0   
157583                        0   
157584                        0   
157585                        0   
157586                        0   
157587                        0   
157588                        0   
157589                        0   
157590                        0   
157591                        0   
157592                        0   
157593                        0   
157594                        0   
157595                        0   
157596                        0   
157597                        0   
157598                        0   
157599                        0   
157600                        0   
157601                        0   

        COMPDESC_back over prevention: sensing system: camera  \
102496                                                  0       
102497                                                  0       
102498                                                  0       
102499                                                  0       
102500                                                  0       
102501                                                  0       
102502                                                  0       
102503                                                  0       
102504                                                  0       
102505                                                  0       
102506                                                  0       
102507                                                  0       
102508                                                  0       
102509                                                  0       
102510                                                  0       
102511                                                  0       
102512                                                  0       
102513                                                  0       
102514                                                  0       
102515                                                  0       
102516                                                  0       
102517                                                  0       
102518                                                  0       
102519                                                  0       
102520                                                  0       
102521                                                  0       
102522                                                  0       
102523                                                  0       
102524                                                  0       
102525                                                  0       
...                                                   ...       
157572                                                  0       
157573                                                  0       
157574                                                  0       
157575                                                  0       
157576                                                  0       
157577                                                  0       
157578                                                  0       
157579                                                  0       
157580                                                  0       
157581                                                  0       
157582                                                  0       
157583                                                  0       
157584                                                  0       
157585                                                  0       
157586                                                  0       
157587                                                  0       
157588                                                  0       
157589                                                  0       
157590                                                  0       
157591                                                  0       
157592                                                  0       
157593                                                  0       
157594                                                  0       
157595                                                  0       
157596                                                  0       
157597                                                  0       
157598                                                  0       
157599                                                  0       
157600                                                  0       
157601                                                  0       

        COMPDESC_interior lighting  
102496                           0  
102497                           0  
102498                           0  
102499                           0  
102500                           0  
102501                           0  
102502                           0  
102503                           0  
102504                           0  
102505                           0  
102506                           0  
102507                           0  
102508                           0  
102509                           1  
102510                           0  
102511                           0  
102512                           0  
102513                           0  
102514                           0  
102515                           0  
102516                           0  
102517                           0  
102518                           0  
102519                           0  
102520                           0  
102521                           0  
102522                           0  
102523                           0  
102524                           0  
102525                           0  
...                            ...  
157572                           0  
157573                           0  
157574                           0  
157575                           0  
157576                           0  
157577                           0  
157578                           0  
157579                           0  
157580                           0  
157581                           0  
157582                           0  
157583                           0  
157584                           0  
157585                           0  
157586                           0  
157587                           0  
157588                           0  
157589                           0  
157590                           0  
157591                           0  
157592                           0  
157593                           0  
157594                           0  
157595                           0  
157596                           0  
157597                           0  
157598                           0  
157599                           0  
157600                           0  
157601                           0  

[55106 rows x 5149 columns]]'. Reason: 'error("'i' format requires -2147483648 <= number <= 2147483647",)'

In [None]:
prediction = np.concatenate((prediction1, prediction2,prediction3), axis=None)

In [None]:
test['prediction'] = prediction

In [None]:
test.to_csv('datasince2012_test_predicted.csv')

In [None]:
test_sample=test[test['prediction']==1].sample(n=100,random_state=0)

In [None]:
test_sample.to_csv('datasince2012_test_predicted_sample100.csv')