In [31]:
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from imblearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import OrdinalEncoder
from sklearn.decomposition import PCA
from sklearn.impute import SimpleImputer
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from imblearn.under_sampling import RandomUnderSampler
import xgboost as xgb

In [32]:
import sklearn.metrics as skm

In [33]:
from sklearn.linear_model import SGDClassifier

In [34]:
df=pd.read_csv('C:/Users/bossg/Downloads/Week8_train.csv')
df.drop('PARTY_ID',axis=1,inplace=True)
Y=df.TARGET
X=df.drop(['id','TARGET'],axis=1)

In [35]:
numerical_columns=[cname for cname in X.columns if (X[cname].nunique()>10 and X[cname].dtype!='object')]
categorical_columns=[cname for cname in X.columns if (X[cname].nunique()<=10 or X[cname].dtype=='object')]

In [105]:
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),('encoder',OneHotEncoder(handle_unknown="ignore"))
])
numerical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer()),
    ('norm', StandardScaler()),('pca',PCA())
])
NB_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer()),
    ('norm', MinMaxScaler((0,1)))
])
NN_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer()),
    ('norm', MinMaxScaler((0,1)))
])

metapreprocessor=ColumnTransformer(transformers=[('cat', categorical_transformer, categorical_columns),('num', numerical_transformer, numerical_columns)])
preprocessor=ColumnTransformer(transformers=[('cat', categorical_transformer, categorical_columns),('num', numerical_transformer, numerical_columns)])
upreprocessor=ColumnTransformer(transformers=[('cat', categorical_transformer, categorical_columns),('num', numerical_transformer, numerical_columns)])
NBpreprocessor=ColumnTransformer(transformers=[('cat', categorical_transformer, categorical_columns),('num', NB_transformer, numerical_columns)])
NNpreprocessor=ColumnTransformer(transformers=[('cat', categorical_transformer, categorical_columns),('num', NN_transformer, numerical_columns)])

In [37]:
x_train,x_test,y_train,y_test=train_test_split(X,Y,test_size=0.2,random_state=69420,stratify=Y)

In [38]:
under=RandomUnderSampler(sampling_strategy='majority')
x_train_under,y_train_under=under.fit_resample(x_train,y_train)

In [9]:
LRModel=SGDClassifier(random_state=69420,class_weight='balanced',alpha=0.001,penalty='l1',loss='log_loss',max_iter=1000)
LRpipe=Pipeline(steps=[('preprocessor', preprocessor),('model', LRModel)])

In [None]:
LRpipe.fit(x_train,y_train)

In [None]:
lrpredict=LRpipe.predict(x_test)
lrproba=LRpipe.predict_proba(x_test)

In [None]:
skm.f1_score(lrpredict,y_test)

In [10]:
from sklearn.naive_bayes import ComplementNB

In [48]:
NBModel=ComplementNB()
NBpipe=Pipeline(steps=[('preprocessor', NBpreprocessor),('model', NBModel)])

In [45]:
NBpipe.fit(x_train,y_train)

In [46]:
NBpredict=NBpipe.predict(x_test)
NBproba=NBpipe.predict_proba(x_test)

In [None]:
NBpredictunder=NBpipe.predict(x_test)

In [47]:
skm.f1_score(NBpredict,y_test,average='macro')

0.6050501096413343

In [49]:
HuModel=SGDClassifier(random_state=69420,class_weight='balanced',alpha=0.1,penalty='l2',loss='modified_huber',max_iter=1000)
Hupipe=Pipeline(steps=[('preprocessor', preprocessor),('model', HuModel)])

In [None]:
Hupipe.fit(x_train,y_train)

In [None]:
hupredict=Hupipe.predict(x_test)
huproba=Hupipe.predict_proba(x_test)

In [None]:
skm.f1_score(hupredict,y_test,average='macro')

In [None]:
corrs=pd.DataFrame()

In [None]:
corrs["lr"]=lrpredict
corrs["NB"]=NBpredict
corrs["hu"]=hupredict
corrs["Y"]=y_test.values

In [None]:
corrs.head()

In [None]:
corrs.corr()

In [50]:
xgbmodel = xgb.XGBClassifier(n_estimators=180,eta=0.1,scale_pos_weight=4.15042727104,max_depth=8,seed=69420)
xgbpipeline = Pipeline(steps=[('preprocessor', preprocessor),('model', xgbmodel)])
metamodel = xgb.XGBClassifier(n_estimators=180,eta=0.1,scale_pos_weight=4.15042727104,max_depth=8,seed=69420)
metapipeline = Pipeline(steps=[('preprocessor', preprocessor),('model', metamodel)])

In [None]:
xgbpipeline.fit(x_train_under,y_train_under)

In [None]:
xgbpredsunder=xgbpipeline.predict(x_test)

In [None]:
skm.accuracy_score(xgbpredsunder,y_test)

In [None]:
np.corrcoef(xgbpreds,xgbpredsunder)

In [None]:
corrs.corr()

In [192]:
def create_meta_dataset(data_x, yhats):
    meta_x=data_x.copy(deep=True)
    for yhat in yhats:
        yhat=np.asarray([yhat])
        meta_x=np.append(meta_x,np.transpose(yhat),axis=1)
    meta_x=pd.DataFrame(meta_x)
    return meta_x

In [133]:
def stack_prediction(yhats, meta_model, X):
    # make predictions
    meta_x=X.copy(deep=True)
        # create input dataset
    meta_xt=preprocessor.transform(meta_x)
    for yhat in yhats:
        yhat=np.asarray([yhat])
        meta_xt=np.append(meta_xt,np.transpose(yhat),axis=1)
    meta_x=pd.DataFrame(meta_xt)
    # predict
    return meta_model.predict(meta_x)

In [16]:
from sklearn.model_selection import StratifiedKFold

In [57]:
data_x, data_y = pd.DataFrame(), pd.Series(name="y")
lr_yhat, nb_yhat, hu_yhat, nn_yhat,xgb_yhat,lr_yhatu, nb_yhatu, hu_yhatu, nn_yhatu,xgb_yhatu= list(),list(),list(),list(),list(),list(),list(),list(),list(),list()
models=[LRpipe,NBpipe,Hupipe,NNpipeline,xgbpipeline,LRpipeu,NBpipeu,Hupipeu,NNpipelineu,xgbpipelineu]
yhats=[lr_yhat, nb_yhat, hu_yhat, nn_yhat,lr_yhatu, nb_yhatu, hu_yhatu, nn_yhatu,xgb_yhatu]
kfold = StratifiedKFold(n_splits=10)
for train_ix, test_ix in kfold.split(x_train,y_train):
    # get data
    train_X, test_X = x_train.iloc[train_ix], x_train.iloc[test_ix]
    train_y, test_y = y_train.iloc[train_ix], y_train.iloc[test_ix]
    data_x=data_x.append(test_X,ignore_index=True)
    data_y=data_y.append(test_y,ignore_index=True)
    for model,yhat in zip(models,yhats):
        model.fit(train_X,train_y)
        yhat.append(model.predict_proba(test_X)[:,0])    

  data_x, data_y = pd.DataFrame(), pd.Series(name="y")


In [150]:
metamodel=xgb.XGBClassifier(n_estimators=180,eta=0.1,scale_pos_weight=4.15,max_depth=8,seed=69420)
metapipeline=Pipeline(steps=[('preprocessor', metapreprocessor),('model', metamodel)])

In [58]:
meta_X_trainfull = create_meta_dataset(data_x, [lr_yhat, nb_yhat, hu_yhat, nn_yhat,lr_yhatu, nb_yhatu, hu_yhatu, nn_yhatu,xgb_yhatu])
data_y_full=data_y

In [61]:
meta_X_trainfull.to_csv("stacktrainfull.csv",index=False)
data_y_full.to_csv("stacktrainfully.csv",index=False)

In [151]:
metamodel.fit(meta_X_trainfull,data_y)

In [155]:
LRpipe.fit(x_train,y_train)
preda=LRpipe.predict_proba(test_data)[:,0]
NBpipe.fit(x_train,y_train)
predb=NBpipe.predict_proba(test_data)[:,0]
Hupipe.fit(x_train,y_train)
predc=Hupipe.predict_proba(test_data)[:,0]
NNpipeline.fit(x_train,y_train)
predd=NNpipeline.predict_proba(test_data)[:,0]
xgbpipeline.fit(x_train,y_train)
prede=xgbpipeline.predict_proba(test_data)[:,0]
LRpipeu.fit(x_train,y_train)
predf=LRpipeu.predict_proba(test_data)[:,0]
NBpipeu.fit(x_train,y_train)
predg=NBpipeu.predict_proba(test_data)[:,0]
Hupipeu.fit(x_train,y_train)
predh=Hupipeu.predict_proba(test_data)[:,0]
#NNpipelineu.fit(x_train,y_train)
#predi=NNpipelineu.predict_proba(test_data)[:,0]
xgbpipelineu.fit(x_train,y_train)
predj=xgbpipelineu.predict_proba(test_data)[:,0]

In [156]:
preds=stack_prediction([preda,predb,predc,predd,prede,predf,predg,predh,predj], metamodel, test_data)

In [153]:
skm.f1_score(preds,y_test,average='macro')

0.6239379929809645

In [157]:
preds.sum()

16851

In [142]:
metamodel.feature_importances_

array([0.        , 0.        , 0.        , ..., 0.00225322, 0.0018945 ,
       0.00212495], dtype=float32)

In [None]:
metamodel.feature_importances_.shape

In [154]:
test=pd.read_csv('C:/Users/bossg/Downloads/Week8_test.csv')
test_data=test.drop(['id','PARTY_ID'],axis=1)

In [158]:
result = pd.DataFrame(test['id'], columns = ['id'])
result['TARGET']= preds


In [159]:
result.to_csv("stackxgbwith9.csv", index=False)

In [53]:
from sklearn.neural_network import MLPClassifier

In [54]:
NNmodel=MLPClassifier(alpha=0.001,early_stopping=True,hidden_layer_sizes=(512,1024,512))

In [55]:
NNpipeline=Pipeline(steps=[('preprocessor', NNpreprocessor),('model', NNmodel)])

In [106]:
LRModelu=SGDClassifier(random_state=69420,alpha=0.001,penalty='l1',loss='log_loss',max_iter=1000)
LRpipeu=Pipeline(steps=[('under',under),('preprocessor', upreprocessor),('model', LRModelu)])
NBModelu=ComplementNB()
NBpipeu=Pipeline(steps=[('under',under),('preprocessor', NBpreprocessor),('model', NBModelu)])
HuModelu=SGDClassifier(random_state=69420,alpha=0.1,penalty='l2',loss='modified_huber',max_iter=1000)
Hupipeu=Pipeline(steps=[('under',under),('preprocessor', upreprocessor),('model', HuModelu)])
xgbmodelu = xgb.XGBClassifier(n_estimators=180,eta=0.1,max_depth=8,seed=69420)
xgbpipelineu = Pipeline(steps=[('under',under),('preprocessor', upreprocessor),('model', xgbmodelu)])
NNmodelu=MLPClassifier(alpha=0.001,early_stopping=True,hidden_layer_sizes=(512,1024,512))
NNpipelineu=Pipeline(steps=[('under',under),('preprocessor', NNpreprocessor),('model', NNmodelu)])

In [24]:
Y.sum()

21532

In [27]:
import lightgbm

In [62]:
meta_X_trainfull

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1314,1315,1316,1317,1318,1319,1320,1321,1322,1323
0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,-0.031538,0.618825,0.671661,0.637786,0.871190,0.825916,0.619658,0.597992,0.622422,0.696120
1,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,-0.023431,0.512727,0.610690,0.499134,0.841557,0.569495,0.539482,0.548270,0.545224,0.488838
2,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,-0.030944,0.603553,0.708646,0.618666,0.875125,0.787631,0.632054,0.729020,0.659496,0.710656
3,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,-0.018491,0.518418,0.677930,0.508502,0.856718,0.584377,0.537995,0.680732,0.569227,0.677445
4,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,-0.043993,0.613947,0.680277,0.584766,0.888463,0.701670,0.685640,0.674894,0.597277,0.728873
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88714,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,-0.024627,0.585326,0.701896,0.555297,0.899701,0.806511,0.552922,0.656134,0.571289,0.608189
88715,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,-0.020645,0.521454,0.636909,0.432400,0.600886,0.627034,0.379373,0.572309,0.376178,0.374268
88716,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,-0.028074,0.665058,0.719764,0.632980,0.904128,0.826995,0.646540,0.712203,0.647485,0.661123
88717,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,-0.020999,0.471694,0.665288,0.443837,0.760395,0.203255,0.402396,0.661571,0.433062,0.368701


In [80]:
NBpreprocessor.transform(x_test).shape

(22180, 1315)

In [83]:
nbpred=NBpipe.predict(x_test)

In [89]:
skm.precision_score(nbpred,y_test)

0.28425452856479333

In [92]:
NNpreprocessor.transform(x_test).shape

(22180, 1315)

In [126]:
preprocessor.transform(x_test).shape

(22180, 1315)

In [108]:
LRpipeu.fit(x_train,y_train)

In [None]:
LRpipeu.fit(x_train,y_train)
NBpipeu.fit(x_train,y_train)
Hupipeu.fit(x_train,y_train)
NNpipelineu.fit(x_train,y_train)
xgbpipelineu.fit(x_train,y_train)

Unnamed: 0,V_1,V_2,V_3,V_4,V_5,V_6,V_7,V_8,V_9,V_10,...,V_473,V_474,V_475,V_476,V_477,V_478,V_479,V_480,V_481,V_482
7587,0,0,0,1,0,0,0,0,0,0.000000,...,0,0,0,0,0,0,0,492.623588,0.0,0.0
71092,0,0,0,0,0,0,0,0,0,0.000000,...,0,0,0,0,0,0,0,0.000000,0.0,0.0
11145,0,0,0,0,0,0,0,1,0,107.263220,...,0,0,0,0,0,0,0,0.000000,0.0,0.0
4020,0,0,0,0,1,0,0,0,0,68.076204,...,0,0,0,0,0,1,0,0.000000,0.0,0.0
38605,0,0,0,0,0,0,0,2,0,0.000000,...,0,0,0,0,0,0,0,2700.582152,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99868,0,0,0,0,4,0,0,0,0,0.000000,...,0,0,0,0,0,0,0,0.000000,0.0,0.0
45313,0,0,0,0,0,0,0,0,0,60.851518,...,1,0,0,0,0,2,0,960.013205,0.0,0.0
95216,0,0,0,0,0,0,0,0,1,30.725523,...,0,0,0,0,0,0,0,2402.196392,0.0,0.0
99993,0,0,0,0,0,0,0,0,0,13.478761,...,0,0,0,0,0,0,0,205.773768,0.0,0.0


In [160]:
data_x, data_y = pd.DataFrame(), pd.Series(name="y")
lr_yhat, nb_yhat, hu_yhat, nn_yhat,xgb_yhat,lr_yhatu, nb_yhatu, hu_yhatu, nn_yhatu,xgb_yhatu= list(),list(),list(),list(),list(),list(),list(),list(),list(),list()
models=[LRpipe,NBpipe,Hupipe,NNpipeline,xgbpipeline,LRpipeu,NBpipeu,Hupipeu,NNpipelineu,xgbpipelineu]
yhats=[lr_yhat, nb_yhat, hu_yhat, nn_yhat,xgb_yhat,lr_yhatu, nb_yhatu, hu_yhatu, nn_yhatu,xgb_yhatu]
kfold = StratifiedKFold(n_splits=10)
for train_ix, test_ix in kfold.split(x_train,y_train):
    # get data
    train_X, test_X = x_train.iloc[train_ix], x_train.iloc[test_ix]
    train_y, test_y = y_train.iloc[train_ix], y_train.iloc[test_ix]
    data_x=data_x.append(test_X,ignore_index=True)
    data_y=data_y.append(test_y,ignore_index=True)
    for model,yhat in zip(models,yhats):
        model.fit(train_X,train_y)
        yhat.append(model.predict_proba(test_X)[:,0])    

  data_x, data_y = pd.DataFrame(), pd.Series(name="y")


In [181]:
meta_X_trainfull = create_meta_dataset(data_x, [lr_yhat, nb_yhat, hu_yhat, nn_yhat,xgb_yhat,lr_yhatu, nb_yhatu, hu_yhatu, nn_yhatu,xgb_yhatu])
data_y_full=data_y

In [162]:
meta_X_trainfull.to_csv("stacktrainfull.csv",index=False)
data_y_full.to_csv("stacktrainfully.csv",index=False)

In [163]:
LRpipe.fit(x_train,y_train)
preda=LRpipe.predict_proba(x_test)[:,0]
NBpipe.fit(x_train,y_train)
predb=NBpipe.predict_proba(x_test)[:,0]
Hupipe.fit(x_train,y_train)
predc=Hupipe.predict_proba(x_test)[:,0]
NNpipeline.fit(x_train,y_train)
predd=NNpipeline.predict_proba(x_test)[:,0]
xgbpipeline.fit(x_train,y_train)
prede=xgbpipeline.predict_proba(x_test)[:,0]
LRpipeu.fit(x_train,y_train)
predf=LRpipeu.predict_proba(x_test)[:,0]
NBpipeu.fit(x_train,y_train)
predg=NBpipeu.predict_proba(x_test)[:,0]
Hupipeu.fit(x_train,y_train)
predh=Hupipeu.predict_proba(x_test)[:,0]
NNpipelineu.fit(x_train,y_train)
predi=NNpipelineu.predict_proba(x_test)[:,0]
xgbpipelineu.fit(x_train,y_train)
predj=xgbpipelineu.predict_proba(x_test)[:,0]

In [184]:
meta_X_testfull = create_meta_dataset(x_test,[preda,predb,predc,predd,prede,predf,predg,predh,predi,predj])

In [168]:
meta_X_testfull.to_csv("stacktestfull.csv",index=False)

In [188]:
data_x, data_y = pd.DataFrame(), pd.Series(name="y")
lr_yhat, nb_yhat, hu_yhat, nn_yhat,xgb_yhat,lr_yhatu, nb_yhatu, hu_yhatu, nn_yhatu,xgb_yhatu= list(),list(),list(),list(),list(),list(),list(),list(),list(),list()
models=[LRpipe,NBpipe,Hupipe,NNpipeline,xgbpipeline,LRpipeu,NBpipeu,Hupipeu,NNpipelineu,xgbpipelineu]
yhats=[lr_yhat, nb_yhat, hu_yhat, nn_yhat,xgb_yhat,lr_yhatu, nb_yhatu, hu_yhatu, nn_yhatu,xgb_yhatu]
kfold = StratifiedKFold(n_splits=10)
for train_ix, test_ix in kfold.split(X,Y):
    # get data
    train_X, test_X = X.iloc[train_ix], X.iloc[test_ix]
    train_y, test_y = Y.iloc[train_ix], Y.iloc[test_ix]
    data_x=data_x.append(test_X,ignore_index=True)
    data_y=data_y.append(test_y,ignore_index=True)
    for model,yhat in zip(models,yhats):
        model.fit(train_X,train_y)
        yhat.append(model.predict_proba(test_X)[:,0])    

  data_x, data_y = pd.DataFrame(), pd.Series(name="y")


In [189]:
meta_X_subfull = create_meta_dataset(data_x, [lr_yhat, nb_yhat, hu_yhat, nn_yhat,xgb_yhat,lr_yhatu, nb_yhatu, hu_yhatu, nn_yhatu,xgb_yhatu])
data_y_subfull=data_y

In [190]:
meta_X_subfull.to_csv("stacksubfull.csv",index=False)
data_y_subfull.to_csv("stacksubfully.csv",index=False)

In [191]:
LRpipe.fit(X,Y)
preda=LRpipe.predict_proba(test_data)[:,0]
NBpipe.fit(X,Y)
predb=NBpipe.predict_proba(test_data)[:,0]
Hupipe.fit(X,Y)
predc=Hupipe.predict_proba(test_data)[:,0]
NNpipeline.fit(X,Y)
predd=NNpipeline.predict_proba(test_data)[:,0]
xgbpipeline.fit(X,Y)
prede=xgbpipeline.predict_proba(test_data)[:,0]
LRpipeu.fit(X,Y)
predf=LRpipeu.predict_proba(test_data)[:,0]
NBpipeu.fit(X,Y)
predg=NBpipeu.predict_proba(test_data)[:,0]
Hupipeu.fit(X,Y)
predh=Hupipeu.predict_proba(test_data)[:,0]
NNpipelineu.fit(X,Y)
predi=NNpipelineu.predict_proba(test_data)[:,0]
xgbpipelineu.fit(X,Y)
predj=xgbpipelineu.predict_proba(test_data)[:,0]

In [193]:
meta_Sub=create_meta_dataset(data_x,[preda,predb,predc,predd,prede,predf,predg,predh,predi,predj])

In [207]:
meta_Sub.to_csv("finalsubonthis.csv",index=False)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,480,481,482,483,484,485,486,487,488,489
0,0,0,0,0,0,0,0,0,0,0,...,0.679736,0.6882,0.668788,0.900036,0.74639,0.691894,0.707052,0.685049,0.749754,0.811481
1,0,0,0,0,0,0,0,0,0,110.039,...,0.598777,0.695921,0.643315,0.877278,0.843356,0.472842,0.675842,0.714533,0.631953,0.457423
2,0,0,0,0,0,0,0,0,0,0,...,0.584107,0.698089,0.554567,0.857488,0.548223,0.597923,0.679322,0.605284,0.73628,0.688046
3,0,0,0,0,2,0,0,0,0,103.289,...,0.530908,0.682033,0.563266,0.86751,0.719353,0.516188,0.673082,0.576077,0.592839,0.516183
4,0,0,0,0,0,0,0,0,0,0,...,0.550151,0.653687,0.55383,0.865653,0.786533,0.525782,0.696515,0.575732,0.631356,0.540358
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
110894,0,0,0,0,0,0,0,0,0,99.0374,...,0.426109,0.386041,0.451428,0.658401,0.495513,0.467256,0.423394,0.471621,0.32226,0.141699
110895,0,0,0,0,0,0,0,0,0,45.2348,...,0.529371,0.631679,0.50179,0.79502,0.476663,0.488468,0.67316,0.549674,0.449758,0.422342
110896,0,0,0,0,0,0,0,0,0,0,...,0.317568,0.464754,0.348491,0.655748,0.365785,0.368593,0.412338,0.271705,0.28842,0.282669
110897,0,0,0,0,2,0,0,0,0,125.156,...,0.514046,0.659041,0.492824,0.815592,0.455741,0.52341,0.645816,0.515085,0.407939,0.38763


In [173]:
metamodel=SGDClassifier(random_state=69420,class_weight='balanced',alpha=0.1,penalty='l2',loss='modified_huber',max_iter=1000)

In [186]:
metapipeline.fit(meta_X_trainfull,data_y_full)

ValueError: A given column is not a column of the dataframe

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1127,1128,1129,1130,1131,1132,1133,1134,1135,1136
0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,...,4.697553e-01,0.658797,0.557112,0.839013,0.664032,5.664434e-01,0.655996,0.536009,0.524730,0.451393
1,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,...,6.188353e-01,0.698744,0.649098,0.883309,0.794910,6.536396e-01,0.686296,0.595221,0.705050,0.619614
2,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,...,5.643031e-01,0.676954,0.603433,0.854493,0.812062,6.135737e-01,0.637083,0.558544,0.577108,0.635964
3,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,...,5.159329e-01,0.615921,0.564004,0.807858,0.754349,5.609170e-01,0.598017,0.559978,0.535260,0.631627
4,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,...,5.062151e-01,0.697835,0.572949,0.859075,0.451381,5.083434e-01,0.687193,0.525644,0.622396,0.612515
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22175,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,...,6.621118e-01,0.710400,0.757415,0.898840,0.700400,7.454531e-01,0.696564,0.685663,0.692431,0.726913
22176,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,...,4.482802e-01,0.633468,0.559499,0.835497,0.627696,5.010745e-01,0.708740,0.560066,0.712226,0.618361
22177,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,...,5.569443e-10,0.011207,0.375311,0.183202,0.008709,4.662937e-15,0.015543,0.341156,0.056396,0.003682
22178,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,...,7.015876e-01,0.651998,0.756759,0.874037,0.825551,7.438001e-01,0.652104,0.702206,0.703200,0.842590


In [197]:
meta_X_testfull

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,480,481,482,483,484,485,486,487,488,489
0,0,0,0,1,0,0,0,0,0,0,...,0.469755,0.658797,0.557112,0.839013,0.664032,0.566443,0.655996,0.536009,0.52473,0.451393
1,0,0,0,0,0,0,0,0,0,0,...,0.618835,0.698744,0.649098,0.883309,0.79491,0.65364,0.686296,0.595221,0.70505,0.619614
2,0,0,0,0,0,0,0,1,0,107.263,...,0.564303,0.676954,0.603433,0.854493,0.812062,0.613574,0.637083,0.558544,0.577108,0.635964
3,0,0,0,0,1,0,0,0,0,68.0762,...,0.515933,0.615921,0.564004,0.807858,0.754349,0.560917,0.598017,0.559978,0.53526,0.631627
4,0,0,0,0,0,0,0,2,0,0,...,0.506215,0.697835,0.572949,0.859075,0.451381,0.508343,0.687193,0.525644,0.622396,0.612515
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22175,0,0,0,0,4,0,0,0,0,0,...,0.662112,0.7104,0.757415,0.89884,0.7004,0.745453,0.696564,0.685663,0.692431,0.726913
22176,0,0,0,0,0,0,0,0,0,60.8515,...,0.44828,0.633468,0.559499,0.835497,0.627696,0.501074,0.70874,0.560066,0.712226,0.618361
22177,0,0,0,0,0,0,0,0,1,30.7255,...,5.56944e-10,0.0112067,0.375311,0.183202,0.00870919,4.66294e-15,0.0155426,0.341156,0.0563959,0.00368202
22178,0,0,0,0,0,0,0,0,0,13.4788,...,0.701588,0.651998,0.756759,0.874037,0.825551,0.7438,0.652104,0.702206,0.7032,0.84259


In [198]:
metamodel.fit(meta_X_trainfull,data_y_full)

ValueError: could not convert string to float: 'E'

In [204]:
meta_X_trainfull.to_csv('C:/hack/training.csv')

In [206]:
meta_X_subfull.to_csv('C:/hack/submissiontrain.csv')

In [205]:
meta_X_testfull.to_csv('C:/hack/valid.csv')

In [208]:
data_y_full.to_csv('C:/hack/test.csv')

In [209]:
data_y_subfull.to_csv('C:/hack/testfull.csv')

In [211]:
y_test.to_csv('C:/hack/validy.csv')

In [214]:
meta_Sub.to_csv('C:/hack/predonthis.csv')