In [14]:
import pandas as pd
import joblib as jb
from sklearn import ensemble, model_selection, pipeline, metrics

In [6]:
df = pd.read_csv('spaceTrain.csv')
# removing useless columns
features = df.drop(['PassengerId','Cabin','Name','Transported'],axis = 1)
target = df.Transported
numerical_features = [feat for feat in features if features[feat].dtypes !='O']
categorical_features = [feat for feat in features if feat not in numerical_features]

## Split your dataset into training and testing dataset
x_train,x_test,y_train,y_test = model_selection.train_test_split(features,target,test_size = 0.2,stratify=target)
cleanIt = jb.load('clean.joblib')

In [7]:
parameters = {
    'n_estimators': [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 20],
    'learning_rate': [(0.97 + x / 100) for x in range(0, 8)],
    'algorithm': ['SAMME', 'SAMME.R']
}

clf = ensemble.AdaBoostClassifier()

grid = model_selection.GridSearchCV(clf,parameters,cv=5,verbose=0)
grid.fit(cleanIt.fit_transform(x_train),y_train)

In [8]:
grid.best_estimator_,grid.best_params_,grid.best_score_

(AdaBoostClassifier(learning_rate=1.04, n_estimators=20),
 {'algorithm': 'SAMME.R', 'learning_rate': 1.04, 'n_estimators': 20},
 0.7807012190391468)

In [9]:
model = ensemble.AdaBoostClassifier(n_estimators=20,learning_rate=1.04,algorithm='SAMME.R')

In [10]:
## Let's fit it into pipeline
Final_pipeline = pipeline.make_pipeline(cleanIt,model)

In [11]:
Final_pipeline.fit(x_train,y_train)

In [12]:
y_pred = Final_pipeline.predict(x_test)

In [15]:
accuracy = metrics.accuracy_score(y_test,y_pred)
roc_auc = metrics.roc_auc_score(y_test,y_pred)
f1score = metrics.f1_score(y_test,y_pred)

In [16]:
accuracy,roc_auc,f1score

(0.7883841288096607, 0.7884840764668223, 0.7867902665121669)