In [1]:
import pandas as pd
import numpy as np

from stump import *

In [2]:
# Prepare the train/test data in the numeric format the model expects.

from sklearn.impute import SimpleImputer

train = pd.read_csv('train_classif.csv')
test = pd.read_csv('test_classif.csv')

relevant_features = ['Pclass','Sex','Age','SibSp','Parch','Fare','Embarked']

# Impute missing entries with the 'most_frequent' strategy; the imputer is
# fitted on the training frame only and then reused unchanged on the test frame.
imputer = SimpleImputer(strategy='most_frequent')
train[relevant_features] = imputer.fit_transform(train[relevant_features])
test[relevant_features] = imputer.transform(test[relevant_features])

# Encode the categorical columns as integer codes, using the same mapping
# for both frames so train and test stay comparable.
category_codes = {
    'Sex': {'male': 0, 'female': 1},
    'Embarked': {'S': 0, 'C': 1, 'Q': 2},
}
for frame in (train, test):
    for column, codes in category_codes.items():
        frame[column] = frame[column].map(codes)

# Feature matrices and training labels as NumPy arrays.
X = train[relevant_features].to_numpy()
X_test = test[relevant_features].to_numpy()
Y = train['Survived'].to_numpy()

In [3]:
class adaboost_for_classif:
    """Boosted ensemble of decision stumps for binary classification.

    Each fitted stump is stored in ``stump_list``; predictions are a
    weighted vote of the stumps, using each stump's ``stump_weight``.
    """

    def __init__(self):
        # Fitted stumps, in the order they were added.
        self.stump_list = []

    def add_stump(self, X, Y, weights):
        """Fit one more stump and return the weights it hands back.

        The stump is created via ``stump()`` and fitted with
        ``best_stump(X, Y, weights, 'classif')``; it is appended to
        ``stump_list`` only after that call has returned.
        """
        learner = stump()
        updated_weights = learner.best_stump(X, Y, weights, 'classif')
        self.stump_list.append(learner)
        return updated_weights

    def build(self, X, Y, n_iter):
        """Add ``n_iter`` stumps, starting from uniform sample weights.

        ``X`` must expose ``shape[0]`` (the number of samples).
        """
        n_samples = X.shape[0]
        # Every sample starts with the same weight, 1 / n_samples.
        weights = [1 / n_samples for _ in range(n_samples)]
        for _ in range(n_iter):
            weights = self.add_stump(X, Y, weights)

    def predict_row(self, row):
        """Return whether the normalised weighted vote for ``row`` exceeds 0.5."""
        weighted_votes = 0
        total_weight = 0

        for learner in self.stump_list:
            # Values at or below the threshold go to the left leaf.
            if row[learner.feature] <= learner.threshold:
                leaf = learner.left
            else:
                leaf = learner.right

            weighted_votes += leaf.classif * learner.stump_weight
            total_weight += learner.stump_weight

        return weighted_votes / total_weight > 0.5

    def predict(self, X_test):
        """Return a list with one ``predict_row`` result per row of ``X_test``."""
        return [self.predict_row(X_test[idx]) for idx in range(X_test.shape[0])]

In [4]:
# Train the ensemble on the prepared training data: 10 boosting rounds.
o = adaboost_for_classif()
o.build(X, Y, n_iter=10)

[0.0007132667617688992, 0.0007132667617688992, 0.0007132667617688992, 0.0007132667617688992, 0.0007132667617688992, 0.0007132667617688992, 0.0007132667617688992, 0.0007132667617688992, 0.0007132667617688992, 0.0007132667617688992, 0.0007132667617688992, 0.0007132667617688992, 0.0007132667617688992, 0.0007132667617688992, 0.0026315789473684227, 0.0007132667617688992, 0.0007132667617688992, 0.0026315789473684227, 0.0026315789473684227, 0.0007132667617688992, 0.0007132667617688992, 0.0026315789473684227, 0.0007132667617688992, 0.0026315789473684227, 0.0026315789473684227, 0.0007132667617688992, 0.0007132667617688992, 0.0007132667617688992, 0.0007132667617688992, 0.0007132667617688992, 0.0007132667617688992, 0.0007132667617688992, 0.0007132667617688992, 0.0007132667617688992, 0.0007132667617688992, 0.0007132667617688992, 0.0026315789473684227, 0.0007132667617688992, 0.0026315789473684227, 0.0007132667617688992, 0.0026315789473684227, 0.0026315789473684227, 0.0007132667617688992, 0.00071326

In [5]:
# predict() returns the result of a `> 0.5` comparison per row, i.e. booleans.
# Cast them to integers so the column (and the CSV written below) holds 0/1
# labels instead of True/False.
test['Survived'] = np.asarray(o.predict(X_test)).astype(int)

# Keep only the passenger id and the predicted label for the submission file.
submissions = test[['PassengerId', 'Survived']]

submissions.to_csv('submission.csv', index=False)

0
0
1
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
1
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
1
0
1
0
0
0
0
0
0
0
0
1
0
1
0
0
0
0
0
0
1
0
1
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
1
0
0
0
0
0
0
1
1
0
1
0
1
1
1
1
1
0
1
0
1
0
0
0
0
0
0
1
1
0
1
0
1
1
1
1
1
1
1
1
1
0
0
0
0
0
0
0
1
1
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
1
0
1
0
0
0
0
0
0
0
0
1
1
1
0
1
1
1
1
1
0
0
0
0
1
0
0
0
0
0
1
1
0
1
0
0
0
0
0
0
0
1
1
1
0
1
1
1
1
1
1
1
1
1
0
1
1
1
1
1
0
0
0
0
0
0
0
0
0
0
1
1
1
1
0
1
1
1
1
1
0
0
1
0
0
0
0
0
0
0
0
1
0
1
0
0
0
0
0
0
0
0
1
1
0
0
0
0
0
0
0
1
0
1
0
0
0
0
0
0
0
1
0
1
0
0
0
0
0
0
1
0
0
1
0
0
0
0
0
0
1
0
0
1
0
0
0
0
0
0
0
1
1
1
0
1
1
1
1
1
0
0
1
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
1
1
1
1
1
0
0
1
0
0
0
0
0
0
0
0
1
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
1
0
0
0
0
0
0
0
0
1
1
0
1
0
1
1
1
1
1
0
0
0
0
0
0
0
0
0
0
0
1
1
1
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
1
1
1
1
0
1
1
1
1
1
1
0
0
1
0
0
0
0
0
0


In [6]:
# Sanity check: total of the predicted 'Survived' column on the test set.
predicted_survivors = test['Survived'].sum()
print(predicted_survivors)

131
