# Creating Contest Submissions

In [1]:
import pandas as pd
from scripts import get_metrics
from sklearn.ensemble import BaggingClassifier

In [2]:
df = pd.read_csv('data/cleaned_data.csv', index_col='id')
y = df['status_group']
X = df.drop(['status_group'], axis=1)

In [3]:
X_submission = pd.read_csv('data/X_submission_cleaned.csv', index_col='id')

## K Nearest Neighbors

In [4]:
from sklearn.neighbors import KNeighborsClassifier

In [5]:
knn = KNeighborsClassifier(n_neighbors=12, weights='distance', leaf_size=15, p=1)
knn.fit(X, y)

KNeighborsClassifier(leaf_size=15, n_neighbors=12, p=1, weights='distance')

In [6]:
y_submission = pd.DataFrame(knn.predict(X_submission), index=X_submission.index, columns=['status_group'])
y_submission.to_csv('data/submissions/k_nearest_neighbors.csv')

79.99% accuracy upon submission.

## Random Forest

In [4]:
from sklearn.ensemble import RandomForestClassifier

In [5]:
forest = BaggingClassifier(RandomForestClassifier(n_estimators=500, max_depth=None, max_features=15))  
forest.fit(X, y)

BaggingClassifier(base_estimator=RandomForestClassifier(max_features=15,
                                                        n_estimators=500))

In [12]:
y_submission = pd.DataFrame(forest.predict(X_submission), index=X_submission.index, columns=['status_group'])
y_submission.to_csv('data/submissions/bagged_random_forests.csv')

81.49% accuracy upon submission.

## XGBoost

In [13]:
from xgboost import XGBClassifier

In [14]:
xgb = XGBClassifier(eta=0.05, gamma=0, max_depth=12, min_child_weight=0.75, subsample=0.75)
xgb.fit(X, y)

XGBClassifier(eta=0.05, max_depth=12, min_child_weight=0.75,
              objective='multi:softprob', subsample=0.75)

In [17]:
y_submission = pd.DataFrame(xgb.predict(X_submission), index=X_submission.index, columns=['status_group'])
y_submission.to_csv('data/submissions/xg_boost.csv')

81.50% accuracy upon submission.

## Support Vector Machines

In [4]:
from sklearn.svm import SVC

In [5]:
svc = BaggingClassifier(SVC(C=5, kernel='rbf', gamma=0.1, class_weight=None))  
svc.fit(X, y)

BaggingClassifier(base_estimator=SVC(C=5, gamma=0.1))

In [6]:
y_submission = pd.DataFrame(svc.predict(X_submission), index=X_submission.index, columns=['status_group'])
y_submission.to_csv('data/submissions/support_vector_machines.csv')

78.04% accuracy upon submission.