# BigML Kaggle API 

## Import needed libraries

In [None]:
import pandas as pd
import bigml.api
from pprint import pprint

## Connect to BigML API

In [None]:
# Build the BigML client used by every later cell.
# NOTE(review): no credentials are passed here, so they presumably come from
# the environment (e.g. BIGML_USERNAME / BIGML_API_KEY) — confirm before running.
api = bigml.api.BigML()

## Create source and dataset for train and test

In [None]:
# Register the full training CSV as a BigML source.
full_train_source = api.create_source(
    './datasets/trainfull.csv',
)
# api.ok presumably waits until the resource is ready; its result is shown
# as the cell output.
api.ok(full_train_source)

In [None]:
# Turn the training source into a dataset.
full_train_dataset = api.create_dataset(
    full_train_source,
)
# The status returned by api.ok is displayed as the cell output.
api.ok(full_train_dataset)

In [None]:
# Register the test CSV as a BigML source.
test_source = api.create_source(
    './datasets/test.csv',
)
# The status returned by api.ok is displayed as the cell output.
api.ok(test_source)

In [None]:
# Turn the test source into a dataset (used later for batch prediction).
test_dataset = api.create_dataset(
    test_source,
)
# The status returned by api.ok is displayed as the cell output.
api.ok(test_dataset)

## Split trainfull into train and val

In [None]:
# Training split: sample the full training dataset at a rate of 0.8 with a
# fixed seed. The validation cell uses the same rate and seed.
train_split_args = {
    "name": "Train",
    "sample_rate": 0.8,
    "seed": "my seed",
}
dataset_train = api.create_dataset(full_train_dataset, train_split_args)
api.ok(dataset_train)

In [None]:
# Validation split: same sample rate and seed as the training split, plus
# "out_of_bag": True.
# NOTE(review): this presumably selects the rows left out of the 0.8 sample
# (the remaining ~20%) — confirm against BigML's sampling documentation.
validation_split_args = {
    "name": "Validation",
    "sample_rate": 0.8,
    "seed": "my seed",
    "out_of_bag": True,
}
dataset_val = api.create_dataset(full_train_dataset, validation_split_args)
api.ok(dataset_val)

## Create Ensemble

In [None]:
# Train an ensemble on the training split, with SeriousDlqin2yrs as the
# objective field.
ensemble_args = {"objective_field": "SeriousDlqin2yrs"}
ensemble = api.create_ensemble(dataset_train, ensemble_args)
api.ok(ensemble)

## Batch prediction

In [None]:
# Score the test dataset with the ensemble. The output is asked to carry the
# 'Id' field and probabilities; a later cell relies on both when it reads the
# downloaded CSV.
batch_prediction_args = {
    'output_fields': ['Id'],
    'probabilities': True,
}
batch_prediction = api.create_batch_prediction(
    ensemble,
    test_dataset,
    batch_prediction_args,
)
api.ok(batch_prediction)

In [None]:
# Save the batch prediction output locally; a later cell reads this file.
# NOTE(review): assumes the ./results directory already exists — confirm.
api.download_batch_prediction(
    batch_prediction,
    filename='./results/my_predictions.csv',
)

## Get metrics

In [None]:
# Evaluate the ensemble against the validation split.
evaluation = api.create_evaluation(
    ensemble,
    dataset_val,
)
# Later cells read evaluation['object']['result'], so the evaluation
# presumably has to be finished first — api.ok is called for that.
api.ok(evaluation)

In [None]:
# Pull the confusion matrix out of the evaluation's model results and
# pretty-print it.
model_results = evaluation['object']['result']['model']
matrice_confusion = model_results['confusion_matrix']
pprint(matrice_confusion)

In [None]:
# Pull the average area under the ROC curve out of the evaluation's model
# results and pretty-print it.
model_results = evaluation['object']['result']['model']
roc = model_results['average_area_under_roc_curve']
pprint(roc)

## Data formatting

Formatting data as required by the Kaggle competition rules.

In [None]:
# Load the downloaded predictions and reshape them for submission:
#   - the first CSV column becomes the index (presumably 'Id' — confirm),
#   - the 'SeriousDlqin2yrs' and '0 probability' columns are dropped,
#   - '1 probability' is renamed to 'Probability'.
my_prediction_df = (
    pd.read_csv('./results/my_predictions.csv', index_col=0)
    .drop(columns=['SeriousDlqin2yrs', '0 probability'])
    .rename(columns={"1 probability": "Probability"})
)
# Bare expression so the notebook renders the resulting frame.
my_prediction_df

## Save Prediction

In [None]:
# Write the formatted predictions (index included) to the submission file.
submission_path = './results/kaggle_sub_bigml.csv'
my_prediction_df.to_csv(submission_path)