# GiveMeCredit Kaggle Submission

#### *Fonctions*

In [58]:
def features(df):
    """Return a new DataFrame with the engineered credit-scoring features added.

    The input frame is left untouched: all derived columns are computed on a
    copy. Missing values (in original and derived columns alike) are replaced
    by 0 in the returned frame, and its index is named ``Id``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``age``, ``MonthlyIncome``, ``DebtRatio``,
        ``NumberOfDependents``, ``NumberOfTime30-59DaysPastDueNotWorse``,
        ``NumberOfTime60-89DaysPastDueNotWorse``,
        ``NumberOfOpenCreditLinesAndLoans`` and
        ``NumberRealEstateLoansOrLines``.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with these extra columns, in this order: ``isOld``,
        ``IncomePerPerson``, ``MonthlyDebt``, ``MonthlyBalance``,
        ``DebtPerPerson``, ``BalancePerPerson``,
        ``NumberOfTime30-89DaysPastDueNotWorse`` and
        ``NumbersOfOpen-NumberRealEstate``.
    """
    # Work on a copy: the column assignments below would otherwise add
    # half-finished (NaN-containing) columns to the caller's frame.
    df = df.copy()

    # 'isOld' is stored as the strings '1' / '0'. Rows whose age is missing
    # match neither mask and are turned into 0 by the fillna below.
    df.loc[df.age > 80, 'isOld'] = '1'
    df.loc[df.age <= 80, 'isOld'] = '0'

    # The applicant plus their dependents; shared by all per-person ratios.
    household_size = df['NumberOfDependents'] + 1

    df['IncomePerPerson'] = df['MonthlyIncome'] / household_size
    df['MonthlyDebt'] = df['MonthlyIncome'] * df['DebtRatio']
    df['MonthlyBalance'] = df['MonthlyIncome'] - df['MonthlyDebt']
    df['DebtPerPerson'] = df['MonthlyDebt'] / household_size
    df['BalancePerPerson'] = df['MonthlyBalance'] / household_size
    df['NumberOfTime30-89DaysPastDueNotWorse'] = (
        df['NumberOfTime30-59DaysPastDueNotWorse']
        + df['NumberOfTime60-89DaysPastDueNotWorse']
    )
    df['NumbersOfOpen-NumberRealEstate'] = (
        df['NumberOfOpenCreditLinesAndLoans']
        - df['NumberRealEstateLoansOrLines']
    )

    df = df.fillna(0)
    # rename_axis returns a new object, so no index that might still be shared
    # with the input frame is renamed in place.
    return df.rename_axis('Id')

#### *Chargement du Dataset Full Give Me Credit - Kaggle*

In [59]:
from bigml.api import BigML
from pandas import read_csv



# Load the Kaggle "Give Me Credit" training CSV from S3; the first CSV column
# becomes the DataFrame index.
df = read_csv('https://oml-data.s3.amazonaws.com/kaggle-give-me-credit-train.csv', index_col=0)

#### *Connexion BigML*

In [60]:
# BigML client bound to a fixed project id; every later `api.*` call in this
# notebook goes through this object.
api = BigML(project="project/5d94a3525a213962e20002f5") # AUTH is kept in docker/auth.env

#### *Modifications du dataset Trainfull*

In [61]:
# Add the engineered features to the training frame, then persist it as a CSV
# that is uploaded to BigML in a later cell.
# NOTE(review): to_csv presumably needs the 'files_csv' directory to exist
# already - confirm.
df = features(df)
df.to_csv('files_csv/origin_dataset_modif.csv')
print("Load & Modifications OK")

Load & Modifications OK


#### *Création sur BIGML source -> dataset*

In [62]:
# Create a BigML source from the modified training CSV
source = api.create_source('files_csv/origin_dataset_modif.csv')
# NOTE(review): api.ok presumably blocks until the resource is ready - confirm
# against the BigML bindings documentation.
api.ok(source)
# Create a dataset from that source
origin_dataset = api.create_dataset(source)
api.ok(origin_dataset)

True

#### *Split du trainfull en Train/Test*

In [63]:
# Derive two datasets from origin_dataset using the same sample_rate (0.8) and
# the same seed. The second one additionally sets "out_of_bag".
# NOTE(review): presumably out_of_bag selects the rows NOT drawn by the
# sample, i.e. the complementary 20% - confirm against the BigML docs.
train_dataset = api.create_dataset(
    origin_dataset, {"name": "GiveMeCredit | TrainFull | Training",
                     "sample_rate": 0.8, "seed": "my seed"})
test_dataset = api.create_dataset(
    origin_dataset, {"name": "GiveMeCredit | TrainFull | Test",
                     "sample_rate": 0.8, "seed": "my seed",
                     "out_of_bag": True})
print("Split OK")

Split OK


#### *Création d'un modèle ensemble sur la partie Train du dataset (objective_field = ce qu'on cherche à prédire)*

In [64]:
# Create an ensemble on the training dataset; "SeriousDlqin2yrs" is the
# field to predict.
ensemble = api.create_ensemble(train_dataset , {"objective_field" : "SeriousDlqin2yrs"})
print("Création model OK")

Création model OK


#### *Création et téléchargement de l'évaluation de notre modèle*

In [65]:
# Evaluate the ensemble against the held-out test dataset and export the
# evaluation to a local JSON file.
evaluation = api.create_evaluation(ensemble, test_dataset)
api.export(evaluation, filename="EvaluationModel/my_evaluation_model.json")

'EvaluationModel/my_evaluation_model.json'

#### *Vérification du modèle sur les 20% du Train Full et téléchargement*


In [66]:
# Run a batch prediction of the ensemble on the test dataset, requesting a
# header row, all input fields and per-class probabilities, then download the
# result as a CSV for manual review.
batch_prediction = api.create_batch_prediction(ensemble, test_dataset,{"header": True, "all_fields": True, "probabilities": True})
api.ok(batch_prediction)
api.download_batch_prediction(batch_prediction,filename='files_csv/GiveMeCredit_Review_Ensemble.csv')


'files_csv/GiveMeCredit_Review_Ensemble.csv'

#### *Modifications fichier test kaggle nouvelles features*

In [67]:
# Load the Kaggle test CSV from S3 (first column used as index).
df_test_kaggle = read_csv('https://oml-data.s3.amazonaws.com/kaggle-give-me-credit-test.csv', index_col=0)

# Apply the same feature engineering as for the training data and persist the
# result for upload to BigML.
df_test_kaggle = features(df_test_kaggle)
df_test_kaggle.to_csv('files_csv/kaggle_dataset_modif.csv')
print("Load & Modifications OK")

Load & Modifications OK


#### *Création d'une source avec le fichier modifié test kaggle*

In [68]:
# Create a BigML source from the modified Kaggle test CSV.
source_kaggle = api.create_source('files_csv/kaggle_dataset_modif.csv')
api.ok(source_kaggle)

True

#### *Création d'un dataset à partir de la source*

In [69]:
# Create a BigML dataset from the Kaggle test source.
kaggle_dataset= api.create_dataset(source_kaggle)
api.ok(kaggle_dataset)

True

#### *Prédiction de notre modèle sur le fichier test de Kaggle et téléchargement*

In [70]:
# Predict on the Kaggle test dataset with the trained ensemble, keeping all
# input fields and per-class probabilities, then download the result as CSV.
# The formatting cell further down reads the 'Id' and '1 probability' columns
# from this file.
batch_prediction_kaggle = api.create_batch_prediction(ensemble, kaggle_dataset,{"all_fields": True,"probabilities": True})
api.ok(batch_prediction_kaggle)
api.download_batch_prediction(batch_prediction_kaggle,filename='files_csv/GiveMeCredit_Prediction_Kaggle.csv')

'files_csv/GiveMeCredit_Prediction_Kaggle.csv'

#### *Formatage du fichier de prédiction au format attendu par Kaggle*

In [71]:
from pandas import read_csv

# Load the raw batch prediction downloaded from BigML.
df_final_prediction = read_csv("files_csv/GiveMeCredit_Prediction_Kaggle.csv")
# Keep only the row id and the probability of class '1'.
keep_col = ['Id','1 probability']
# Select and rename in one non-inplace chain: renaming a column selection with
# inplace=True is the pattern that raises pandas' SettingWithCopyWarning.
new_final_prediction = df_final_prediction[keep_col].rename(
    columns={'1 probability': 'Probability'}
)
# Write the submission file without the positional index column.
new_final_prediction.to_csv('files_csv/GiveMeCredit_Kaggle_format.csv', index=False)
print("Modifications OK")

Modifications OK


#### *Envoi de nos résultats à Kaggle*

In [72]:
import kaggle
# Submit the formatted prediction file to the "GiveMeSomeCredit" competition
# with the message "BigML ensemble".
submission_file = 'files_csv/GiveMeCredit_Kaggle_format.csv'
kaggle.api.competition_submit(submission_file, "BigML ensemble", "GiveMeSomeCredit")

100%|██████████| 1.66M/1.66M [00:05<00:00, 322kB/s]


Successfully submitted to Give Me Some Credit