# Predicting credit repayment probability

In [1]:
import bigml.api

In [2]:
# Open the BigML API connection, passing the id of the project to work in.
# No credentials are passed here — presumably they are read from the
# environment (e.g. BIGML_USERNAME / BIGML_API_KEY); confirm before running.
api = bigml.api.BigML(project='project/5db1644859f5c33b3c00076c')
# Fetch the ensemble (by resource id) that the prediction cells below use.
ensemble = api.get_ensemble("ensemble/5dc43212e476847468018222")

In [3]:
# Raw applicant record: the feature values exactly as supplied, before any
# engineered features are added.
raw_data = {
    "RevolvingUtilizationOfUnsecuredLines": 0.01703559,
    "NumberOfDependents": 1,
    "DebtRatio": 0,
    "age": 42,
    "NumberOfOpenCreditLinesAndLoans": 6,
    "NumberRealEstateLoansOrLines": 1,
    "NumberOfTime30-59DaysPastDueNotWorse": 1,
    "NumberOfTime60-89DaysPastDueNotWorse": 0,
    "NumberOfTimes90DaysLate": 0,
}

# Independent working copy of the record, so that later modifications to it
# leave `raw_data` unchanged.
data_to_predict_on = dict(raw_data)

Get the columns

In [4]:
def get_columns(data):
    """Return the column (feature) names of ``data``, i.e. ``data.keys()``.

    The value is whatever ``data.keys()`` returns; for a plain ``dict`` that
    is a live keys view, so keys added to ``data`` later are visible through
    the returned object.
    """
    column_names = data.keys()
    return column_names

In [5]:
# Column names of the record to predict on, as returned by get_columns().
data_columns = get_columns(data_to_predict_on)

Automate adding the engineered features to the data

In [6]:
def add_features(data):
    """Add the engineered credit features to the record ``data`` in place.

    The function works on the mapping it is given (not on notebook-level
    variables), so it can be applied to any record.

    Keys written to ``data``:
      * ``MonthlyIncome`` -- set to 0 only when the key is absent.
      * ``NumberOfTimes30DaysOrMoreLate`` -- sum of the three past-due counters.
      * ``IncomePerPerson`` -- ``MonthlyIncome / (NumberOfDependents + 1)``.
      * ``MonthlyDebt`` -- ``MonthlyIncome * DebtRatio``.
      * ``MonthlyBalance`` -- ``MonthlyIncome - MonthlyDebt``.

    Args:
        data: mutable mapping holding at least ``NumberOfDependents``,
            ``DebtRatio``, ``NumberOfTime30-59DaysPastDueNotWorse``,
            ``NumberOfTime60-89DaysPastDueNotWorse`` and
            ``NumberOfTimes90DaysLate``.

    Returns:
        None. ``data`` itself is modified.

    Raises:
        KeyError: if one of the required keys is missing from ``data``.
    """
    # Default a missing income to 0 so the derived features can be computed.
    if "MonthlyIncome" not in data:
        data["MonthlyIncome"] = 0
    monthly_income = data["MonthlyIncome"]

    data["NumberOfTimes30DaysOrMoreLate"] = (
        data["NumberOfTime30-59DaysPastDueNotWorse"]
        + data["NumberOfTime60-89DaysPastDueNotWorse"]
        + data["NumberOfTimes90DaysLate"]
    )
    # +1 counts the applicant in addition to the dependents.
    data["IncomePerPerson"] = monthly_income / (data["NumberOfDependents"] + 1)
    data["MonthlyDebt"] = monthly_income * data["DebtRatio"]
    data["MonthlyBalance"] = monthly_income - data["MonthlyDebt"]

In [7]:
# Add the engineered features to the working record (it is modified in place;
# the call's return value is not used), then print it next to the untouched
# raw copy for comparison.
add_features(data_to_predict_on)
print(data_to_predict_on)
print(raw_data)

{'RevolvingUtilizationOfUnsecuredLines': 0.01703559, 'NumberOfDependents': 1, 'DebtRatio': 0, 'age': 42, 'NumberOfOpenCreditLinesAndLoans': 6, 'NumberRealEstateLoansOrLines': 1, 'NumberOfTime30-59DaysPastDueNotWorse': 1, 'NumberOfTime60-89DaysPastDueNotWorse': 0, 'NumberOfTimes90DaysLate': 0, 'MonthlyIncome': 0, 'NumberOfTimes30DaysOrMoreLate': 1, 'IncomePerPerson': 0.0, 'MonthlyDebt': 0, 'MonthlyBalance': 0}
{'RevolvingUtilizationOfUnsecuredLines': 0.01703559, 'NumberOfDependents': 1, 'DebtRatio': 0, 'age': 42, 'NumberOfOpenCreditLinesAndLoans': 6, 'NumberRealEstateLoansOrLines': 1, 'NumberOfTime30-59DaysPastDueNotWorse': 1, 'NumberOfTime60-89DaysPastDueNotWorse': 0, 'NumberOfTimes90DaysLate': 0}


Making the prediction

In [8]:
# Request a prediction from the ensemble for the feature-enriched record;
# the last argument passes a "name" setting along with the request.
prediction = api.create_prediction(
    ensemble,
    data_to_predict_on,
    {"name": "credit prediction"},
)

In [9]:
# Pretty-print the "object" entry of the prediction response.
api.pprint(prediction["object"])

{   'boosted_ensemble': False,
    'category': 0,
    'code': 201,
    'combiner': None,
    'confidence': 0.94176,
    'confidence_bounds': {},
    'confidences': [['0', 0.94176], ['1', 0.03264]],
    'configuration': None,
    'configuration_status': False,
    'created': '2019-11-15T15:57:41.868299',
    'creator': 'nicookie',
    'credits': 0.01,
    'dataset': 'dataset/5dc431b9e476847468018212',
    'dataset_status': True,
    'description': '# Exo kaggle python api.BigML()',
    'ensemble': 'ensemble/5dc43212e476847468018222',
    'error_predictions': 0,
    'explanation': None,
    'fields': {   '000001': {   'column_number': 1,
                                'datatype': 'string',
                                'name': 'SeriousDlqin2yrs',
                                'optype': 'categorical',
                                'order': 1,
                                'preferred': True,
                                'term_analysis': {'enabled': True}},
                  '00

                           'probabilities': [['0', 0.97784], ['1', 0.02216]],
                           'probability': 0.97784,
                           'total_count': 120000}],
    'private': True,
    'probabilities': [['0', 0.95641], ['1', 0.04359]],
    'probability': 0.95641,
    'project': 'project/5db1644859f5c33b3c00076c',
    'query_string': '',
    'resource': 'prediction/5dcecaf566a9743d510c10a3',
    'shared': False,
    'source': 'source/5dc431835299631c82016d88',
    'source_status': True,
    'status': {   'code': 5,
                  'elapsed': 1424.0,
                  'message': 'The prediction has been created',
                  'progress': 1.0},
    'subscription': False,
    'tags': [],
    'task': 'classification',
    'type': 0,
    'updated': '2019-11-15T15:57:41.868327',
    'vote_count': 1.0,
    'vote_counts': [['0', 1], ['1', 0]]}


The model predicts "0", i.e. False for "will fail to repay": the applicant is expected to repay the credit (predicted probability ≈ 0.956).

In [10]:
# Request a second prediction from the same ensemble, this time for the raw
# record (without the engineered features), to compare with the first one.
raw_prediction = api.create_prediction(
    ensemble,
    raw_data,
    {"name": "prediction on raw data"},
)

In [11]:
# Pretty-print the "object" entry of the raw-data prediction response.
api.pprint(raw_prediction["object"])

{   'boosted_ensemble': False,
    'category': 0,
    'code': 201,
    'combiner': None,
    'confidence': 0.93213,
    'confidence_bounds': {},
    'confidences': [['0', 0.93213], ['1', 0.06505]],
    'configuration': None,
    'configuration_status': False,
    'created': '2019-11-15T15:57:44.494038',
    'creator': 'nicookie',
    'credits': 0.01,
    'dataset': 'dataset/5dc431b9e476847468018212',
    'dataset_status': True,
    'description': '# Exo kaggle python api.BigML()',
    'ensemble': 'ensemble/5dc43212e476847468018222',
    'error_predictions': 0,
    'explanation': None,
    'fields': {   '000001': {   'column_number': 1,
                                'datatype': 'string',
                                'name': 'SeriousDlqin2yrs',
                                'optype': 'categorical',
                                'order': 1,
                                'preferred': True,
                                'term_analysis': {'enabled': True}},
                  '00