### Import Necessary libraries

In [1]:
import json
import pandas as pd
import numpy as np
from typing import List, Dict, Union
from datetime import datetime

### Load the JSON file 

In [2]:
# Load the JSON file
with open("Senior_datascientist_assessment[7591]/Credit_bureau_sample_data.json") as f:
    credit_reports = json.load(f)

### Visualize the data in the JSON 

In [3]:
credit_reports

[{'application_id': 97,
  'data': {'consumerfullcredit': {'subjectlist': {'reference': '12876566',
     'consumerid': '17628566',
     'searchoutput': 'XXX '},
    'accountrating': {'noofotheraccountsbad': '0',
     'noofotheraccountsgood': '3',
     'noofretailaccountsbad': '0',
     'noofretailaccountsgood': '2',
     'nooftelecomaccountsbad': '0',
     'noofautoloanaccountsbad': '0',
     'noofautoloanccountsgood': '0',
     'noofhomeloanaccountsbad': '0',
     'nooftelecomaccountsgood': '0',
     'noofhomeloanaccountsgood': '0',
     'noofjointloanaccountsbad': '0',
     'noofstudyloanaccountsbad': '0',
     'noofcreditcardaccountsbad': '0',
     'noofjointloanaccountsgood': '0',
     'noofstudyloanaccountsgood': '0',
     'noofcreditcardaccountsgood': '1',
     'noofpersonalloanaccountsbad': '0',
     'noofpersonalloanaccountsgood': '1'},
    'enquirydetails': {'productid': '45',
     'matchingrate': '90',
     'subscriberenquiryengineid': '5012874225',
     'subscriberenquiryresu

The decision making data for each applicant are stored in the key 'consumerfullcredit' within the key 'data' in the JSON data

The categories of decision making data are computed as follows:

In [4]:
set(credit_reports[0]['data']['consumerfullcredit'])

{'accountmonthlypaymenthistory',
 'accountmonthlypaymenthistoryheader',
 'accountrating',
 'creditaccountsummary',
 'creditagreementsummary',
 'deliquencyinformation',
 'employmenthistory',
 'enquirydetails',
 'enquiryhistorytop',
 'guarantorcount',
 'guarantordetails',
 'personaldetailssummary',
 'subjectlist',
 'telephonehistory'}

It's important to observe the info in each categories

1. Personal details Summary

In [5]:
credit_reports[0]['data']['consumerfullcredit']['personaldetailssummary']

{'gender': 'Male',
 'header': 'PERSONAL DETAILS SUMMARY: XXX',
 'surname': 'XXX',
 'birthdate': '06/05/1991',
 'firstname': 'XXX',
 'otheridno': None,
 'cellularno': 'XXX',
 'consumerid': '128566',
 'dependants': '0',
 'othernames': 'XXX',
 'pencomidno': None,
 'nationality': 'Nigeria',
 'emailaddress': None,
 'nationalidno': None,
 'employerdetail': 'ALL MILITARY STAFFS',
 'postaladdress1': 'XXX ',
 'postaladdress2': 'KADUNA STATE NIGERIA',
 'postaladdress3': None,
 'postaladdress4': {'xml:space': 'preserve', '__content__': ' '},
 'hometelephoneno': 'XXX',
 'propertyownedtype': None,
 'bankverificationno': 'XXX',
 'residentialaddress1': 'XXX ',
 'residentialaddress2': 'KADUNA STATE  NIGERIA',
 'residentialaddress3': None,
 'residentialaddress4': {'xml:space': 'preserve', '__content__': ' '}}

<br>

2.Employment History

In [6]:
credit_reports[0]['data']['consumerfullcredit']['employmenthistory']

[{'occupation': 'PUBLIC SERVANTS',
  'updatedate': '16/09/2016',
  'updateondate': '16/09/2016',
  'employerdetail': 'ALL MILITARY STAFFS'},
 {'occupation': 'PUBLIC SERVANTS'},
 {'occupation': 'PUBLIC SERVANTS'},
 {'occupation': 'Army Police Defence', 'employerdetail': 'Nigerian navy'},
 {'occupation': 'PUBLIC SERVANTS'}]

<br>

3. Telephone history

In [7]:
credit_reports[0]['data']['consumerfullcredit']['telephonehistory']

[{'homenoupdatedondate': '08/04/2016',
  'hometelephonenumber': 'XXX',
  'mobiletelephonenumber': 'XXX'},
 {'homenoupdatedondate': '18/01/2016',
  'hometelephonenumber': 'XXX',
  'mobiletelephonenumber': 'XXX'},
 {'homenoupdatedondate': '27/08/2015',
  'hometelephonenumber': 'XXX',
  'mobiletelephonenumber': 'XXX'}]

<br>

4. Gaurantor Count

In [8]:
credit_reports[0]['data']['consumerfullcredit']['guarantorcount']

{'accounts': '0', 'guarantorssecured': '0'}

<br>

5. Gaurantor Details

In [9]:
credit_reports[0]['data']['consumerfullcredit']['guarantordetails']

{'guarantorgender': None,
 'guarantorotherid': None,
 'guarantoraddress1': None,
 'guarantoraddress2': None,
 'guarantoraddress3': None,
 'guarantorpassport': None,
 'guarantorfirstname': None,
 'guarantorothername': None,
 'guarantordateofbirth': '1900-01-01T00:00:00+01:00',
 'guarantornationalidno': None,
 'guarantorhometelephone': None,
 'guarantorworktelephone': None,
 'guarantordriverlicenceno': None,
 'guarantormobiletelephone': None}

<br>

6. delinquency information

In [10]:
credit_reports[0]['data']['consumerfullcredit']['deliquencyinformation']

{'accountno': '2150224012',
 'periodnum': '20140930',
 'subscribername': 'First City Monument Bank Ltd Lagos',
 'monthsinarrears': '13'}

<br>

7.Account Rating

In [11]:
credit_reports[0]['data']['consumerfullcredit']['accountrating']

{'noofotheraccountsbad': '0',
 'noofotheraccountsgood': '3',
 'noofretailaccountsbad': '0',
 'noofretailaccountsgood': '2',
 'nooftelecomaccountsbad': '0',
 'noofautoloanaccountsbad': '0',
 'noofautoloanccountsgood': '0',
 'noofhomeloanaccountsbad': '0',
 'nooftelecomaccountsgood': '0',
 'noofhomeloanaccountsgood': '0',
 'noofjointloanaccountsbad': '0',
 'noofstudyloanaccountsbad': '0',
 'noofcreditcardaccountsbad': '0',
 'noofjointloanaccountsgood': '0',
 'noofstudyloanaccountsgood': '0',
 'noofcreditcardaccountsgood': '1',
 'noofpersonalloanaccountsbad': '0',
 'noofpersonalloanaccountsgood': '1'}

<br>

8. creditaccountsummary

In [12]:
credit_reports[0]['data']['consumerfullcredit']['creditaccountsummary']

{'rating': '13',
 'amountarrear': '24,041.00',
 'amountarrear1': '0.00',
 'totalaccounts': '7',
 'totalaccounts1': '0',
 'lastjudgementdate': '-',
 'lastjudgementdate1': '-',
 'totalaccountarrear': '2',
 'totalaccountarrear1': '0',
 'totaljudgementamount': '0',
 'totaloutstandingdebt': '105,435.00',
 'totaljudgementamount1': '0',
 'totaloutstandingdebt1': '0.00',
 'totaldishonouredamount': '0.00',
 'totalmonthlyinstalment': '77,404.00',
 'totalnumberofjudgement': '0',
 'totaldishonouredamount1': '0.00',
 'totalmonthlyinstalment1': '0.00',
 'totalnumberofjudgement1': '0',
 'totalnumberofdishonoured': '0',
 'totalnumberofdishonoured1': '0',
 'totalaccountingodcondition': '0',
 'totalaccountingodcondition1': '0'}

<br>

9. enquirydetails

In [13]:
credit_reports[0]['data']['consumerfullcredit']['enquirydetails']

{'productid': '45',
 'matchingrate': '90',
 'subscriberenquiryengineid': '5012874225',
 'subscriberenquiryresultid': '6381470'}

<br>

10. enquiryhistorytop

In [14]:
credit_reports[0]['data']['consumerfullcredit']['enquiryhistorytop']

[{'daterequested': '06/08/2020 18:05:36',
  'enquiryreason': 'Credit scoring of the client by credit bureau',
  'subscribername': 'CDL - Credit Direct API',
  'subscriberenquiryresultid': '6050316'},
 {'daterequested': '01/08/2020 06:59:09',
  'enquiryreason': 'Credit scoring of the client by credit bureau',
  'subscribername': 'CDL - Credit Direct API',
  'subscriberenquiryresultid': '5983396'},
 {'daterequested': '08/07/2020 15:58:44',
  'enquiryreason': 'application of existing credit by a borrower',
  'subscribername': 'NewEdge Finance Limited  - transsnet',
  'subscriberenquiryresultid': '5680296'},
 {'daterequested': '03/07/2020 22:58:39',
  'enquiryreason': 'Credit scoring of the client by credit bureau',
  'subscribername': 'CDL - Credit Direct API',
  'subscriberenquiryresultid': '5637826'},
 {'daterequested': '04/06/2020 07:55:58',
  'enquiryreason': 'Credit scoring of the client by credit bureau',
  'subscribername': 'CDL - Credit Direct API',
  'subscriberenquiryresultid': 

<br>

11. accountmonthlypaymenthistory

In [15]:
credit_reports[0]['data']['consumerfullcredit']['accountmonthlypaymenthistory']

[{'m01': '#',
  'm02': '3',
  'm03': '0',
  'm04': '0',
  'm05': '0',
  'm06': '0',
  'm07': '36',
  'm08': '36',
  'm09': '0',
  'm10': '31',
  'm11': '0',
  'm12': '9',
  'm13': '1',
  'm14': '0',
  'm15': '0',
  'm16': '3',
  'm17': '5',
  'm18': '0',
  'm19': '4',
  'm20': '4',
  'm21': '1',
  'm22': '0',
  'm23': '4',
  'm24': '3',
  'header': 'Details of Credit Agreement with "First City Monument Bank Ltd Lagos" for Account Number: 0324562636176001',
  'currency': 'NGN',
  'accountno': '0324562636176001',
  'tablename': 'Consumer24MonthlyPayment',
  'closeddate': '01/11/2020',
  'accountnote': None,
  'displaytext': 'Consumer 24 Monthly Payment',
  'loanduration': '1170 Day(s)',
  'subaccountno': None,
  'accountstatus': 'Open',
  'amountoverdue': '22,441.39',
  'subscribername': 'First City Monument Bank Ltd Lagos',
  'lastpaymentdate': '01/08/2020',
  'lastupdateddate': '07/08/2020',
  'currentbalanceamt': '82,733.58',
  'dateaccountopened': '15/01/2016',
  'openingbalanceamt':

<br>

12. accountmonthlypaymenthistoryheader

In [16]:
credit_reports[0]['data']['consumerfullcredit']['accountmonthlypaymenthistoryheader']

{'mh01': '2020\nAUG',
 'mh02': '2020\nJUL',
 'mh03': '2020\nJUN',
 'mh04': '2020\nMAY',
 'mh05': '2020\nAPR',
 'mh06': '2020\nMAR',
 'mh07': '2020\nFEB',
 'mh08': '2020\nJAN',
 'mh09': '2019\nDEC',
 'mh10': '2019\nNOV',
 'mh11': '2019\nOCT',
 'mh12': '2019\nSEP',
 'mh13': '2019\nAUG',
 'mh14': '2019\nJUL',
 'mh15': '2019\nJUN',
 'mh16': '2019\nMAY',
 'mh17': '2019\nAPR',
 'mh18': '2019\nMAR',
 'mh19': '2019\nFEB',
 'mh20': '2019\nJAN',
 'mh21': '2018\nDEC',
 'mh22': '2018\nNOV',
 'mh23': '2018\nOCT',
 'mh24': '2018\nSEP',
 'company': 'Company',
 'tablename': 'Consumer24MonthlyPaymentHeader',
 'displaytext': 'Consumer 24 Monthly Payment Header'}

<br>

13. subjectlist

In [17]:
credit_reports[0]['data']['consumerfullcredit']['subjectlist']

{'reference': '12876566', 'consumerid': '17628566', 'searchoutput': 'XXX '}

<br>

14. creditagreementsummary

In [18]:
credit_reports[0]['data']['consumerfullcredit']['creditagreementsummary']

[{'currency': 'NGN',
  'accountno': '0324562636176001',
  'closeddate': '01/11/2020',
  'loanduration': '1170',
  'subaccountno': None,
  'accountstatus': 'Open',
  'amountoverdue': '22,441.39',
  'subscribername': 'First City Monument Bank Ltd Lagos',
  'lastupdateddate': '07/08/2020',
  'instalmentamount': '24,368.39',
  'currentbalanceamt': '82,733.58',
  'dateaccountopened': '15/01/2016',
  'openingbalanceamt': '814,700.00',
  'performancestatus': 'Performing',
  'repaymentfrequency': 'Monthly',
  'indicatordescription': 'Personal secured loan'},
 {'currency': 'NGN',
  'accountno': '0324602636176001',
  'closeddate': '30/11/2019',
  'loanduration': '59',
  'subaccountno': None,
  'accountstatus': 'Closed',
  'amountoverdue': '0.00',
  'subscribername': 'First City Monument Bank Ltd Lagos',
  'lastupdateddate': '08/08/2017',
  'instalmentamount': '17,036.09',
  'currentbalanceamt': '0.00',
  'dateaccountopened': '28/08/2014',
  'openingbalanceamt': '704,000.00',
  'performancestatus

<br>

## Function to extract necessary information

In [19]:
def extract_summary(report):
    credit_data = report['data']['consumerfullcredit']

    risk_summary_data={}

    # Firstly, we identify relevent information from the personal details summary
    # 1. gender
    # 2. date of birth
    # 3. nationality
    # 4. dependants
    personaldetails_relevant_info = ["gender","birthdate","nationality","dependants"]

    # Extract the relevant information from credit data
    for info in personaldetails_relevant_info:
      risk_summary_data[info]=credit_data["personaldetailssummary"][info]

    # 2. Employment history
    # Next, extracting information relating to the employment history
    # extract the list of occupation
    occupations = []
    for occupation in credit_data['employmenthistory']:
      occupations.append(occupation['occupation'])

    risk_summary_data['oldest occupation']=occupations[0]  # get the oldest  occupation
    risk_summary_data['newest occupation']=occupations[-1] # get the newest  occupation


    # 3. Telephon history
    #  Here, we compute how long the user has used the phone numbers : oldest and newest

    telephonehistory_home = []
    telephonehistory_mobile = []
    today = datetime.today()
    telephonehistory_home_duration=[]
    telephonehistory_mobile_duration=[]

    for s in credit_data['telephonehistory']:
        if 'homenoupdatedondate' in s:
            telephonehistory_home.append(s['homenoupdatedondate'])
            x=datetime.strptime(s['homenoupdatedondate'],'%d/%m/%Y')
            telephonehistory_home_duration.append((today-x).days)

        if 'mobilenoupdatedondate' in s:
            telephonehistory_mobile.append(s['mobilenoupdatedondate'])

            x=datetime.strptime(s['mobilenoupdatedondate'],'%d/%m/%Y')
            telephonehistory_mobile_duration.append((today-x).days)

    risk_summary_data['no_mobilenoupdatedondate'] = len(telephonehistory_mobile)

    risk_summary_data['no_homenoupdatedondate'] = len(telephonehistory_home)

    risk_summary_data['telephonehistory_mobile_duration_max'] = max(telephonehistory_home_duration) if len(telephonehistory_home_duration) else None

    risk_summary_data['telephonehistory_mobile_duration_min'] = min(telephonehistory_mobile_duration) if len(telephonehistory_mobile_duration) else None

    risk_summary_data['telephonehistory_home_duration_max'] = max(telephonehistory_home_duration) if len(telephonehistory_home_duration) else None

    risk_summary_data['telephonehistory_home_duration_min'] = min(telephonehistory_home_duration) if len(telephonehistory_home_duration) else None

    # 4. Gaurator count

    risk_summary_data['guarantor_counts_guarantorssecured']=credit_data['guarantorcount']['guarantorssecured']
    risk_summary_data['guarantor_counts_accounts']=credit_data['guarantorcount']['accounts']

    # 5. Gaurantor details
    # Here, we check if the gaurantor details is available
    risk_summary_data['Is_gaurantor_firstname_available']=credit_data['guarantordetails']['guarantorfirstname'] is not None

    risk_summary_data['Is_gaurantor_phone_number_available']=credit_data['guarantordetails']['guarantormobiletelephone'] is not None \
                                                              or credit_data['guarantordetails']['guarantorhometelephone'] is not None \
                                                              or credit_data['guarantordetails']['guarantorworktelephone'] is not None


    # 6. delinquency information
    risk_summary_data['monthsinarrears']=credit_data['deliquencyinformation']['monthsinarrears']


    # 7.Account Rating
    for rating in credit_data['accountrating']:
        risk_summary_data[rating]=float(credit_data['accountrating'][rating])

    # 8. creditaccountsummary
    for rating in credit_data['creditaccountsummary']:
        try:
          risk_summary_data[rating]=float(credit_data['creditaccountsummary'][rating].replace(',',''))
        except:
          risk_summary_data[rating]=None

    #9. enquirydetails
    risk_summary_data['matchingrate']=credit_data['enquirydetails']['matchingrate']

    # 10. enquiryhistorytop
    risk_summary_data['No_credit_report_requested']=len(credit_data['enquiryhistorytop'])

    # 11. accountmonthlypaymenthistory

    # loan duration
    durations=[]
    for summary in credit_data['accountmonthlypaymenthistory']:
        duration = summary['loanduration']
        if duration=="Not Available":
            continue
        durations.append(int(duration.split()[0]))


    risk_summary_data['max_loandurations']=max(durations)
    risk_summary_data['mean_loandurations']=sum(durations)/len(durations)
    risk_summary_data['min_loandurations']=min(durations)

    # current balance amount
    amounts=[]
    for summary in credit_data['accountmonthlypaymenthistory']:
        amount = summary['currentbalanceamt']

        amounts.append(float(amount.replace(',','')))

    risk_summary_data['max_currentbalanceamt']=max(amounts)
    risk_summary_data['mean_currentbalanceamt']=sum(amounts)/len(amounts)
    risk_summary_data['min_currentbalanceamt']=min(amounts)

    # opening balance amount
    amounts=[]
    for summary in credit_data['accountmonthlypaymenthistory']:
        amount = summary['openingbalanceamt']

        amounts.append(float(amount.replace(',','')))

    risk_summary_data['max_openingbalanceamt']=max(amounts)
    risk_summary_data['mean_openingbalanceamt']=sum(amounts)/len(amounts)
    risk_summary_data['min_openingbalanceamt']=min(amounts)

    # amount overdue
    amounts=[]
    for summary in credit_data['accountmonthlypaymenthistory']:
        if 'amountoverdue' not in summary:
            continue
        amount = summary['amountoverdue']

        amounts.append(float(amount.replace(',','')))

    risk_summary_data['max_amountoverdue']=max(amounts)
    risk_summary_data['mean_amountoverdue']=sum(amounts)/len(amounts)
    risk_summary_data['min_amountoverdue']=min(amounts)

    # age of bank account open

    ages=[]
    for summary in credit_data['accountmonthlypaymenthistory']:
        accdate = datetime.strptime(summary['dateaccountopened'],'%d/%m/%Y')
        age=(today-accdate).days

        ages.append(age)
    
    
    risk_summary_data['max_age_in_days_account']=max(ages)
    risk_summary_data['mean_age_in_days_account']=sum(ages)/len(ages)
    risk_summary_data['min_age_in_days_account']=min(ages)
    
    # monthly delinquency report from 24 months payment
    
    # we compute the percentage of payment made on time for every 24 months
    percentage_payment_made_ontime=[]
    for summary in credit_data['accountmonthlypaymenthistory']:
        paymentontime=0
        counter=0
        for i in range(1,25): # loop through the 24 months
            if i<10:
                if summary['m0'+str(i)]=='0':
                    paymentontime+=1
                if summary['m0'+str(i)]!='#':
                    counter+=1
            else:
                if summary['m'+str(i)]=='0':
                    paymentontime+=1
                if summary['m'+str(i)]!='#':
                    counter+=1
                    
        p=100*paymentontime/counter if counter>0 else np.nan
        percentage_payment_made_ontime.append(p)
        
    
    risk_summary_data['mean_percentage_paymentontime']=np.nanmean(percentage_payment_made_ontime)
    
    return risk_summary_data

In [20]:
# Convert to DataFrame
data_extracted = [extract_summary(report) for report in credit_reports]
df = pd.DataFrame(data_extracted)

# Display the DataFrame
print(df.head())

   gender   birthdate nationality dependants oldest occupation  \
0    Male  06/05/1991     Nigeria          0   PUBLIC SERVANTS   
1  Female  30/11/1985     Nigeria          0     CIVIL SERVANT   
2  Female  14/07/1983     Nigeria          0           STUDENT   

  newest occupation  no_mobilenoupdatedondate  no_homenoupdatedondate  \
0   PUBLIC SERVANTS                         0                       3   
1            DOCTOR                         2                       3   
2           STUDENT                         0                       0   

   telephonehistory_mobile_duration_max  telephonehistory_mobile_duration_min  \
0                                3549.0                                   NaN   
1                                3824.0                                3136.0   
2                                   NaN                                   NaN   

   ...  max_openingbalanceamt  mean_openingbalanceamt min_openingbalanceamt  \
0  ...               814700.0         

In [21]:
df

Unnamed: 0,gender,birthdate,nationality,dependants,oldest occupation,newest occupation,no_mobilenoupdatedondate,no_homenoupdatedondate,telephonehistory_mobile_duration_max,telephonehistory_mobile_duration_min,...,max_openingbalanceamt,mean_openingbalanceamt,min_openingbalanceamt,max_amountoverdue,mean_amountoverdue,min_amountoverdue,max_age_in_days_account,mean_age_in_days_account,min_age_in_days_account,mean_percentage_paymentontime
0,Male,06/05/1991,Nigeria,0,PUBLIC SERVANTS,PUBLIC SERVANTS,0,3,3549.0,,...,814700.0,228454.282857,0.0,22441.39,3434.581429,0.0,4389,3323.571429,2029,85.869565
1,Female,30/11/1985,Nigeria,0,CIVIL SERVANT,DOCTOR,2,3,3824.0,3136.0,...,217000.0,36564.705882,0.0,0.0,0.0,0.0,4805,2341.176471,1749,96.060606
2,Female,14/07/1983,Nigeria,0,STUDENT,STUDENT,0,0,,,...,1080936.0,363645.333333,0.0,12000.0,4000.0,0.0,2092,1919.0,1749,61.111111


#### We further process the dataframe to have only integers and floats , which are needed for machine learning models

In [22]:
# Compute age from birthdate

df['age']=datetime.today().year-df['birthdate'].str.split('/').str[-1].astype(int)

df.drop('birthdate', axis=1, inplace=True)

In [23]:
df

Unnamed: 0,gender,nationality,dependants,oldest occupation,newest occupation,no_mobilenoupdatedondate,no_homenoupdatedondate,telephonehistory_mobile_duration_max,telephonehistory_mobile_duration_min,telephonehistory_home_duration_max,...,mean_openingbalanceamt,min_openingbalanceamt,max_amountoverdue,mean_amountoverdue,min_amountoverdue,max_age_in_days_account,mean_age_in_days_account,min_age_in_days_account,mean_percentage_paymentontime,age
0,Male,Nigeria,0,PUBLIC SERVANTS,PUBLIC SERVANTS,0,3,3549.0,,3549.0,...,228454.282857,0.0,22441.39,3434.581429,0.0,4389,3323.571429,2029,85.869565,34
1,Female,Nigeria,0,CIVIL SERVANT,DOCTOR,2,3,3824.0,3136.0,3824.0,...,36564.705882,0.0,0.0,0.0,0.0,4805,2341.176471,1749,96.060606,40
2,Female,Nigeria,0,STUDENT,STUDENT,0,0,,,,...,363645.333333,0.0,12000.0,4000.0,0.0,2092,1919.0,1749,61.111111,42


In [24]:
# Encode the categorical dataest as dummies
df=pd.get_dummies(df, columns=["gender","nationality","oldest occupation", "newest occupation"], drop_first=True)

<br>

#### Finally, we obtain the features that is ready for machine learning model inputs

In [25]:
df

Unnamed: 0,dependants,no_mobilenoupdatedondate,no_homenoupdatedondate,telephonehistory_mobile_duration_max,telephonehistory_mobile_duration_min,telephonehistory_home_duration_max,telephonehistory_home_duration_min,guarantor_counts_guarantorssecured,guarantor_counts_accounts,Is_gaurantor_firstname_available,...,max_age_in_days_account,mean_age_in_days_account,min_age_in_days_account,mean_percentage_paymentontime,age,gender_Male,oldest occupation_PUBLIC SERVANTS,oldest occupation_STUDENT,newest occupation_PUBLIC SERVANTS,newest occupation_STUDENT
0,0,0,3,3549.0,,3549.0,3324.0,0,0,False,...,4389,3323.571429,2029,85.869565,34,1,1,0,1,0
1,0,2,3,3824.0,3136.0,3824.0,3324.0,0,0,False,...,4805,2341.176471,1749,96.060606,40,0,0,0,0,0
2,0,0,0,,,,,0,0,False,...,2092,1919.0,1749,61.111111,42,0,0,1,0,1
