In [20]:
import pandas as pd
import numpy as np
import random as random
from datetime import datetime
from datetime import timedelta  

%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

import tensorflow as tf
from tensorflow import keras


# Cufflinks wrapper on plotly
import cufflinks
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

from plotly.offline import iplot
import plotly.graph_objs as go
cufflinks.go_offline()

# Set global theme
cufflinks.set_config_file(world_readable=True, theme='pearl')
import plotly.figure_factory as ff

# Data import and preparation

In [2]:
data_input=pd.read_excel("raw_data.xlsx", sep=";")

dataset_downland_date=data_input.data_ts.max()
dataset_downland_date

Timestamp('2019-08-23 00:00:00')

In [3]:
data_input.head(10)

Unnamed: 0,nrpożyczki,nr Klienta,płeć,wiek,spłacono dotychczas,data ostatniej spłaty,kwota pozyczki,sposob wyplaty,data wyplaty,Ilość rat,...,data odstapienia,metoda podpisania umowy broker online,okres,kwota,data,yield,Day,Month,Year,data_ts
0,20180524/QS/4829,19086,Mezczyzna,27,0.0,0,3000,Przelew bankowy,2018-05-25,24,...,NaT,broker,1,0.0,,0.0,,,,NaT
1,20180524/QS/4829,19086,Mezczyzna,27,0.0,0,3000,Przelew bankowy,2018-05-25,24,...,NaT,broker,2,0.0,,0.0,,,,NaT
2,20180524/QS/4829,19086,Mezczyzna,27,0.0,0,3000,Przelew bankowy,2018-05-25,24,...,NaT,broker,3,0.0,,0.0,,,,NaT
3,20180524/QS/4829,19086,Mezczyzna,27,0.0,0,3000,Przelew bankowy,2018-05-25,24,...,NaT,broker,4,0.0,,0.0,,,,NaT
4,20180524/QS/4829,19086,Mezczyzna,27,0.0,0,3000,Przelew bankowy,2018-05-25,24,...,NaT,broker,5,0.0,,0.0,,,,NaT
5,20180524/QS/4829,19086,Mezczyzna,27,0.0,0,3000,Przelew bankowy,2018-05-25,24,...,NaT,broker,6,0.0,,0.0,,,,NaT
6,20180524/QS/4829,19086,Mezczyzna,27,0.0,0,3000,Przelew bankowy,2018-05-25,24,...,NaT,broker,7,0.0,,0.0,,,,NaT
7,20180524/QS/4829,19086,Mezczyzna,27,0.0,0,3000,Przelew bankowy,2018-05-25,24,...,NaT,broker,8,0.0,,0.0,,,,NaT
8,20180524/QS/4829,19086,Mezczyzna,27,0.0,0,3000,Przelew bankowy,2018-05-25,24,...,NaT,broker,9,0.0,,0.0,,,,NaT
9,20180524/QS/4829,19086,Mezczyzna,27,0.0,0,3000,Przelew bankowy,2018-05-25,24,...,NaT,broker,10,0.0,,0.0,,,,NaT


In [4]:
data_input["data"]= pd.to_datetime(data_input.data)
data_input["data_wyplaty"]= pd.to_datetime(data_input["data wyplaty"])
data_input['Pierwsza rata']= pd.to_datetime(data_input['Pierwsza rata'])
data_input["initial_loan_value"]=data_input.rata*data_input[' Ilość rat']

features=['nrpożyczki', 'nr Klienta', 'płeć', 'wiek','kod pocztowy','sposob wyplaty',
        'initial_loan_value',' Ilość rat',"data_wyplaty", 'Pierwsza rata','rata',  'okres',
       'kwota', 'data_ts', 'yield']

df=data_input[features]

## Translating column names to English

In [56]:
features_translate={
    'nrpożyczki':'loan_ID',
    'nr Klienta':"client_ID",
    'sposob wyplaty':"cash_payment",
    'okres':"period",
    "rata":"installment",
    'kod pocztowy':"post_code",
    ' Ilość rat':"installment_count",
    'Pierwsza rata':"first_installment_date",
    'kwota':"payment_value",
    "data_ts":"payment_date",
    "data_wyplaty":"payout_date",
    "yield":"cumulated_payments",
    'płeć':"sex",
    "wiek":"age",

    


}

df.rename(columns=features_translate, inplace=True)

df["sex"]=np.where(df.sex=="Kobieta","Female","Male")

df.columns

Index(['level_0', 'index', 'loan_ID', 'client_ID', 'sex', 'age', 'post_code',
       'cash_payment', 'initial_loan_value', 'installment_count',
       'payout_date', 'first_installment_date', 'installment', 'period',
       'payment_value', 'payment_date', 'cumulated_payments',
       'expected_payment_date', 'last_payment_period', 'last_payment_date',
       'max_cumulated_payments', 'payment_delay', 'month',
       'payment_completeness', 'paid_ratio', 'year', 'is_male',
       'is_christmass_season', 'is_default', 'month_year', 'date_bucket',
       'paid_ratio_bucket'],
      dtype='object')

# Data transformation and Feature engineering

In [57]:
df["expected_payment_date"]=df.first_installment_date

import datetime
for i in range(0,len(df)):
    df["expected_payment_date"][i]=df.first_installment_date[i]+pd.DateOffset(months=int(df.period[i])-1)


df_grouped=df[(df.payment_date.isnull()==False)][['loan_ID','period',"payment_date",'cumulated_payments']].groupby('loan_ID').max()
df_grouped.rename(columns={'period':"last_payment_period","payment_date":"last_payment_date",'cumulated_payments':'max_cumulated_payments'}, inplace=True)
df_grouped.reset_index(inplace=True)
df=df.merge(df_grouped, on="loan_ID")

df.columns

Index(['level_0', 'index', 'loan_ID', 'client_ID', 'sex', 'age', 'post_code',
       'cash_payment', 'initial_loan_value', 'installment_count',
       'payout_date', 'first_installment_date', 'installment', 'period',
       'payment_value', 'payment_date', 'cumulated_payments',
       'expected_payment_date', 'last_payment_period_x', 'last_payment_date_x',
       'max_cumulated_payments_x', 'payment_delay', 'month',
       'payment_completeness', 'paid_ratio', 'year', 'is_male',
       'is_christmass_season', 'is_default', 'month_year', 'date_bucket',
       'paid_ratio_bucket', 'last_payment_period_y', 'last_payment_date_y',
       'max_cumulated_payments_y'],
      dtype='object')

## Creating default flag

Filling the next payment with 0s and predicted date to segment defaulting clients from clients expected to pay after dataset was downlanded

In [58]:
df.payment_date=np.where(df.period==df.last_payment_period+1,
                        df.last_payment_date+pd.DateOffset(months=2),
                        df.payment_date)

df.cumulated_payments=np.where(df.period==df.last_payment_period+1,
                        df.max_cumulated_payments,
                        df.cumulated_payments)

df["cash_payment"]=np.where(df.cash_payment=='Przelew bankowy',0,1)

AttributeError: 'DataFrame' object has no attribute 'last_payment_period'

Removing empty entries (no dates) and payments expected in future - after dataset download date

In [None]:
df=df[(df.payment_date.isnull()==False)].sort_values(by=["loan_ID","period"])
df.shape

In [None]:
df=df[df.payment_date<=dataset_downland_date]
df.shape

In [None]:
df.head(10)

In [11]:
## Calculating key KPIs

df["payment_delay"]=df.payment_date-df.expected_payment_date
df.payment_delay=df.payment_delay.apply(lambda x: x.days)
df["month"]=df.expected_payment_date.apply(lambda x: x.month)
df["payment_completeness"]=df.payment_value/df.installment
df['paid_ratio']=df.cumulated_payments/df.initial_loan_value


df["year"]=df.payment_date.apply(lambda x:x.year)

df["is_male"]=np.where(df.sex=="Male",1,0)
df["is_christmass_season"]=df.month.apply(lambda x: x in(1,2,12))
df.is_christmass_season=np.where(df.is_christmass_season,1,0)

In [12]:
threshold=0.2
df["is_default"]=np.where(df.payment_completeness<threshold,1,0)
df.payment_completeness=np.where(df.payment_completeness>2,2, df.payment_completeness)

In [13]:
df.reset_index(inplace=True)
import datetime
df["month_year"]=np.NaN
for i in range(0,len(df)):
   
    df["month_year"][i]=datetime.date(year=df.payment_date[i].year,month=df.payment_date[i].month,day=1)
    

df["date_bucket"] = pd.cut(df.payment_date,25)
df["date_bucket"]=df["date_bucket"].apply(lambda x:x.right)


df.drop(columns=["index"],inplace=True)

df.payment_delay=np.where(df.payment_delay>60,60,df.payment_delay)
df.payment_delay=np.where(df.payment_delay<-60,-60,df.payment_delay)

## Outputing data for further modeling

In [14]:
df.to_excel("processed_data.xlsx")

## Aggregating monthly data to form client characteristic

In [15]:
groupby_features=['loan_ID', 'client_ID', 'sex', 'age', 'post_code', 'cash_payment',
       'initial_loan_value', 'installment_count', 'payout_date',
        'first_installment_date', 'installment',"is_male"]
max_features=['payment_date','paid_ratio','is_default',"period"]

df=df.sort_values(by=["loan_ID","period"]).reset_index()
df_grouped=df.groupby(groupby_features)[max_features].last().reset_index()
df_grouped["lowest_payment"]=df.groupby(groupby_features).last().reset_index()["payment_completeness"]


df_grouped["age_dec"]=df_grouped.age.apply(lambda x:int(np.around(x-5,-1)))
df_grouped["payment_method"]=np.where(df_grouped.cash_payment==1,"Gotówka","Przelew")

In [16]:
groupby_features=['loan_ID', 'client_ID', 'sex', 'age', 'post_code', 'cash_payment',
       'initial_loan_value', 'installment_count', 'payout_date',
        'first_installment_date', 'installment',"is_male"]
max_features=['payment_date','paid_ratio','is_default',"period"]

df=df.sort_values(by=["loan_ID","period"]).reset_index()
df_grouped=df.groupby(groupby_features)[max_features].last().reset_index()
df_grouped["lowest_payment"]=df.groupby(groupby_features).last().reset_index()["payment_completeness"]


df_grouped["age_dec"]=df_grouped.age.apply(lambda x:int(np.around(x-5,-1)))
df_grouped["payment_method"]=np.where(df_grouped.cash_payment==1,"Gotówka","Przelew")

df_grouped.head()

Unnamed: 0,loan_ID,client_ID,sex,age,post_code,cash_payment,initial_loan_value,installment_count,payout_date,first_installment_date,installment,is_male,payment_date,paid_ratio,is_default,period,lowest_payment,age_dec,payment_method
0,20170626/13,7,Female,57,41-200,0,5872.32,24,2017-06-26,2017-07-20,244.68,0,2018-09-27,0.544023,1,14,0.0,50,Przelew
1,20170630/55,23,Male,27,64-100,0,5872.32,24,2017-06-30,2017-07-15,244.68,1,2018-09-17,0.583333,1,15,0.0,20,Przelew
2,20170630/62,34,Female,61,42-500,0,3914.88,24,2017-06-30,2017-07-30,163.12,0,2018-09-04,0.468635,1,12,0.0,60,Przelew
3,20170703/70,41,Male,35,63-220,0,6851.28,24,2017-07-06,2017-08-15,285.47,1,2018-06-20,0.378032,1,11,0.0,30,Przelew
4,20170703/81,44,Female,69,21-580,0,4893.6,24,2017-07-03,2017-08-15,203.9,0,2018-12-22,0.584136,1,14,0.0,60,Przelew


In [17]:
df_grouped.is_male.sum()/df_grouped.shape[0]

df_grouped.is_default.sum()/df_grouped.shape[0]

df_grouped_default=df_grouped[df_grouped.is_default==1]

key_KPIs=["is_default","paid_ratio"]

0.3577896447643371

0.613187833758997

# Exploring client characteristics

In [18]:
corrs=df_grouped[[ 'paid_ratio','is_default','sex', 'age','cash_payment','initial_loan_value']].corr()
figure = ff.create_annotated_heatmap(
    z=np.where(corrs==1,np.NaN,corrs),
    x=list(corrs.columns),
    y=list(corrs.index),
    annotation_text=np.where(corrs==1,np.NaN,corrs).round(2),
    showscale=True)
figure.layout.margin=dict(l = 200)


iplot(figure)

## Age

In [21]:
trace0=go.Histogram(x=df_grouped[df_grouped.is_male==1].age,
        marker=dict(
        color="blue",
        opacity=0.5
        ),
        name="Male")
trace1=go.Histogram(x=df_grouped[df_grouped.is_male!=1].age,
        marker=dict(
        color="red",
        opacity=0.5
        ),
        name="Female")

data=[trace1,trace0]
figure=go.Figure(
    data=data,
    layout=go.Layout(
    
        barmode="overlay",
        title="Client composition in relation to age",
        yaxis=dict(title="Count"),
        xaxis=dict(title="Age")

    
    ))
iplot(figure)

In [22]:
trace0=go.Histogram(x=df_grouped.age,
        marker=dict(
        color="blue",
        opacity=0.5
        ),
        histnorm='percent',
        cumulative_enabled=True
       )



data=[trace0]
figure=go.Figure(
    data=data,
    layout=go.Layout(
    
        barmode="overlay",
        title="Cumulative client composition in relation to age",
        yaxis=dict(title="Cumulative share"),
        xaxis=dict(title="Age")

    
    ))
iplot(figure)

In [23]:
df_grouped["date_bucket"] = pd.cut(df_grouped.payment_date,20)
df_grouped["date_bucket"]=df_grouped["date_bucket"].apply(lambda x:x.right)
df_age_groups=df_grouped.groupby("age_dec")[key_KPIs].mean().reset_index()


fig = go.Figure([go.Bar(x=df_age_groups.age_dec, y=df_age_groups.is_default)])
fig.update_layout(
    title='Share of defaulting clients in relation to Age',
    xaxis_tickfont_size=14,
    yaxis=dict(
        title='Share of defaults',
        titlefont_size=16,
        tickfont_size=14))



df_age_groups=df_grouped.groupby(["age_dec","sex"])[key_KPIs].mean().reset_index()

In [24]:
trace0=go.Bar(name="Mężczyźni", 
              x=df_age_groups[df_age_groups.sex=="Mezczyzna"].age_dec, 
              y=df_age_groups[df_age_groups.sex=="Mezczyzna"].is_default,
             marker=dict(
            color="blue",
            opacity=0.5
            ))
trace1=go.Bar(name='Kobiety', 
              x=df_age_groups[df_age_groups.sex!="Mezczyzna"].age_dec, 
              y=df_age_groups[df_age_groups.sex!="Mezczyzna"].is_default,
               marker=dict(
            color="red",
            opacity=0.5
            ))




data=[trace0,trace1]
figure=go.Figure(
    data=data,
    layout=go.Layout(
    
        barmode="group",
        title="Share of defaulting clients in relation to Age & Sex",
        yaxis=dict(title="Share of defaults"),
        xaxis=dict(title="Age")

    
    ))
iplot(figure)

In [25]:
fig = go.Figure([go.Box(x=df_grouped.sex, y=df_grouped.paid_ratio)])
fig.update_layout(
    title='Share of loan paid until default',
     xaxis=dict(
        title='Sex',
        titlefont_size=16,
        tickfont_size=14),
    yaxis=dict(
        title='Paid loan ratio',
        titlefont_size=16,
        tickfont_size=14))

In [26]:
fig = go.Figure([go.Box(x=df_grouped_default.age_dec, y=df_grouped_default.paid_ratio)])
fig.update_layout(
    title='Share of loan paid until default in relation to Age',
    xaxis=dict(
        title='Age',
        titlefont_size=16,
        tickfont_size=14),
    yaxis=dict(
        title='Paid loan ratio',
        titlefont_size=16,
        tickfont_size=14))

In [27]:
import plotly.express as px
fig = px.box(df_grouped_default, x="age_dec", y="paid_ratio", color="sex")

fig.show()

## Loan Volume

In [28]:
trace0=go.Histogram(x=df_grouped.initial_loan_value,
        marker=dict(
        color="blue",
        opacity=0.5
        ),
        name="Mężczyźni",
        histnorm='percent',
        cumulative_enabled=True,
        xbins=dict( # bins used for histogram
        start=0,
        end=20000,
        size=1000
   ))

data=[trace0]
figure=go.Figure(
    data=data,
    layout=go.Layout(
    
        barmode="overlay",
        title="Client composition in relation to initial loan value",
        yaxis=dict(title="Count"),
        xaxis=dict(title="Initial loan value [PLN]")

    
    ))
iplot(figure)

In [29]:
trace0=go.Histogram(x=df_grouped.installment_count,
        marker=dict(
        color="blue",
        opacity=0.5
        ),
        histnorm='percent',
        name="Mężczyźni",
        xbins=dict( # bins used for histogram
        start=0,
        end=30,
        size=5
   ))

data=[trace0]
figure=go.Figure(
    data=data,
    layout=go.Layout(
    
        barmode="overlay",
        title="Client composition by loan duration",
        yaxis=dict(title="Portfolio share"),
        xaxis=dict(title="Instalement count")

    
    ))
iplot(figure)

In [30]:
bins = np.array([0,2500,5000,7500,10000,12500,15000,17500,20000])
df_grouped_default["bucket"] = pd.cut(df_grouped_default.initial_loan_value, bins)
df_grouped_default["bucket"]=df_grouped_default["bucket"].apply(lambda x:x.right)

fig = go.Figure([go.Box(x=df_grouped_default.bucket, y=df_grouped_default.paid_ratio,boxpoints=False)])
fig.update_layout(
    title='Share of loan paid until default in relation to initial loan value',
     xaxis=dict(
        title='Loan value',
        titlefont_size=16,
        tickfont_size=14),
    yaxis=dict(
        title='Paid loan ratio',
        titlefont_size=16,
        tickfont_size=14))

## Payment period - installment index

In [31]:
trace0=go.Histogram(x=df_grouped_default[df_grouped_default.is_male==1].period,
                    
                    histnorm='percent',
        marker=dict(
        color="red",
        opacity=0.5
        ),
        name="Male",
        
        xbins=dict( # bins used for histogram
        start=1,
        end=20,
        size=4))

In [32]:
trace1=go.Histogram(x=df_grouped_default[df_grouped_default.is_male!=1].period,
                
                    histnorm='percent',
        marker=dict(
        color="blue",
        opacity=0.5,
        
        ),
        name="Female",
        xbins=dict( # bins used for histogram
        start=1,
        end=20,
        size=4))

data=[trace0,trace1]
figure=go.Figure(
    data=data,
    layout=go.Layout(
    
        barmode="overlay",
        title="Number of installments paid before default",
        yaxis=dict(title="Share"),
        xaxis=dict(title="Paid installments")

    
    ))
iplot(figure)

## Cash vs transfer

In [33]:
trace0=go.Histogram(x=df_grouped_default[df_grouped_default.cash_payment==1].period,
                    
                    histnorm='percent',
        marker=dict(
        color="blue",
        opacity=0.5
        ),
        name="Cash payment",
        
        xbins=dict( # bins used for histogram
        start=1,
        end=20,
        size=4))


trace1=go.Histogram(x=df_grouped_default[df_grouped_default.cash_payment!=1].period,
                
                    histnorm='percent',
        marker=dict(
        color="red",
        opacity=0.5,
        
        ),
        name="Transfer",
        xbins=dict( # bins used for histogram
        start=1,
        end=20,
        size=4))

data=[trace1,trace0]
figure=go.Figure(
    data=data,
    layout=go.Layout(
    
        barmode="overlay",
        title="Number of installments paid before default",
        yaxis=dict(title="Share"),
        xaxis=dict(title="Paid installments")
    
    ))
iplot(figure)

In [48]:
df_grouped_default["payment_method"]=np.where(df_grouped_default.payment_method=="Przelew","Transfer", "Cash")
fig = go.Figure([go.Box(x=df_grouped_default.payment_method, y=df_grouped_default.paid_ratio)])
fig.update_layout(
    title='Spłacone saldo kredytu w momencie zaprzestania spłat w odniesieniu do metody wypłaty',
     xaxis=dict(
        title='Payment type',
        titlefont_size=16,
        tickfont_size=14),
    yaxis=dict(
        title='Paid loan ratio',
        titlefont_size=16,
        tickfont_size=14))

# Exploring Time Series data

## Performance in relation to installment index

In [49]:
df_duration=df[["period","is_default","payment_completeness"]].groupby("period").mean()
df_duration["sample_size"]=df[["period","loan_ID"]].groupby("period").count()
df_duration.reset_index(inplace=True)


trace0=go.Scatter(
        y=1-df_duration.is_default,
        x=df_duration.period,
        mode='lines',
        marker=dict(
        color="red",
        size=5,
        opacity=0.5
        ),
        name="Share of paid installments"
    )



trace1=go.Scatter(
        y=df_duration.payment_completeness,
        x=df_duration.period,
        mode='lines',
        marker=dict(
        color="blue",
        size=5,
        opacity=0.5
        ),
        name="Payment completeness"
    )







data=[trace0,trace1]
figure=go.Figure(
    data=data,
    layout=go.Layout(
        title="Portfolio performance in relation to installment index",
        yaxis=dict(title="Completeness", range=(0.8,1)),
        xaxis=dict(title="Installment index", range=(1,20)),
          legend=dict(
        x=0.8,
        y=1,
        traceorder="normal",
        font=dict(
            family="sans-serif",
            size=12,
            color="black"
        ),
        bgcolor=None,
 


        )
        
    
))
iplot(figure)

The average payment completeness seems to be constantly above 80% with a serious drop after 20 months. This will need further investigation and might be effected by outliers and decreasing sample size.

In [50]:
trace0=go.Scatter(
        y=df_duration.sample_size,
        x=df_duration.period,
        mode='lines',
        marker=dict(
        color="red",
        size=5,
        opacity=0.5
        ),

    )








data=[trace0]
figure=go.Figure(
    data=data,
    layout=go.Layout(
        title="Sample size depending on installment index",
        yaxis=dict(title="Clients count", range=(100,5000)),
        xaxis=dict(title="Installment index", range=(1,20))

    
    ))
iplot(figure)

fig = px.box(df, y="payment_delay", x="period", points=False)
fig.show()

As number of defaulting client decrease sample size with each month and additionally new loans will only go into longer durations in the future after 15 months our sample decreases by 90% which could pose a serious issue.

## How does loan maturity affect performance

In [51]:
df["paid_ratio_bucket"]=round(df.paid_ratio,1)

df_paid_ratio=df[["paid_ratio_bucket","payment_completeness","is_default"]].groupby("paid_ratio_bucket").mean()
df_paid_ratio["sample_size"]=df[["paid_ratio_bucket","loan_ID"]].groupby("paid_ratio_bucket").count()
df_paid_ratio.reset_index(inplace=True)
df_paid_ratio.head(11)


trace0=go.Bar(
        y=df_paid_ratio.is_default,
        x=df_paid_ratio.paid_ratio_bucket,
        
        marker=dict(
        color="blue",

        opacity=0.5
        ),
       
    )








data=[trace0]
figure=go.Figure(
    data=data,
    layout=go.Layout(
        title="Ratio of defaults depending on loan maturity",
        yaxis=dict(title="Default ratio"),
        xaxis=dict(title="Paid ratio")

    
    ))
iplot(figure)

df_monthly=df.groupby("month")[["is_default","payment_completeness"]].mean().reset_index()

Unnamed: 0,paid_ratio_bucket,payment_completeness,is_default,sample_size
0,0.0,0.912876,0.067837,4157
1,0.1,0.889047,0.112378,11310
2,0.2,0.902267,0.101494,9173
3,0.3,0.911528,0.096721,6100
4,0.4,0.91623,0.093228,3175
5,0.5,0.928719,0.09611,1748
6,0.6,0.951821,0.084788,802
7,0.7,0.953818,0.091503,459
8,0.8,0.908409,0.085714,280
9,0.9,0.957567,0.146067,89


## Performance evolution over 2 year period

In [52]:
trace0=go.Bar(
        y=df_monthly.is_default,
        x=df_monthly.month,
        
        marker=dict(
        color="blue",

        opacity=0.5
        ),
       
    )








data=[trace0]
figure=go.Figure(
    data=data,
    layout=go.Layout(
        title="Default share seasonality",
        yaxis=dict(title="Share of defaults"),
        xaxis=dict(title="Month")

    
    ))
iplot(figure)

In [53]:
df_date_group=df.groupby('month_year')[["is_default","payment_completeness","period"]].mean().reset_index()
df_date_group["client_count"]=df.groupby('month_year')["is_default"].count().reset_index().is_default

df_date_group.head()

Unnamed: 0,month_year,is_default,payment_completeness,period,client_count
0,2017-07-01,0.0,1.0,1.0,3
1,2017-08-01,0.0,1.00289,1.185185,27
2,2017-09-01,0.0,1.001836,1.617021,47
3,2017-10-01,0.00885,1.011077,1.778761,113
4,2017-11-01,0.0,1.00858,1.931579,190


In [54]:
trace0=go.Scatter(
        y=df_date_group.is_default,
        x=df_date_group.month_year,
        mode='lines',
        marker=dict(
        color="red",
        size=5,
        opacity=0.5
        ),
        name="Default share"
    )








data=[trace0]
figure=go.Figure(
    data=data,
    layout=go.Layout(
        title="Average default share evolution",
        yaxis=dict(title="Default share"),
        xaxis=dict(title="Date")

    
    ))
iplot(figure)

In [55]:
trace0=go.Scatter(
        y=df_date_group.payment_completeness,
        x=df_date_group.month_year,
        mode='lines',
        marker=dict(
        color="red",
        size=5,
        opacity=0.5
        ),

    )








data=[trace0]
figure=go.Figure(
    data=data,
    layout=go.Layout(
        title="Average payment completeness evolution",
        yaxis=dict(title="Payment completeness"),
        xaxis=dict(title="Date")

    
    ))
iplot(figure)

In [43]:
trace0=go.Scatter(
        y=df_date_group.period,
        x=df_date_group.month_year,
        mode='lines',
        marker=dict(
        color="red",
        size=5,
        opacity=0.5
        ),

    )








data=[trace0]
figure=go.Figure(
    data=data,
    layout=go.Layout(
        title="Average installment index evolution",
        yaxis=dict(title="Installment index"),
        xaxis=dict(title="Date")

    
    ))
iplot(figure)

In [44]:
trace0=go.Scatter(
        y=df_date_group.client_count,
        x=df_date_group.month_year,
        mode='lines',
        marker=dict(
        color="red",
        size=5,
        opacity=0.5
        ),
    )








data=[trace0]
figure=go.Figure(
    data=data,
    layout=go.Layout(
        title="Evolution of portfolio size",
        yaxis=dict(title="Client count"),
        xaxis=dict(title="Date")

    
    ))
iplot(figure)

In [59]:

from plotly.subplots import make_subplots

fig = make_subplots(specs=[[{"secondary_y": True}]]);


fig.update_yaxes(title_text="Average installment index", secondary_y=False)
fig.update_yaxes(title_text="Share of defaults", secondary_y=True)

fig.update_xaxes(title_text="Date")



fig.add_trace(
    go.Scatter(
         x=df_date_group.month_year,
         y=df_date_group.is_default,
         name="Share of defaults"),
         secondary_y=True,
);


fig.add_trace(
    go.Scatter(
         x=df_date_group.month_year,
         y=df_date_group.period,
         name="Average installment index"),
         secondary_y=False,
);

fig.update_layout(
    legend=dict(
        x=0,
        y=1,
        traceorder="normal",
        font=dict(
            family="sans-serif",
            size=12,
            color="black"
        ),
        bgcolor=None,

    )
);



In [60]:
fig