In [1]:
import pandas as pd
import numpy as np
import random as random

# Standard-library date/time imports
from datetime import datetime
from datetime import timedelta  

%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

import tensorflow as tf
from tensorflow import keras

import cufflinks
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

from plotly.offline import iplot
cufflinks.go_offline()

# Set global theme
cufflinks.set_config_file(world_readable=True, theme='pearl')
import plotly.figure_factory as ff


import plotly.express as px
import plotly.graph_objs as go
import chart_studio.plotly as py

# Data import and preparation

# Load the raw export. `sep` is a `read_csv` option and is not among the
# parameters `read_excel` defines, so it is not passed here.
data_input = pd.read_excel("raw_data.xlsx")

# Latest value found in the `data_ts` column; used further down as the
# cut-off date when filtering payments.
dataset_downland_date = data_input["data_ts"].max()
dataset_downland_date

data_input.head(10)

# Parse the date-like columns into pandas datetimes.
data_input["data"] = pd.to_datetime(data_input["data"])
data_input["data_wyplaty"] = pd.to_datetime(data_input["data wyplaty"])
data_input["Pierwsza rata"] = pd.to_datetime(data_input["Pierwsza rata"])

# Installment amount multiplied by the number of installments.
data_input["initial_loan_value"] = data_input["rata"] * data_input[" Ilość rat"]

# Columns carried forward into the working frame.
features = [
    "nrpożyczki",
    "nr Klienta",
    "płeć",
    "wiek",
    "kod pocztowy",
    "sposob wyplaty",
    "initial_loan_value",
    " Ilość rat",
    "data_wyplaty",
    "Pierwsza rata",
    "rata",
    "okres",
    "kwota",
    "data_ts",
    "yield",
]

# Take an explicit copy: `df` is renamed and extended with new columns later,
# and doing that on a bare column selection of `data_input` raises
# SettingWithCopyWarning (and may not write through).
df = data_input[features].copy()

df.columns

## Translating column names to English

# Mapping from the original (Polish) column names to English ones.
features_translate = {
    "nrpożyczki": "loan_ID",
    "nr Klienta": "client_ID",
    "sposob wyplaty": "cash_payment",
    "okres": "period",
    "rata": "installment",
    "kod pocztowy": "post_code",
    " Ilość rat": "installment_count",
    "Pierwsza rata": "first_installment_date",
    "kwota": "payment_value",
    "data_ts": "payment_date",
    "data_wyplaty": "payout_date",
    "yield": "cumulated_payments",
    "płeć": "sex",
    "wiek": "age",
}

# Rebind instead of renaming in place: `df` originates from a column
# selection of another frame, and in-place mutation of such a selection
# triggers SettingWithCopyWarning. `rename` returns a fresh frame.
df = df.rename(columns=features_translate)

# Recode the sex column to English labels.
# NOTE(review): every value other than "Kobieta" -- including missing
# values -- ends up labelled "Male"; confirm that this is intended.
df["sex"] = np.where(df["sex"] == "Kobieta", "Female", "Male")

df.columns

# Data transformation and Feature engineering

# NOTE(review): this rebinds the name `datetime` (imported as a class at the
# top of the file) to the module. It is not used in this section; kept only
# because later cells may rely on the module binding.
import datetime

# Expected due date of each row's installment: the first installment date
# shifted forward by (period - 1) calendar months.
# The whole column is built in one positional assignment. The previous
# row loop wrote through chained indexing (`df[col][i] = ...`), which is not
# guaranteed to modify `df`, and looked rows up by label `i`, which only
# works when the index is exactly 0..len(df)-1.
df["expected_payment_date"] = [
    first_date + pd.DateOffset(months=int(period_no) - 1)
    for first_date, period_no in zip(df["first_installment_date"], df["period"])
]

# Per-loan maxima over rows that have a payment date: highest period,
# latest payment date and largest cumulated payment value.
paid_rows = df[df["payment_date"].notna()]
df_grouped = (
    paid_rows[["loan_ID", "period", "payment_date", "cumulated_payments"]]
    .groupby("loan_ID")
    .max()
    .rename(
        columns={
            "period": "last_payment_period",
            "payment_date": "last_payment_date",
            "cumulated_payments": "max_cumulated_payments",
        }
    )
    .reset_index()
)

# Default (inner) merge: loans without any row in `df_grouped` are dropped.
df = df.merge(df_grouped, on="loan_ID")

# Keep only rows with a payment date, ordered by loan and period.
df = df[df["payment_date"].notna()].sort_values(by=["loan_ID", "period"])
df.shape

# Discard payments dated after the dataset cut-off date.
df = df[df["payment_date"] <= dataset_downland_date]
df.shape


# Scatter plot of the first 100 rows: cumulated payments on the x axis,
# client age on the y axis.
sample = df.head(100)

trace0 = go.Scatter(
    y=sample.age,
    x=sample.cumulated_payments,
    mode="markers",
    marker=dict(
        color="red",
        size=5,
        opacity=0.3,
    ),
)

data = [trace0]

# Title and axis labels describe the plotted columns; the previous
# "Mileage"/"Age" labels did not match the data and had x/y swapped.
figure = go.Figure(
    data=data,
    layout=go.Layout(
        title="Age vs Cumulated Payments",
        yaxis=dict(title="Age"),
        xaxis=dict(title="Cumulated payments"),
    ),
)
iplot(figure)





