In [2]:
#Initial imports 
import numpy as np
import pandas as pd 
from pandas import DataFrame, Series
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
%matplotlib inline
# With "all", every top-level expression statement in a cell is rendered,
# not just the final one (cells below rely on this to show two results at once).
from IPython.core.interactiveshell import InteractiveShell #Show all consecutive outputs
InteractiveShell.ast_node_interactivity = "all"

In [3]:
#Removes all unnecessary warnings by Python
import warnings
warnings.filterwarnings('ignore')

In [4]:
# Imports for better visualization

from collections import defaultdict
import scipy as sp
from matplotlib import rcParams
import matplotlib.cm as cm
import matplotlib as mpl

#colorbrewer2 Dark2 qualitative color table
dark2_colors = [(0.10588235294117647, 0.6196078431372549, 0.4666666666666667),
                (0.8509803921568627, 0.37254901960784315, 0.00784313725490196),
                (0.4588235294117647, 0.4392156862745098, 0.7019607843137254),
                (0.9058823529411765, 0.1607843137254902, 0.5411764705882353),
                (0.4, 0.6509803921568628, 0.11764705882352941),
                (0.9019607843137255, 0.6705882352941176, 0.00784313725490196),
                (0.6509803921568628, 0.4627450980392157, 0.11372549019607843)]

# Global matplotlib defaults applied to every figure drawn after this cell
rcParams['figure.figsize'] = (14, 6)
rcParams['figure.dpi'] = 250
# 'axes.color_cycle' was deprecated in matplotlib 1.5 and removed in later
# releases, where assigning it raises KeyError. 'axes.prop_cycle' is its
# replacement and takes a Cycler instead of a plain list of colors.
try:
    rcParams['axes.prop_cycle'] = mpl.cycler(color=dark2_colors)
except (AttributeError, KeyError):
    # matplotlib older than 1.5: no mpl.cycler / no 'axes.prop_cycle' key
    rcParams['axes.color_cycle'] = dark2_colors
rcParams['lines.linewidth'] = 3
rcParams['axes.facecolor'] = 'white'
rcParams['font.size'] = 25
rcParams['patch.edgecolor'] = 'white'
rcParams['patch.facecolor'] = dark2_colors[0]
rcParams['font.family'] = 'StixGeneral'

In [5]:
# Never truncate wide frames: None removes the limit on displayed columns
pd.options.display.max_columns = None

In [6]:
# Load the two raw default tables from the working directory
defdam, defdav = [pd.read_csv(csv_name) for csv_name in ('defdam.csv', 'defdav.csv')]

In [7]:
# Inspect the column dtypes of both raw tables. Both expressions are rendered
# because ast_node_interactivity is set to "all" in the first cell.
defdam.dtypes
defdav.dtypes

Account Number      object
date of default     object
Default sum        float64
date                 int64
month                int64
year                 int64
dtype: object

Account Number      object
date of default     object
Default sum        float64
date                 int64
month                int64
year                 int64
dtype: object

In [8]:
# Later cells only read 'Account Number', 'Default sum' and 'month',
# so strip the remaining date fields from both tables (mutates them in place).
for frame in (defdam, defdav):
    frame.drop(['date of default', 'date', 'year'], axis=1, inplace=True)

In [9]:
# One indicator column per month of default, appended to the defdam table.
# get_dummies only creates columns for month values that actually occur, so a
# month with no defaults would leave its *_def column missing and break the
# cells that index dfm['Jan_def'] / ['Feb_def'] / ['Mar_def']. The expected
# months are therefore added explicitly as all-zero columns when absent;
# any other month value present in the data is kept as-is.
dam_month_names = {1: 'Jan_def', 2: 'Feb_def', 3: 'Mar_def'}
bill = pd.get_dummies(defdam['month'])
bill = bill.reindex(columns=sorted(set(bill.columns) | set(dam_month_names)), fill_value=0)
dfm = pd.concat([defdam, bill], axis=1)
# Rebind instead of inplace=True so the cell yields a fresh, clearly derived frame
dfm = dfm.rename(columns=dam_month_names)
del dfm['month']
dfm.head()

Unnamed: 0,Account Number,Default sum,Jan_def,Feb_def,Mar_def
0,M100,1037.2,0.0,1.0,0.0
1,M104,611.8,0.0,0.0,1.0
2,M114,2193.5,0.0,0.0,1.0
3,M125,507.8,0.0,0.0,1.0
4,M132,1405.5,0.0,1.0,0.0


In [10]:
# One indicator column per month of default, appended to the defdav table.
# get_dummies only creates columns for month values that actually occur, so a
# month with no defaults would leave its *_def column missing and break the
# cells that index dfv['May_def'] / ['Jun_def'] / ['July_def']. The expected
# months are therefore added explicitly as all-zero columns when absent;
# any other month value present in the data is kept as-is.
dav_month_names = {5: 'May_def', 6: 'Jun_def', 7: 'July_def'}
bill = pd.get_dummies(defdav['month'])
bill = bill.reindex(columns=sorted(set(bill.columns) | set(dav_month_names)), fill_value=0)
dfv = pd.concat([defdav, bill], axis=1)
del dfv['month']
# Rebind instead of inplace=True so the cell yields a fresh, clearly derived frame
dfv = dfv.rename(columns=dav_month_names)
dfv.head()

Unnamed: 0,Account Number,Default sum,May_def,Jun_def,July_def
0,V5,713.4,0.0,0.0,1.0
1,V48,1402.8,0.0,0.0,1.0
2,V74,899.9,0.0,0.0,1.0
3,V79,1483.2,0.0,1.0,0.0
4,V95,1350.3,1.0,0.0,0.0


In [11]:
# Convert each month indicator into the amount defaulted in that month
# (indicator * 'Default sum'; rows for other months stay at zero)
for month_col in ('Jan_def', 'Feb_def', 'Mar_def'):
    dfm[month_col] = dfm[month_col].mul(dfm['Default sum'])

In [12]:
# Convert each month indicator into the amount defaulted in that month
# (indicator * 'Default sum'; rows for other months stay at zero)
for month_col in ('May_def', 'Jun_def', 'July_def'):
    dfv[month_col] = dfv[month_col].mul(dfv['Default sum'])

In [13]:
# Collapse to one row per account by summing the total and the per-month amounts
defm, defv = [frame.groupby(by='Account Number').sum() for frame in (dfm, dfv)]

In [14]:
# Preview the first ten accounts of each per-account summary. Both tables are
# rendered because ast_node_interactivity is set to "all" in the first cell.
defm.head(10)
defv.head(10)

Unnamed: 0_level_0,Default sum,Jan_def,Feb_def,Mar_def
Account Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
M100,1037.2,0.0,1037.2,0.0
M1002,2815.2,938.4,938.4,938.4
M1039,1802.6,0.0,1066.8,735.8
M104,611.8,0.0,0.0,611.8
M1045,649.8,0.0,0.0,649.8
M1049,969.1,969.1,0.0,0.0
M1072,549.9,549.9,0.0,0.0
M1077,1552.4,776.2,776.2,0.0
M1086,774.4,0.0,0.0,774.4
M1091,1951.4,1281.1,670.3,0.0


Unnamed: 0_level_0,Default sum,May_def,Jun_def,July_def
Account Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
V1007,481.0,0.0,0.0,481.0
V1011,1647.6,584.8,531.4,531.4
V1013,1137.2,617.0,520.2,0.0
V1014,1345.4,0.0,742.7,602.7
V1029,614.8,0.0,0.0,614.8
V1034,814.8,0.0,0.0,814.8
V1041,677.7,677.7,0.0,0.0
V1042,1262.4,631.2,0.0,631.2
V1044,1027.0,0.0,513.5,513.5
V1049,1206.5,0.0,1206.5,0.0


In [15]:
# Persist both per-account summaries; index=True keeps 'Account Number'
# (the groupby key, now the index) as the first CSV column.
for summary, out_name in ((defm, 'defdamup.csv'), (defv, 'defdavup.csv')):
    summary.to_csv(out_name, index=True)