In [364]:
#Importing modules
import pandas as pd
import numpy as np
import warnings
from datetime import datetime
warnings.filterwarnings('ignore')

In [365]:
# Load the raw training extract into demo_init.
# NOTE(review): this is an absolute path on the author's machine; point it at your own
# copy of train_Data.csv before running.
train_csv_path = r'D:\Coding\projects\usurious_sisters\Train\train_Data.csv'
demo_init = pd.read_csv(train_csv_path)
# Report (rows, columns), then show the last five rows as the cell output.
print(demo_init.shape)
demo_init.tail()

(128655, 26)


Unnamed: 0,ID,Frequency,InstlmentMode,LoanStatus,PaymentMode,BranchID,Area,Tenure,AssetCost,AmountFinance,...,ManufacturerID,SupplierID,LTV,SEX,AGE,MonthlyIncome,City,State,ZiPCODE,Top-up Month
128650,143390,Half Yearly,Arrear,Closed,Direct Debit,424,PANIPAT,24,470000,265601.0,...,1568,48879,40.17,M,25.0,65333.33,SONIPAT,HARYANA,131403.0,24-30 Months
128651,143391,Half Yearly,Arrear,Closed,Direct Debit,424,PANIPAT,24,460000,275630.0,...,1568,48879,59.92,M,25.0,83333.33,SONIPAT,HARYANA,131403.0,No Top-up Service
128652,143393,Monthly,Arrear,Active,Direct Debit,424,PANIPAT,23,545000,300733.0,...,1568,44118,52.38,M,36.0,248500.0,SONIPAT,HARYANA,131024.0,No Top-up Service
128653,143394,Half Yearly,Arrear,Active,Direct Debit,424,PANIPAT,35,350000,250962.0,...,1568,48879,50.37,M,37.0,84500.0,SONIPAT,HARYANA,131103.0,No Top-up Service
128654,143395,Half Yearly,Arrear,Active,Direct Debit,424,PANIPAT,24,370000,200428.0,...,1568,48879,54.17,M,33.0,178166.67,SONIPAT,HARYANA,131402.0,No Top-up Service


In [366]:
# Print every column name of the loaded frame for reference; the cells below select
# columns by these exact spellings (e.g. 'InstlmentMode', 'MaturityDAte', 'ZiPCODE').
print(demo_init.columns)

Index(['ID', 'Frequency', 'InstlmentMode', 'LoanStatus', 'PaymentMode',
       'BranchID', 'Area', 'Tenure', 'AssetCost', 'AmountFinance',
       'DisbursalAmount', 'EMI', 'DisbursalDate', 'MaturityDAte', 'AuthDate',
       'AssetID', 'ManufacturerID', 'SupplierID', 'LTV', 'SEX', 'AGE',
       'MonthlyIncome', 'City', 'State', 'ZiPCODE', 'Top-up Month'],
      dtype='object')


In [367]:
# Estimate how complete each column is: one row per column of demo_init with the
# number of missing values and that number as a percentage of all rows.
df_nullrate = demo_init.isna().sum().to_frame(name='total null rows')
# Divide by the actual row count rather than a hard-coded 128655, so the percentages
# stay correct if the input file (or an upstream filter) changes the number of rows.
df_nullrate['percent_nulls'] = df_nullrate['total null rows'] / len(demo_init) * 100
df_nullrate

Unnamed: 0,total null rows,percent_nulls
ID,0,0.0
Frequency,0,0.0
InstlmentMode,0,0.0
LoanStatus,0,0.0
PaymentMode,0,0.0
BranchID,0,0.0
Area,11653,9.057557
Tenure,0,0.0
AssetCost,0,0.0
AmountFinance,0,0.0


In [368]:
# List the distinct labels of the target column "Top-up Month". The exact spellings matter
# for the encoding cell that follows (note the leading space in ' > 48 Months' in the output).
demo_init["Top-up Month"].unique()

array([' > 48 Months', 'No Top-up Service', '12-18 Months',
       '36-48 Months', '18-24 Months', '24-30 Months', '30-36 Months'],
      dtype=object)

In [369]:
# Encode the target (loan top-up bucket) as an ordinal integer in ASSN_CAT.
# Ordinal codes are meaningful here because the buckets are ordered in time:
# an 18-24 month bucket is "later" than a 12-18 month bucket.
# The keys must match the raw labels exactly, including the leading space in " > 48 Months".
topup_bucket_codes = {
    "No Top-up Service": 0,
    "12-18 Months": 1,
    "18-24 Months": 2,
    "24-30 Months": 3,
    "30-36 Months": 4,
    "36-48 Months": 5,
    " > 48 Months": 6,
}
# Labels that are missing or not in the table fall back to the sentinel 1000,
# which the QC cell below looks for.
demo_init["ASSN_CAT"] = (
    demo_init["Top-up Month"].map(topup_bucket_codes).fillna(1000).astype("int64")
)

In [370]:
#qc: ASSN_CAT starts out as the sentinel 1000, so a 1000 among the unique values means a
#"Top-up Month" value was missing or did not match any label used in the encoding cell.
demo_init["ASSN_CAT"].unique()
#passed: the recorded output contains only the codes 0-6

array([6, 0, 1, 5, 2, 3, 4], dtype=int64)

In [371]:
# Inspect the distinct values of the categorical variables that will be encoded
# (binary flags or one-hot vectors) in the cells below.
# The recorded output for "State" lists 22 distinct values.
for categorical_col in ("Frequency", "LoanStatus", "InstlmentMode", "PaymentMode", "State"):
    print(demo_init[categorical_col].unique())

['Monthly' 'Quatrly' 'Half Yearly' 'BI-Monthly']
['Closed' 'Active']
['Arrear' 'Advance']
['PDC_E' 'PDC' 'Direct Debit' 'Billed' 'ECS' 'Auto Debit' 'SI Reject'
 'ECS Reject' 'Cheque' 'PDC Reject' 'Escrow']
['MADHYA PRADESH' 'CHATTISGARH' 'ORISSA' 'BIHAR' 'WEST BENGAL' 'RAJASTHAN'
 'HARYANA' 'PUNJAB' 'HIMACHAL PRADESH' 'UTTAR PRADESH' 'UTTARAKHAND'
 'KARNATAKA' 'ANDHRA PRADESH' 'TELANGANA' 'GUJARAT' 'MAHARASHTRA' 'DELHI'
 'ASSAM' 'JHARKHAND' 'TAMIL NADU' 'DADRA AND NAGAR HAVELI' 'CHANDIGARH']


In [372]:
# Binary variables first: LoanStatus "Closed" -> 0, "Active" -> 1.
# Any other (or missing) value keeps the sentinel 1000 so the QC cell can flag it.
loan_status_codes = {"Closed": 0, "Active": 1}
demo_init["LOAN_STATUS"] = (
    demo_init["LoanStatus"].map(loan_status_codes).fillna(1000).astype("int64")
)

In [373]:
# InstlmentMode "Arrear" -> 0, "Advance" -> 1.
# Any other (or missing) value keeps the sentinel 1000 so the QC cell can flag it.
instalment_mode_codes = {"Arrear": 0, "Advance": 1}
demo_init["INS_MODE"] = (
    demo_init["InstlmentMode"].map(instalment_mode_codes).fillna(1000).astype("int64")
)

Categories for payment mode (as grouped in the next cell):
- automated: ECS, Direct Debit, Auto Debit, Escrow, SI
- cheque: Cheque, PDC, PDC_E
- discretionary: Billed
- reject: SI Reject, ECS Reject, PDC Reject

In [374]:
# Club the raw payment modes into four descriptive groups - automated, cheque,
# discretionary and reject. These groups (not the raw modes) feed the one-hot encoding.
payment_mode_groups = {
    "automated": ["ECS", "Direct Debit", "Auto Debit", "Escrow", "SI"],
    "cheque": ["Cheque", "PDC", "PDC_E"],
    "discretionary": ["Billed"],
    "reject": ["SI Reject", "ECS Reject", "PDC Reject"],
}
# Invert to a raw-mode -> group lookup so the whole column is built in one pass.
mode_to_group = {
    mode: group for group, modes in payment_mode_groups.items() for mode in modes
}
# Build the column as object dtype from the start. The previous version created an int
# column (1000) and then wrote strings into it with .loc, which relies on a silent
# int -> object upcast that newer pandas deprecates.
grouped_mode = demo_init["PaymentMode"].map(mode_to_group).astype(object)
# Unmatched or missing modes keep the sentinel 1000 that the QC cell checks for.
demo_init["PYMNT_MODE"] = grouped_mode.where(grouped_mode.notna(), 1000)

In [375]:
#qc: each encoded column was seeded with the sentinel 1000; if 1000 shows up among the unique
#values, a source value was missing or the assignment did not cover it.
for encoded_col in ("LOAN_STATUS", "INS_MODE", "PYMNT_MODE"):
    print(demo_init[encoded_col].unique())
#passed

[0 1]
[0 1]
['cheque' 'automated' 'discretionary' 'reject']


In [376]:
# Show the tail of the distinct "Area" values; the last positional label in the output (92)
# indicates how many distinct entries there are (unique() also counts NaN as an entry).
pd.DataFrame(demo_init["Area"].unique()).tail()
#With roughly 92 distinct areas, one one-hot/dummy column per area would make the data very wide.
#Still worth checking whether any of these areas has a high concentration of loan top-ups.

Unnamed: 0,0
88,MANDLA
89,AHMEDABAD
90,BAGALKOT
91,PANIPAT
92,GANGAPUR CITY


In [377]:
# Show the tail of the distinct "City" values; the last positional label in the output (488)
# means unique() returned 489 entries.
pd.DataFrame(demo_init["City"].unique()).tail()
#With 489 distinct values, one one-hot/dummy column per city would make the data very wide.
#Still worth checking whether any of these cities has a high concentration of loan top-ups.

Unnamed: 0,0
484,SAHARANPUR
485,BALLIA
486,YADGIR
487,BANKA
488,CHATRA


In [378]:
# Materialise the row index as a regular column named 'index'; the one-hot cell below
# uses it as the join key. NOTE: re-running this cell adds another index column,
# so run it once per fresh load of demo_init.
demo_init = demo_init.reset_index()

In [379]:
#creating one hot vectors for the remaining, multinomial variables
one_hot_cols = ["Frequency", "PYMNT_MODE", "State"]
# get_dummies keeps the row index of its input, so the dummy frames line up with
# demo_init row-for-row and can be attached with a single column-wise concat. This replaces
# a reset_index + merge on a helper 'index' column per variable, and does not depend on
# that helper column existing.
# dtype='uint8' pins the 0/1 integer encoding shown in the QC outputs; pandas >= 2.0 would
# otherwise default to booleans, which are written to CSV as True/False.
# All three variables share the prefix 'dummy' because the QC cells reference those names;
# this is only safe while no category label appears in more than one variable.
dummy_frames = [
    pd.get_dummies(demo_init[col], prefix='dummy', dtype='uint8')
    for col in one_hot_cols
]
# Column order matches the old loop: original columns, then Frequency, PYMNT_MODE, State dummies.
demo_init = pd.concat([demo_init] + dummy_frames, axis=1)

In [380]:
# Sanity check after one-hot encoding: the row count must be unchanged and the column
# count should have grown by the number of dummy columns added.
print(demo_init.shape)
demo_init.head()

(128655, 61)


Unnamed: 0,index,ID,Frequency,InstlmentMode,LoanStatus,PaymentMode,BranchID,Area,Tenure,AssetCost,...,dummy_MADHYA PRADESH,dummy_MAHARASHTRA,dummy_ORISSA,dummy_PUNJAB,dummy_RAJASTHAN,dummy_TAMIL NADU,dummy_TELANGANA,dummy_UTTAR PRADESH,dummy_UTTARAKHAND,dummy_WEST BENGAL
0,0,1,Monthly,Arrear,Closed,PDC_E,1,,48,450000,...,1,0,0,0,0,0,0,0,0,0
1,1,2,Monthly,Advance,Closed,PDC,333,BHOPAL,47,485000,...,1,0,0,0,0,0,0,0,0,0
2,2,3,Quatrly,Arrear,Active,Direct Debit,1,,68,690000,...,1,0,0,0,0,0,0,0,0,0
3,3,7,Monthly,Advance,Closed,Billed,125,GUNA,48,480000,...,1,0,0,0,0,0,0,0,0,0
4,4,8,Monthly,Arrear,Closed,Billed,152,BILASPUR,44,619265,...,0,0,0,0,0,0,0,0,0,0


In [381]:
#qc - Frequency: show the raw value next to its dummy columns; exactly one dummy
#per row should be 1, and it should be the one named after the raw value.
frequency_qc_cols = ['Frequency', 'dummy_BI-Monthly', 'dummy_Half Yearly', 'dummy_Monthly', 'dummy_Quatrly']
demo_init[frequency_qc_cols].tail()
#Pass

Unnamed: 0,Frequency,dummy_BI-Monthly,dummy_Half Yearly,dummy_Monthly,dummy_Quatrly
128650,Half Yearly,0,1,0,0
128651,Half Yearly,0,1,0,0
128652,Monthly,0,0,1,0
128653,Half Yearly,0,1,0,0
128654,Half Yearly,0,1,0,0


In [382]:
#qc - PYMNT_MODE: raw payment mode, its grouped category, and the dummy columns side by side;
#the dummy set to 1 should match the grouped category.
payment_qc_cols = ['PaymentMode', 'PYMNT_MODE', 'dummy_automated', 'dummy_cheque', 'dummy_discretionary', 'dummy_reject']
demo_init[payment_qc_cols].head()
#pass

Unnamed: 0,PaymentMode,PYMNT_MODE,dummy_automated,dummy_cheque,dummy_discretionary,dummy_reject
0,PDC_E,cheque,0,1,0,0
1,PDC,cheque,0,1,0,0
2,Direct Debit,automated,1,0,0,0
3,Billed,discretionary,0,0,1,0
4,Billed,discretionary,0,0,1,0


In [383]:
#qc - States: raw state next to every state dummy column; the dummy set to 1
#should be the one named after the raw state.
state_qc_cols = [
    'State',
    'dummy_ANDHRA PRADESH', 'dummy_ASSAM', 'dummy_BIHAR', 'dummy_CHANDIGARH',
    'dummy_CHATTISGARH', 'dummy_DADRA AND NAGAR HAVELI', 'dummy_DELHI', 'dummy_GUJARAT',
    'dummy_HARYANA', 'dummy_HIMACHAL PRADESH', 'dummy_JHARKHAND', 'dummy_KARNATAKA',
    'dummy_MADHYA PRADESH', 'dummy_MAHARASHTRA', 'dummy_ORISSA', 'dummy_PUNJAB',
    'dummy_RAJASTHAN', 'dummy_TAMIL NADU', 'dummy_TELANGANA', 'dummy_UTTAR PRADESH',
    'dummy_UTTARAKHAND', 'dummy_WEST BENGAL',
]
demo_init[state_qc_cols].head()
#pass

Unnamed: 0,State,dummy_ANDHRA PRADESH,dummy_ASSAM,dummy_BIHAR,dummy_CHANDIGARH,dummy_CHATTISGARH,dummy_DADRA AND NAGAR HAVELI,dummy_DELHI,dummy_GUJARAT,dummy_HARYANA,...,dummy_MADHYA PRADESH,dummy_MAHARASHTRA,dummy_ORISSA,dummy_PUNJAB,dummy_RAJASTHAN,dummy_TAMIL NADU,dummy_TELANGANA,dummy_UTTAR PRADESH,dummy_UTTARAKHAND,dummy_WEST BENGAL
0,MADHYA PRADESH,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
1,MADHYA PRADESH,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
2,MADHYA PRADESH,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
3,MADHYA PRADESH,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
4,CHATTISGARH,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [384]:
# Read the cleaned bureau dataset into b_df.
# NOTE(review): absolute path on the author's machine; the file name spelling
# ('beureau_cleandata.csv') is kept exactly as it is on disk.
bureau_csv_path = r'D:\Coding\projects\usurious_sisters\Train\beureau_cleandata.csv'
b_df = pd.read_csv(bureau_csv_path)
# Report (rows, columns), then preview the first five rows as the cell output.
print(b_df.shape)
b_df.head()

(128655, 29)


Unnamed: 0,ID,Frequency,InstlmentMode,LoanStatus,PaymentMode,BranchID,Area,Tenure,AssetCost,AmountFinance,...,SEX,AGE,MonthlyIncome,City,State,ZiPCODE,Top-up Month,SUM(DISBURSED-AMT/HIGH CREDIT),SUM(CURRENT-BAL),OTHERS-DISBURSED
0,1,Monthly,Arrear,Closed,PDC_E,1,,48,450000,275000.0,...,M,49.0,35833.33,RAISEN,MADHYA PRADESH,464993.0,> 48 Months,2201352.0,618526.0,1926352.0
1,2,Monthly,Advance,Closed,PDC,333,BHOPAL,47,485000,350000.0,...,M,23.0,666.67,SEHORE,MADHYA PRADESH,466001.0,No Top-up Service,18117081.0,10349457.0,17767081.0
2,3,Quatrly,Arrear,Active,Direct Debit,1,,68,690000,519728.0,...,M,39.0,45257.0,BHOPAL,MADHYA PRADESH,462030.0,12-18 Months,3708369.0,2065165.0,3188641.0
3,7,Monthly,Advance,Closed,Billed,125,GUNA,48,480000,400000.0,...,M,24.0,20833.33,ASHOK NAGAR,MADHYA PRADESH,473335.0,> 48 Months,1272553.0,624000.0,872553.0
4,8,Monthly,Arrear,Closed,Billed,152,BILASPUR,44,619265,440000.0,...,M,56.0,27313.67,BILASPUR,CHATTISGARH,495442.0,36-48 Months,2025216.0,974119.0,1585216.0


In [385]:
#merging with bureau data: attach the three aggregated bureau amounts to each loan by ID
bureau_cols = ['ID', 'SUM(DISBURSED-AMT/HIGH CREDIT)', 'SUM(CURRENT-BAL)', 'OTHERS-DISBURSED']
# how='left' keeps every loan row even if it has no bureau record.
# validate='many_to_one' makes pandas raise a MergeError if an ID appears more than once in
# b_df; without it a duplicated bureau ID would silently duplicate loan rows.
demo_init = pd.merge(demo_init, b_df[bureau_cols], on='ID', how='left', validate='many_to_one')
demo_init.head()

Unnamed: 0,index,ID,Frequency,InstlmentMode,LoanStatus,PaymentMode,BranchID,Area,Tenure,AssetCost,...,dummy_PUNJAB,dummy_RAJASTHAN,dummy_TAMIL NADU,dummy_TELANGANA,dummy_UTTAR PRADESH,dummy_UTTARAKHAND,dummy_WEST BENGAL,SUM(DISBURSED-AMT/HIGH CREDIT),SUM(CURRENT-BAL),OTHERS-DISBURSED
0,0,1,Monthly,Arrear,Closed,PDC_E,1,,48,450000,...,0,0,0,0,0,0,0,2201352.0,618526.0,1926352.0
1,1,2,Monthly,Advance,Closed,PDC,333,BHOPAL,47,485000,...,0,0,0,0,0,0,0,18117081.0,10349457.0,17767081.0
2,2,3,Quatrly,Arrear,Active,Direct Debit,1,,68,690000,...,0,0,0,0,0,0,0,3708369.0,2065165.0,3188641.0
3,3,7,Monthly,Advance,Closed,Billed,125,GUNA,48,480000,...,0,0,0,0,0,0,0,1272553.0,624000.0,872553.0
4,4,8,Monthly,Arrear,Closed,Billed,152,BILASPUR,44,619265,...,0,0,0,0,0,0,0,2025216.0,974119.0,1585216.0


In [386]:
# Remove the helper 'index' column that was added earlier for the one-hot step.
# NOTE: re-running this cell raises a KeyError because the column is already gone.
demo_init = demo_init.drop(columns=['index'])
demo_init.head()

Unnamed: 0,ID,Frequency,InstlmentMode,LoanStatus,PaymentMode,BranchID,Area,Tenure,AssetCost,AmountFinance,...,dummy_PUNJAB,dummy_RAJASTHAN,dummy_TAMIL NADU,dummy_TELANGANA,dummy_UTTAR PRADESH,dummy_UTTARAKHAND,dummy_WEST BENGAL,SUM(DISBURSED-AMT/HIGH CREDIT),SUM(CURRENT-BAL),OTHERS-DISBURSED
0,1,Monthly,Arrear,Closed,PDC_E,1,,48,450000,275000.0,...,0,0,0,0,0,0,0,2201352.0,618526.0,1926352.0
1,2,Monthly,Advance,Closed,PDC,333,BHOPAL,47,485000,350000.0,...,0,0,0,0,0,0,0,18117081.0,10349457.0,17767081.0
2,3,Quatrly,Arrear,Active,Direct Debit,1,,68,690000,519728.0,...,0,0,0,0,0,0,0,3708369.0,2065165.0,3188641.0
3,7,Monthly,Advance,Closed,Billed,125,GUNA,48,480000,400000.0,...,0,0,0,0,0,0,0,1272553.0,624000.0,872553.0
4,8,Monthly,Arrear,Closed,Billed,152,BILASPUR,44,619265,440000.0,...,0,0,0,0,0,0,0,2025216.0,974119.0,1585216.0


In [387]:
# Write demo_init to disk (without the row index) so the qualitative columns can be
# removed manually in Excel.
# NOTE(review): absolute path on the author's machine.
demo_init_csv_path = r"D:\Coding\projects\usurious_sisters\final_dataset\demo_init.csv"
demo_init.to_csv(demo_init_csv_path, index=False)

In [388]:
# Import demo_noqual.csv as demo_final.
# NOTE(review): this file is not produced by any cell in this notebook; presumably it is
# demo_init.csv after the manual Excel step mentioned in the previous cell - confirm which
# columns were removed, since that step is not reproducible from code.
demo_noqual_csv_path = r"D:\Coding\projects\usurious_sisters\final_dataset\demo_noqual.csv"
demo_final = pd.read_csv(demo_noqual_csv_path)
# Report (rows, columns), then preview the first five rows as the cell output.
print(demo_final.shape)
demo_final.head()

(128655, 53)


Unnamed: 0,ID,BranchID,Tenure,AssetCost,AmountFinance,DisbursalAmount,EMI,DisbursalDate,MaturityDAte,AuthDate,...,dummy_PUNJAB,dummy_RAJASTHAN,dummy_TAMIL NADU,dummy_TELANGANA,dummy_UTTAR PRADESH,dummy_UTTARAKHAND,dummy_WEST BENGAL,SUM(DISBURSED-AMT/HIGH CREDIT),SUM(CURRENT-BAL),OTHERS-DISBURSED
0,1,1,48,450000,275000.0,275000.0,24000.0,10-02-2012 00:00,15-01-2016 00:00,10-02-2012 00:00,...,0,0,0,0,0,0,0,2201352,618526,1926352.0
1,2,333,47,485000,350000.0,350000.0,10500.0,31-03-2012 00:00,15-02-2016 00:00,31-03-2012 00:00,...,0,0,0,0,0,0,0,18117081,10349457,17767081.0
2,3,1,68,690000,519728.0,519728.0,38300.0,17-06-2017 00:00,10-02-2023 00:00,17-06-2017 00:00,...,0,0,0,0,0,0,0,3708369,2065165,3188641.0
3,7,125,48,480000,400000.0,400000.0,11600.0,29-11-2013 00:00,10-11-2017 00:00,29-11-2013 00:00,...,0,0,0,0,0,0,0,1272553,624000,872553.0
4,8,152,44,619265,440000.0,440000.0,15000.0,08-12-2011 00:00,05-07-2015 00:00,08-12-2011 00:00,...,0,0,0,0,0,0,0,2025216,974119,1585216.0


In [389]:
# Strip the time part from the raw date strings by keeping only their first 10 characters
# (e.g. '10-02-2012 00:00' -> '10-02-2012'), storing the result in new *_Dt columns.
raw_to_clean_date = {
    'DisbursalDate': 'Disbursal_Dt',
    'MaturityDAte': 'Maturity_Dt',
    'AuthDate': 'Auth_Dt',
}
for raw_col, clean_col in raw_to_clean_date.items():
    demo_final[clean_col] = demo_final[raw_col].str[:10]
# Preview the three new columns.
demo_final[list(raw_to_clean_date.values())].head()

Unnamed: 0,Disbursal_Dt,Maturity_Dt,Auth_Dt
0,10-02-2012,15-01-2016,10-02-2012
1,31-03-2012,15-02-2016,31-03-2012
2,17-06-2017,10-02-2023,17-06-2017
3,29-11-2013,10-11-2017,29-11-2013
4,08-12-2011,05-07-2015,08-12-2011


In [390]:
# The raw date columns are superseded by the *_Dt columns created above, so remove them.
# Three columns were added and three are dropped, so the column count returns to its previous value.
demo_final = demo_final.drop(columns=['DisbursalDate', 'MaturityDAte', 'AuthDate'])
print(demo_final.shape)

(128655, 53)


In [391]:
#imputing a single NaN value in maturity date with same value as for a similar loan, converting others as string
string = '10-12-2016'
# Write through one .iloc[row, column] call on the frame itself. The previous form,
# demo_final.Maturity_Dt.iloc[53392] = ..., is chained assignment: it may modify a temporary
# Series instead of demo_final (and never updates the frame under pandas Copy-on-Write),
# and the warning about it is hidden by the notebook's warnings filter.
# NOTE(review): 53392 is a hard-coded row position; it is only correct for this exact file
# and row order.
maturity_col_pos = demo_final.columns.get_loc('Maturity_Dt')
demo_final.iloc[53392, maturity_col_pos] = string
# Make sure every entry is a plain str before the strptime parsing in the next cells.
demo_final['Maturity_Dt'] = demo_final['Maturity_Dt'].apply(str)

In [392]:
# Add a unix-timestamp (seconds, float) version of each date column, named <column>_U.
# NOTE(review): the parsed datetimes are naive, and datetime.timestamp() interprets naive
# values in the machine's local timezone, so these numbers shift by the UTC offset of
# whoever runs the notebook. Confirm whether a fixed timezone is wanted.
for date_col in ('Disbursal_Dt', 'Maturity_Dt', 'Auth_Dt'):
    demo_final[date_col + '_U'] = demo_final[date_col].apply(
        lambda date_str: datetime.strptime(date_str, '%d-%m-%Y').timestamp()
    )

In [393]:
#converting dates from string to datetime objects
# Parse each 'dd-mm-YYYY' string column in one vectorized call instead of a per-row
# datetime.strptime inside .apply. The explicit format keeps day-first parsing unambiguous
# and still raises on any value that does not match it.
for date_col in ('Disbursal_Dt', 'Maturity_Dt', 'Auth_Dt'):
    demo_final[date_col] = pd.to_datetime(demo_final[date_col], format='%d-%m-%Y')

In [394]:
#QC: the three date columns should now display as dates (YYYY-MM-DD) rather than
#'dd-mm-YYYY' strings.
demo_final[['Maturity_Dt','Disbursal_Dt','Auth_Dt']].head()

Unnamed: 0,Maturity_Dt,Disbursal_Dt,Auth_Dt
0,2016-01-15,2012-02-10,2012-02-10
1,2016-02-15,2012-03-31,2012-03-31
2,2023-02-10,2017-06-17,2017-06-17
3,2017-11-10,2013-11-29,2013-11-29
4,2015-07-05,2011-12-08,2011-12-08


In [395]:
# Preview the final frame: the parsed date columns and their *_U timestamp counterparts
# appear as the last columns.
demo_final.head()

Unnamed: 0,ID,BranchID,Tenure,AssetCost,AmountFinance,DisbursalAmount,EMI,AssetID,ManufacturerID,SupplierID,...,dummy_WEST BENGAL,SUM(DISBURSED-AMT/HIGH CREDIT),SUM(CURRENT-BAL),OTHERS-DISBURSED,Disbursal_Dt,Maturity_Dt,Auth_Dt,Disbursal_Dt_U,Maturity_Dt_U,Auth_Dt_U
0,1,1,48,450000,275000.0,275000.0,24000.0,4022465,1568,21946,...,0,2201352,618526,1926352.0,2012-02-10,2016-01-15,2012-02-10,1328812000.0,1452796000.0,1328812000.0
1,2,333,47,485000,350000.0,350000.0,10500.0,4681175,1062,34802,...,0,18117081,10349457,17767081.0,2012-03-31,2016-02-15,2012-03-31,1333132000.0,1455475000.0,1333132000.0
2,3,1,68,690000,519728.0,519728.0,38300.0,25328146,1060,127335,...,0,3708369,2065165,3188641.0,2017-06-17,2023-02-10,2017-06-17,1497638000.0,1675967000.0,1497638000.0
3,7,125,48,480000,400000.0,400000.0,11600.0,13021591,1060,25094,...,0,1272553,624000,872553.0,2013-11-29,2017-11-10,2013-11-29,1385663000.0,1510252000.0,1385663000.0
4,8,152,44,619265,440000.0,440000.0,15000.0,3291320,1046,21853,...,0,2025216,974119,1585216.0,2011-12-08,2015-07-05,2011-12-08,1323283000.0,1436035000.0,1323283000.0


In [396]:
# Save the final dataset (parsed dates plus their unix-timestamp columns) without the row index.
# NOTE(review): absolute path on the author's machine.
final_csv_path = r"D:\Coding\projects\usurious_sisters\final_dataset\final_data_v1.csv"
demo_final.to_csv(final_csv_path, index=False)