In [1]:
import numpy as np
import pandas as pd
import random

import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

import datetime
from datetime import timedelta,datetime, date

In [140]:
cust_types = {'Young Professional': { 'min_income':20000, 'max_income':100000, 'proportion':0.18},
              'Experienced Professional': { 'min_income':50000, 'max_income':200000, 'proportion':0.28},
              'Parent': { 'min_income':50000, 'max_income':200000, 'proportion':0.15},
              'College Student': { 'min_income':5000, 'max_income':25000, 'proportion':0.09},
              'School Student': { 'min_income':2000, 'max_income':8000, 'proportion':0.05},
              'Retired': { 'min_income':10000, 'max_income':50000, 'proportion':0.2},
              'Unemployed': { 'min_income':1000, 'max_income':5000, 'proportion':0.05}}

exp_types = ['Groceries','Clothing','Housing','Education','Health',
             'Motor/Travel','Entertainment','Gambling','Savings',
             'Bills and Utilities','Tax','Fines']

exp_prop = {'Young Professional':[0.04,0.12,0.16,0.08,0.04,0.08,0.24,0.08,0.04,0.04,0.04,0.04],
        'Experienced Professional':[0.08,0.08,0.15,0.04,0.08,0.08,0.15,0.08,0.08,0.08,0.08,0.04],
        'Parent':[0.15,0.04,0.23,0.15,0.08,0.04,0.04,0.08,0.04,0.08,0.04,0.04],
        'College Student':[0.09,0.04,0.09,0.35,0.04,0.09,0.13,0.04,0.04,0.04,0,0.04],
        'School Student':[0.05,0.02,0,0.05,0,0.19,0.65,0,0.05,0,0,0],
        'Retired':[0.09,0.04,0.22,0,0.18,0.07,0.15,0.15,0,0.07,0.01,0.01],
        'Unemployed':[0.1,0,0.2,0,0.05,0.4,0,0.1,0,0.1,0,0.05]}

exp_behave = {'Exclude':[0,0,0.5,0.5,0,0.2,0,0.8,0.5,0,0,0.9],
        'Exclude Monthly':[0,0.5,0,0,0.5,0.2,0,0.1,0.2,0,0,0.9],
        'Annual Inflation':[0.08,0.08,0.05,0.02,0.05,0.2,0.02,0.05,0,0.02,0,0],
        'Holiday Increase':[1,1,0,0,0,1,1,1,0,0,0,0],
        'Regular Monthly Min':[0,0,1,1,0,0,0,0,0,1,1,0],
        'Regular Monthly Max':[1,0,1,1,1,1,2,0,1,2,1,0],
        'Regular Monthly %':[0.1,0,1,0.9,0.8,0.5,0.2,0,0.8,1,1,0],
        'Sporatic Min':[4,1,0,0,2,4,5,5,1,0,0,44593],
        'Sporatic Max':[20,8,0,1,6,20,80,10,2,1,0,2],
        'Sporatic Variation':[0.05,0.1,0,0.2,0.2,0.2,0.3,0.5,0.5,0.2,0,0.5]}

In [20]:
def base_income(cust_type):
    
    #Get customer income range 
    min_income = cust_types[cust_type]['min_income']
    max_income = cust_types[cust_type]['max_income']
    
    #Create random income within cusotmer range 
    income = round(np.random.uniform(min_income,max_income))
    return income

def time_range():
    
    #Start date
    add_month = np.random.randint(0,120)
    start_dt = date(2010, 1, 1) + pd.DateOffset(months=add_month)
    
    #End date
    add_month  = np.random.randint(6,120)
    end_dt = start_dt + pd.DateOffset(months=add_month)
    
    #End date cutoff 
    cutoff = date(2020, 12, 1)+ pd.DateOffset(months=0)
    if end_dt >= cutoff:
        end_dt = cutoff 
    
    return start_dt, end_dt

def months_between(start_dt,end_dt):
    
    return (end_dt. year - start_dt. year) * 12 + (end_dt.month - start_dt.month)

def month_adjust(start_dt,end_dt):
    
    #
    months = pd.date_range(start_dt,end_dt,freq='MS')
    inf_months = [months_between(date(2010, 1, 1),dt) for dt in months]
    
    adjust = []
    for m in months:
        
        #monthly exclude 
        exclude_monthly = [1-np.random.binomial(n=1,p=float(per)) for per in behave_dict['Exclude Monthly']]
        
        #inflation adjustment
        inf_months = months_between(date(2010, 1, 1),m)
        inf = [(1+i)**(inf_months/12) for i in exp_behave['Annual Inflation']]
        
        #Holiday increase
        hol_inc = [1]*len(inf)
        if m.month == 10:
            hol_inc += np.array(exp_behave['Holiday Increase'])*0.1
        elif m.month == 11:
            hol_inc += np.array(exp_behave['Holiday Increase'])*0.2
        elif m.month == 12:
            hol_inc += np.array(exp_behave['Holiday Increase'])*0.3
        
        #Bring together
        adjust.append(np.array(exclude_monthly)*inf*hol_inc)
        
    return adjust

start_dt, end_dt = time_range()
print(start_dt, end_dt)
print(months_between(start_dt,end_dt))
month_adj = month_adjust(start_dt,end_dt)
len(month_adj)

44359

In [148]:
def generate_transactions(cust_type):
    
    income = base_income(cust_type)
    start_dt, end_dt = time_range()
    
    base_exp = [income*p for p in exp_prop[cust_type]]
    
    
    exclude = [1-np.random.binomial(n=1,p=float(per)) for per in behave_dict['Exclude']]
    
    month_adj = month_adjust(start_dt,end_dt)
    #print(month_adj)
    
    final = np.array([m*exclude for m in month_adj])*(income/12)
    
    trans = []
    for i,f in enumerate(final):
        trans_dict = {'cust_type':cust_type, 
         'income':income, 
         'start_dt':start_dt, 
         'end_dt':end_dt,
        'month':start_dt+ pd.DateOffset(months=i),
         'month_n':(start_dt+ pd.DateOffset(months=i)).month,         
        'Groceries':f[0],
        'Clothing':f[1],
        'Housing':f[2],
         'Education':f[3],
         'Health':f[4],
         'Motor/Travel':f[5],
         'Entertainment':f[6],
         'Gambling':f[7],
         'Savings':f[8],
        'Bills and Utilities':f[9],
         'Tax':f[10],
         'Fines':f[11]
        }
        trans.append(trans_dict)
    
    
    
    #print(final)
    
    return trans

generate_transactions('Young Professional')

[{'cust_type': 'Young Professional',
  'income': 65702,
  'start_dt': Timestamp('2014-07-01 00:00:00'),
  'end_dt': Timestamp('2020-12-01 00:00:00'),
  'month': Timestamp('2014-07-01 00:00:00'),
  'month_n': 7,
  'Groceries': 7741.127909698928,
  'Clothing': 0.0,
  'Housing': 6819.447490169392,
  'Education': 0.0,
  'Health': 6819.447490169392,
  'Motor/Travel': 12436.923158739448,
  'Entertainment': 5985.46804806808,
  'Gambling': 0.0,
  'Savings': 0.0,
  'Bills and Utilities': 5985.46804806808,
  'Tax': 5475.166666666667,
  'Fines': 0.0},
 {'cust_type': 'Young Professional',
  'income': 65702,
  'start_dt': Timestamp('2014-07-01 00:00:00'),
  'end_dt': Timestamp('2020-12-01 00:00:00'),
  'month': Timestamp('2014-08-01 00:00:00'),
  'month_n': 8,
  'Groceries': 7790.934559755319,
  'Clothing': 0.0,
  'Housing': 6847.230763380432,
  'Education': 0.0,
  'Health': 0.0,
  'Motor/Travel': 12627.325866945592,
  'Entertainment': 5995.35353517951,
  'Gambling': 0.0,
  'Savings': 5475.16666666

In [168]:
n = 1000

test_df = []
for c in cust_types.keys():
    cust_n = round(n*cust_types[c]['proportion'])
    for i in range(cust_n):
        for x in generate_transactions(c):
            test_df.append(x)
        
test_df = pd.DataFrame(test_df,
                       columns=['cust_type', 'income', 'start_dt', 'end_dt','month','month_n',
                                'Groceries','Clothing','Housing','Education','Health',
                                 'Motor/Travel','Entertainment','Gambling','Savings',
                                 'Bills and Utilities','Tax','Fines'])
test_df


Unnamed: 0,cust_type,income,start_dt,end_dt,month,month_n,Groceries,Clothing,Housing,Education,Health,Motor/Travel,Entertainment,Gambling,Savings,Bills and Utilities,Tax,Fines
0,Young Professional,79655,2010-02-01,2019-12-01,2010-02-01,2,6680.625222,6680.625222,0.000000,6648.879726,6664.960361,6739.539656,6648.879726,0.000000,6637.916667,6648.879726,6637.916667,0.0
1,Young Professional,79655,2010-02-01,2019-12-01,2010-03-01,3,6723.608566,0.000000,0.000000,6659.860891,6692.114234,6842.718440,6659.860891,0.000000,0.000000,6659.860891,6637.916667,0.0
2,Young Professional,79655,2010-02-01,2019-12-01,2010-04-01,4,6766.868466,6766.868466,0.000000,6670.860193,0.000000,6947.476836,6670.860193,0.000000,6637.916667,6670.860193,6637.916667,0.0
3,Young Professional,79655,2010-02-01,2019-12-01,2010-05-01,5,6810.406702,6810.406702,0.000000,6681.877661,6746.754317,7053.839027,6681.877661,0.000000,6637.916667,6681.877661,6637.916667,0.0
4,Young Professional,79655,2010-02-01,2019-12-01,2010-06-01,6,6854.225063,6854.225063,0.000000,6692.913325,0.000000,0.000000,6692.913325,0.000000,6637.916667,6692.913325,6637.916667,0.0
5,Young Professional,79655,2010-02-01,2019-12-01,2010-07-01,7,6898.325354,6898.325354,0.000000,6703.967216,6801.840527,7271.473386,6703.967216,0.000000,6637.916667,6703.967216,6637.916667,0.0
6,Young Professional,79655,2010-02-01,2019-12-01,2010-08-01,8,6942.709387,0.000000,0.000000,6715.039362,6829.552067,0.000000,6715.039362,0.000000,6637.916667,6715.039362,6637.916667,0.0
7,Young Professional,79655,2010-02-01,2019-12-01,2010-09-01,9,6987.378988,0.000000,0.000000,6726.129796,0.000000,7495.822488,6726.129796,0.000000,0.000000,6726.129796,6637.916667,0.0
8,Young Professional,79655,2010-02-01,2019-12-01,2010-10-01,10,7735.569594,0.000000,0.000000,6737.238546,0.000000,0.000000,7410.962401,0.000000,6637.916667,6737.238546,6637.916667,0.0
9,Young Professional,79655,2010-02-01,2019-12-01,2010-11-01,11,8493.098708,8493.098708,0.000000,6748.365643,0.000000,9272.512206,8098.038772,0.000000,6637.916667,6748.365643,6637.916667,0.0


# Tests

In [178]:
test_df['Housing_miss'] = [1 if x != 0 else 0 for x in test_df['Housing']]
test_df['Fines_miss'] = [1 if x != 0 else 0 for x in test_df['Fines']]

In [179]:
test_df[test_df.month_n==1].groupby(['cust_type']).mean()

Unnamed: 0_level_0,income,month_n,Groceries,Clothing,Housing,Education,Health,Motor/Travel,Entertainment,Gambling,Savings,Bills and Utilities,Tax,Fines,Housing_miss,Fines_miss
cust_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
College Student,14966.329231,1.0,2112.864872,1210.729527,864.861413,689.939506,841.782754,2875.195938,1423.723767,132.431494,459.051538,1423.723767,1247.194103,0.0,0.489231,0.0
Experienced Professional,126426.954631,1.0,17512.415926,8690.552526,6313.899546,5766.82665,6897.573634,21751.148832,11964.886737,2896.285472,4051.570573,11964.886737,10535.579553,110.057656,0.457467,0.014178
Parent,133536.463122,1.0,18663.64851,9048.699524,6881.018559,5905.913648,7674.629242,25299.329956,12668.518231,1888.568225,4364.943539,12668.518231,11128.038593,27.178674,0.473413,0.001715
Retired,29711.363636,1.0,4131.489006,2055.534957,1753.201852,1470.266706,1642.237053,5763.134382,2814.980637,550.457362,1065.464719,2814.980637,2475.94697,37.55974,0.48961,0.012987
School Student,4369.718563,1.0,615.788292,297.333456,248.427259,234.631585,289.499782,787.994415,415.190385,152.15676,173.042914,415.190385,364.143214,2.493014,0.508982,0.005988
Unemployed,3122.715152,1.0,463.619107,237.173474,192.896323,148.154991,192.137692,614.84974,301.003971,46.198874,99.833838,301.003971,260.226263,11.188384,0.472727,0.042424
Young Professional,61295.553161,1.0,8416.584853,4049.172359,3534.863123,3019.626811,3702.82121,11503.727395,5786.601239,1536.955472,1869.090398,5786.601239,5107.962763,28.69648,0.520115,0.005747


In [180]:
test_df[test_df.month_n==10].groupby(['cust_type']).mean()

Unnamed: 0_level_0,income,month_n,Groceries,Clothing,Housing,Education,Health,Motor/Travel,Entertainment,Gambling,Savings,Bills and Utilities,Tax,Fines,Housing_miss,Fines_miss
cust_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
College Student,14885.039437,10.0,2380.117516,1249.752187,869.540911,673.432393,907.885626,3276.860355,1568.47667,172.634796,486.537559,1425.887882,1240.419953,25.876526,0.490141,0.016901
Experienced Professional,126144.064572,10.0,19727.998781,9723.599924,6410.36331,5858.495299,7325.110656,26156.50987,13214.39637,3213.820791,3744.900015,12013.087609,10512.005381,153.813845,0.458988,0.013089
Parent,133558.321767,10.0,21158.545929,10255.246833,7040.53622,6064.21401,8350.656984,30521.191194,14038.690122,1988.596432,4197.188617,12762.445565,11129.860147,32.275368,0.473186,0.003155
Retired,29675.768765,10.0,4672.967448,2225.112448,1791.980176,1513.986971,1670.586959,7065.235334,3114.612463,578.884967,1014.090496,2831.465875,2472.98073,22.576776,0.493947,0.008475
School Student,4443.163043,10.0,715.919901,371.720775,246.606651,241.816025,263.98245,883.052041,468.877466,169.129258,192.205616,426.252242,370.263587,0.978261,0.494565,0.005435
Unemployed,3133.312169,10.0,525.882259,258.247394,194.922481,150.471236,195.576216,697.283406,334.413874,51.125313,96.15873,304.012613,261.109347,5.709877,0.470899,0.015873
Young Professional,60935.115127,10.0,9532.318947,4798.548304,3519.671563,3087.426293,3538.659269,13619.527407,6382.869588,1534.399641,1839.274096,5802.608716,5077.926261,34.542392,0.511379,0.006693


In [181]:
test_df[test_df.month_n==11].groupby(['cust_type']).mean()

Unnamed: 0_level_0,income,month_n,Groceries,Clothing,Housing,Education,Health,Motor/Travel,Entertainment,Gambling,Savings,Bills and Utilities,Tax,Fines,Housing_miss,Fines_miss
cust_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
College Student,14995.988827,11.0,2620.818588,1136.892148,859.129087,669.687493,905.036136,3779.767399,1724.665864,208.621607,463.676676,1437.221553,1249.665736,1.414339,0.480447,0.002793
Experienced Professional,126196.405382,11.0,21632.801874,10834.894892,6497.559679,5810.988334,7598.052108,29589.827972,14438.228213,3431.238004,3905.090784,12031.856844,10516.367115,94.678385,0.462674,0.011285
Parent,133101.393417,11.0,23102.623512,11600.886525,7022.151348,5978.320267,8036.135789,33254.744396,15279.357497,2555.785332,4042.484195,12732.797914,11091.782785,32.64198,0.473354,0.003135
Retired,29633.677033,11.0,5111.710522,2712.257641,1786.996548,1521.326644,1691.163711,7702.230891,3396.437959,703.04675,1060.605263,2830.364966,2469.473086,35.522727,0.490431,0.014354
School Student,4470.274194,11.0,788.524029,410.566303,248.498491,245.581689,262.992921,1145.97674,515.026017,191.765794,192.871864,429.188348,372.522849,2.238351,0.494624,0.005376
Unemployed,3123.809524,11.0,575.429564,292.742871,193.806448,150.035072,173.192619,835.702603,364.284011,52.88668,94.713845,303.57001,260.31746,8.954586,0.465608,0.026455
Young Professional,60889.801577,11.0,10428.609576,5309.17239,3566.074021,3039.03456,3630.820491,15043.230022,6963.322255,1685.800312,1863.586837,5802.768546,5074.150131,23.621003,0.51774,0.005256


In [182]:
test_df[test_df.month_n==12].groupby(['cust_type']).mean()

Unnamed: 0_level_0,income,month_n,Groceries,Clothing,Housing,Education,Health,Motor/Travel,Entertainment,Gambling,Savings,Bills and Utilities,Tax,Fines,Housing_miss,Fines_miss
cust_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
College Student,15015.280992,12.0,2851.98892,1251.50457,851.939576,670.375042,857.103172,4410.409706,1872.269828,222.048727,480.839991,1440.20756,1251.273416,9.591139,0.473829,0.00551
Experienced Professional,125972.985395,12.0,23481.213484,11389.731059,6575.709045,5865.845139,7563.690407,31243.797646,15628.348634,3593.306905,3820.479525,12021.806641,10497.748783,104.931057,0.467354,0.012027
Parent,133012.145086,12.0,25109.877154,11970.809577,6981.626122,5968.87829,7460.554744,36666.535302,16558.288265,2530.527526,4421.75468,12737.144819,11084.345424,77.715679,0.469579,0.0078
Retired,29564.51716,12.0,5534.094796,2663.342006,1786.660048,1514.728198,1789.971835,8306.92175,3672.060724,751.195252,1055.069724,2824.662095,2463.709763,16.837673,0.489941,0.004734
School Student,4458.149733,12.0,854.785902,441.315867,247.407263,238.575334,278.539567,1142.022979,556.840449,212.465919,175.902406,428.338807,371.512478,0.0,0.486631,0.0
Unemployed,3134.505208,12.0,629.945178,310.82289,197.295844,152.595951,188.17337,921.167729,396.703004,68.045535,105.784288,305.156157,261.208767,8.523872,0.473958,0.026042
Young Professional,61174.950649,12.0,11380.288102,5879.349559,3602.009711,3053.876774,3498.957193,17458.977614,7583.347213,1942.627549,1786.189935,5833.34401,5097.912554,27.529545,0.518182,0.003896


In [173]:
test_df[test_df.month_n==12].groupby(['cust_type']).count()

Unnamed: 0_level_0,income,start_dt,end_dt,month,month_n,Groceries,Clothing,Housing,Education,Health,Motor/Travel,Entertainment,Gambling,Savings,Bills and Utilities,Tax,Fines
cust_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
College Student,363,363,363,363,363,363,363,363,363,363,363,363,363,363,363,363,363
Experienced Professional,1164,1164,1164,1164,1164,1164,1164,1164,1164,1164,1164,1164,1164,1164,1164,1164,1164
Parent,641,641,641,641,641,641,641,641,641,641,641,641,641,641,641,641,641
Retired,845,845,845,845,845,845,845,845,845,845,845,845,845,845,845,845,845
School Student,187,187,187,187,187,187,187,187,187,187,187,187,187,187,187,187,187
Unemployed,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192
Young Professional,770,770,770,770,770,770,770,770,770,770,770,770,770,770,770,770,770


In [175]:
test_df.groupby(['month']).mean()

Unnamed: 0_level_0,income,month_n,Groceries,Clothing,Housing,Education,Health,Motor/Travel,Entertainment,Gambling,Savings,Bills and Utilities,Tax,Fines
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2010-01-01,106148.500000,1.0,8845.708333,6748.858333,4440.575000,1750.558333,3722.908333,3507.358333,8845.708333,669.533333,3296.316667,8845.708333,8845.708333,745.500000
2010-02-01,101817.894737,2.0,8539.416178,5176.544878,3973.130115,3226.622875,4303.976943,4428.187646,8498.837939,1857.039496,3158.609649,8498.837939,8484.824561,0.000000
2010-03-01,106043.772727,3.0,8951.061687,3833.014367,4376.153367,2980.924411,3373.525790,4967.100346,8866.195151,2392.111682,3716.617424,8866.195151,8836.981061,0.000000
2010-04-01,103211.640000,4.0,8768.057147,3335.142299,4681.691599,3436.648288,4680.362149,5300.840659,8643.656026,2277.747497,3192.606667,8643.656026,8600.970000,0.000000
2010-05-01,98449.566667,5.0,8417.319548,4045.820190,4270.965251,3543.110613,4268.641656,4185.738308,8258.464129,2431.733930,1582.913889,8258.464129,8204.130556,0.000000
2010-06-01,97957.343750,6.0,8429.121595,4259.923357,4416.305986,3327.152202,3280.354974,5324.545607,8230.745230,2156.384645,3872.635417,8230.745230,8163.111979,0.000000
2010-07-01,84580.853659,7.0,7324.916794,4624.776321,4046.139721,2799.707986,3967.688066,4971.711052,7118.539577,1698.684098,3162.693089,7118.539577,7048.404472,0.000000
2010-08-01,82467.625000,8.0,7187.857061,2702.858154,4061.243873,2902.082355,3102.085350,4345.882829,6952.147988,1739.868865,2388.112847,6952.147988,6872.302083,0.000000
2010-09-01,81938.888889,9.0,7187.722937,3751.131387,3987.834414,3225.875225,4537.514032,3878.284815,6918.983140,1307.333020,2738.350309,6918.983140,6828.240741,0.000000
2010-10-01,81434.180328,10.0,7908.351884,3786.340865,4050.156797,3309.071322,4094.369738,5297.414187,7576.494238,2074.872619,2777.448087,6887.722035,6786.181694,90.909836


In [154]:
test_df['start_dt'].value_counts()

2012-07-01    6473
2012-06-01    6163
2012-11-01    6073
2013-12-01    6063
2011-09-01    6050
2010-12-01    6046
2013-04-01    5894
2013-02-01    5864
2014-01-01    5854
2010-11-01    5832
2010-03-01    5823
2013-01-01    5806
2012-05-01    5795
2013-05-01    5778
2011-08-01    5726
2010-10-01    5723
2011-10-01    5690
2010-06-01    5668
2011-02-01    5609
2010-07-01    5555
2014-06-01    5541
2012-08-01    5538
2012-02-01    5522
2010-08-01    5516
2013-11-01    5496
2010-04-01    5485
2010-01-01    5350
2014-05-01    5268
2012-12-01    5250
2012-10-01    5242
              ... 
2017-01-01    2978
2017-04-01    2858
2018-02-01    2791
2017-09-01    2786
2016-12-01    2757
2018-01-01    2635
2018-08-01    2629
2018-05-01    2625
2018-06-01    2605
2018-03-01    2598
2018-12-01    2501
2017-07-01    2457
2017-10-01    2421
2017-12-01    2346
2018-11-01    2193
2019-02-01    2136
2019-01-01    1996
2018-09-01    1944
2018-07-01    1889
2018-10-01    1685
2019-04-01    1658
2019-03-01  

In [273]:
test_df['end_dt'].value_counts()

2020-12-01    4261
2019-11-01      97
2018-12-01      94
2020-10-01      93
2019-10-01      91
2020-02-01      87
2019-12-01      87
2020-01-01      86
2018-07-01      85
2019-09-01      85
2020-07-01      85
2020-06-01      84
2020-08-01      83
2018-04-01      83
2020-09-01      81
2018-06-01      80
2019-04-01      79
2018-03-01      78
2019-05-01      78
2020-03-01      78
2019-03-01      77
2018-11-01      77
2019-06-01      77
2019-08-01      76
2018-10-01      75
2020-04-01      75
2018-08-01      74
2020-05-01      73
2019-07-01      73
2018-02-01      72
              ... 
2012-08-01      22
2012-06-01      20
2012-12-01      20
2012-09-01      20
2011-12-01      19
2012-11-01      19
2012-07-01      18
2012-10-01      18
2013-01-01      18
2013-02-01      17
2012-02-01      16
2012-03-01      13
2012-05-01      12
2011-10-01      12
2011-06-01      12
2011-09-01      11
2011-07-01      11
2012-01-01      10
2011-03-01       9
2011-08-01       8
2011-04-01       7
2011-11-01  

KeyError: 'U'

In [225]:
cust_type = 'Young Professional'
income = base_income(cust_type)
start_dt, end_dt = time_range()
print(cust_type,income,start_dte, end_dt)

35454

(Timestamp('2011-04-01 00:00:00'), Timestamp('2012-05-01 00:00:00'))

In [43]:
from datetime import timedelta, date

def daterange(date1, date2):
    for n in range(int ((date2 - date1).days)+1):
        yield date1 + timedelta(n)

start_dt = date(2015, 12, 20)
end_dt = date(2016, 1, 11)

6.231947423198527

In [None]:
prop = pd.read_csv('/Users/conorosully/Downloads/Figures - cust_type.csv')
prop_np = prop.to_numpy()

prop_dict = {}

for i in range(len(cust_types)):
    prop_dict[prop_np[i][0]] = list(prop_np[i][1:])

prop_dict