forked from better/convoys
-
Notifications
You must be signed in to change notification settings - Fork 1
/
dob_violations.py
50 lines (43 loc) · 1.7 KB
/
dob_violations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
from matplotlib import pyplot
import datetime
import pandas
import convoys.plotting
import convoys.utils
def run():
    """Plot conversion curves for the DOB violations example dataset.

    Loads ``examples/dob_violations.pickle``, keeps the rows whose
    ``issue_date`` compares as earlier than 2018-01-01, and saves three
    figures to the current working directory:

    * ``dob-violations-kaplan-meier.png`` and ``dob-violations-weibull.png``:
      one figure per model, with cohorts grouped by the ``type`` column.
    * ``dob-violations-combined.png``: cohorts grouped by five-year
      ``issue_date`` bucket, with the Kaplan-Meier and Weibull curves drawn
      on the same axes (Weibull passed ``linestyle='--'``).

    Progress messages and a few diagnostics about the ``issue_date`` column
    are printed to stdout.
    """
    print('loading data')
    df = pandas.read_pickle('examples/dob_violations.pickle')
    print(df['issue_date'])
    print(df['issue_date'].dtype)

    # Build the cutoff mask once; it is both printed as a diagnostic and
    # used to filter the frame.
    before_cutoff = df['issue_date'] < datetime.date(2018, 1, 1)
    print(before_cutoff)
    # Take an explicit copy: a 'bucket' column is added further down, and
    # assigning a column on a frame derived by boolean indexing is what
    # triggers pandas' SettingWithCopyWarning.
    df = df[before_cutoff].copy()

    print('converting to arrays')
    unit, groups, (G, B, T) = convoys.utils.get_arrays(
        df, groups='type', created='issue_date',
        converted='disposition_date',
        unit='years', group_min_size=100)

    # One figure per model, cohorts split by violation type.
    for model in ('kaplan-meier', 'weibull'):
        print('plotting', model)
        pyplot.figure(figsize=(9, 6))
        convoys.plotting.plot_cohorts(G, B, T, model=model, ci=0.95,
                                      groups=groups, t_max=30)
        pyplot.legend()
        pyplot.xlabel(unit)
        pyplot.savefig('dob-violations-%s.png' % model)

    def five_year_bucket(d):
        # Label such as '2010-2014' for the five-year span containing d.year.
        start = 5 * (d.year // 5)
        return '%d-%d' % (start, start + 4)

    # Combined figure: cohorts split by five-year issue-date bucket, with
    # both models overlaid on the same axes.
    pyplot.figure(figsize=(9, 6))
    df['bucket'] = df['issue_date'].apply(five_year_bucket)
    unit, groups, (G, B, T) = convoys.utils.get_arrays(
        df, groups='bucket', created='issue_date',
        converted='disposition_date',
        unit='years', group_min_size=500)
    convoys.plotting.plot_cohorts(G, B, T, model='kaplan-meier',
                                  groups=groups, t_max=30, ci=0.95)
    convoys.plotting.plot_cohorts(G, B, T, model='weibull',
                                  groups=groups, t_max=30,
                                  plot_kwargs={'linestyle': '--'})
    pyplot.legend()
    pyplot.xlabel(unit)
    pyplot.savefig('dob-violations-combined.png')
# Run the example only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    run()