# Reoccurance Notebook 
* How often to we return to the same census tract for Sanitation Requetss 

In [99]:
import intake 
import pandas as pd 
import geopandas as gpd 
import altair as alt

In [100]:
catalog = intake.open_catalog("../catalogs/*.yml")

In [101]:
df = catalog.care311.read()

In [102]:
print(f"there have been {len(df)} requests since {df.createddate.min()}")

there have been 103719 requests since 2015-08-05T10:37:55.000


In [103]:
print(f"of those, {len(df[~df['closeddate'].isnull()])} have been closed")

of those, 93527 have been closed


In [104]:
df['closeddate'] = pd.to_datetime(df.closeddate)
df['createddate'] = pd.to_datetime(df.createddate)

## Create a solve time

In [105]:
## time to solve
df.solve_time = df[['createddate', 'closeddate']].diff(axis=1)['closeddate']

  super(GeoDataFrame, self).__setattr__(attr, val)


## Make a chart

In [106]:
df = df.set_index('closeddate')
source = pd.DataFrame(df.groupby([pd.Grouper(freq='M'), 'createdbyuserorganization']).count()['srnumber'])

In [107]:
chart = alt.Chart(source.reset_index()).mark_bar().encode(
    x = alt.X('closeddate:O'),
    y= alt.Y('sum(srnumber)'),
    color='createdbyuserorganization'
).properties(
    title='Number of Homelessness Encampment SRs closed by Month'
)

chart.encoding.y.title = 'Number of SR Encampments'
chart.encoding.x.title = 'Month'
chart

In [108]:
tracts = catalog.census_tracts_with_pop.read()

In [109]:
df = df[~df.geometry.isnull()]
df = df.to_crs(epsg=2229)

In [110]:
merged = gpd.sjoin(df, tracts, how="inner", op='intersects')
print(f'length of merged df is {len(merged)}')

  "(%s != %s)" % (left_df.crs, right_df.crs)


length of merged df is 103416


# Analysis 
* now that the dataset is merged, we want to calculate how long it takes for a ticket, on average, to get opened up again in the same tract
* There are two metrics we want to track here 1) `closed->next closure` and then `closed->next opened` 

In [111]:
## closed to next closure
# drop computed column if exists 
if 'closed2closed' in merged:
    merged = merged.drop('closed2closed', axis =1)

def compute_stat(group):
    """
    return the series and use in the apply 
    """
    return group.index.to_series().diff(periods=-1)
    
grouped = merged.groupby('GEOID10', as_index="False")

s = pd.Series(grouped.apply(compute_stat))
df = s.reset_index()

In [112]:
closed2closed = s.reset_index().rename({'level_1': 'closeddate', 0: 'closed2closed'}, axis=1)

In [84]:
merged = pd.merge(merged.reset_index().rename({'level_0': 'closeddate'}, axis=1), closed2closed)

In [85]:
print(f'the mean between is {merged.closed2closed.mean()}')
print(f'the median between is {merged.closed2closed.median()}')

the mean between is 11 days 12:31:31.326335
the median between is 0 days 00:00:16


In [97]:
grouped = merged[['closeddate','createddate','GEOID10']].groupby(['GEOID10'])

for name, group in grouped: 
    print(group.sort_values('closeddate', ascending=True))

               closeddate         createddate      GEOID10
93767 2017-06-14 08:01:59 2017-04-02 15:58:26  06037101110
93765 2017-12-29 07:12:12 2017-12-21 19:30:37  06037101110
93766 2017-12-29 07:12:21 2017-11-27 17:29:49  06037101110
93764 2018-05-21 15:02:13 2018-01-07 21:16:43  06037101110
               closeddate         createddate      GEOID10
78015 2017-06-14 08:29:27 2017-04-17 11:56:07  06037101210
78014 2017-06-14 08:29:36 2017-04-17 11:57:58  06037101210
78012 2017-07-18 10:41:19 2017-07-06 11:04:59  06037101210
78011 2017-07-18 10:42:06 2017-07-14 15:08:09  06037101210
78013 2017-07-25 09:55:06 2017-06-26 09:41:35  06037101210
78010 2017-09-08 11:35:19 2017-07-20 06:48:36  06037101210
78007 2019-06-07 08:07:46 2019-05-12 16:34:24  06037101210
78008 2019-06-07 08:19:28 2019-03-03 18:45:22  06037101210
78009 2019-06-07 08:19:37 2019-02-14 16:05:34  06037101210
78005 2019-10-24 13:45:16 2019-10-16 07:33:10  06037101210
78006 2019-10-29 06:35:33 2019-07-27 11:34:07  060371012