In [1]:
import numpy as np
import pandas as pd

from bokeh.charts import Bar, TimeSeries
from bokeh.plotting import figure, show
from bokeh.io import output_notebook, output_file

In [2]:
def plot_ts_repairs_for_property(input_ds, property_id, from_date, to_date):
    """Build a daily repair-count series for a single property.

    Parameters
    ----------
    input_ds : pandas.DataFrame
        Repairs table to read from. It must have a 'property-reference'
        column and a 'logged-date' column.
    property_id
        Value matched (with ``==``) against 'property-reference'.
    from_date, to_date
        Bounds of the period, passed straight to ``pd.date_range`` with
        daily frequency, so both ends are included.

    Returns
    -------
    pandas.DataFrame
        One row per day of the period, with columns 'dates' and 'repairs'.
        Days that have no logged repair carry 0.

    Notes
    -----
    The join matches 'logged-date' against the generated days by exact
    equality, so a logged value carrying a time-of-day component would not
    be counted. NOTE(review): confirm 'logged-date' holds plain dates.
    """
    # Select the repairs of this property from the frame the caller passed in
    # (not from whatever global repairs table happens to be loaded).
    property_repairs = input_ds[input_ds['property-reference'] == property_id]
    # Create a time series for all days between 'from_date' and 'to_date'
    all_dates = pd.date_range(from_date, to_date, freq='D')
    repairs_period = pd.DataFrame({'dates': all_dates})
    # Prepare the repair data: one unit per logged repair, summed per date
    df_dates = pd.DataFrame({'logged-date': property_repairs['logged-date'],
                             'repairs': [1] * len(property_repairs)})
    grp_by_dates = df_dates.groupby('logged-date', as_index=False).sum()
    # Left-join onto the full calendar so that every day of the period is kept
    join_df = pd.merge(repairs_period, grp_by_dates, left_on='dates',
                       right_on='logged-date', how='left')
    del join_df['logged-date']
    # All the dates that didn't exist in the repair data have 0 repairs
    return join_df.fillna(0)

In [9]:
# Read the table of properties used as examples throughout the notebook.
properties = pd.read_csv(
    "data/housing-repairs-properties.csv",
    encoding="utf-8",
    index_col=False,
)

# Pick the 'property' value of the rows labelled 0 to 3. A generator
# expression is used so that no loop variable is left behind in the namespace.
property_0, property_1, property_2, property_3 = (
    properties.loc[row_label, 'property'] for row_label in range(4)
)

In [10]:
# Load the repairs history, parsing 'logged-date' into timestamps so it can
# be joined against a generated calendar of days.
df_repairs = pd.read_csv("data/all-historical-repairs-mini.csv",
                         encoding="utf-8", index_col=False, parse_dates=['logged-date'])

# Daily repair counts for the first example property over one year.
test_all_repairs_prop0 = plot_ts_repairs_for_property(df_repairs, property_0,
                                                      '2014-03-28', '2015-03-27')

# Sanity check: the result should hold one row per day of the period.
# The call form of print gives the same output on Python 2 and Python 3.
print(len(test_all_repairs_prop0))

365


In [11]:
# Chart the daily repair counts of property_0: 'dates' on the x axis,
# 'repairs' on the y axis, drawn in green.
# NOTE(review): the title says "between 2014 and 2016", but the frame plotted
# here was built for 2014-03-28 to 2015-03-27 -- confirm which is intended.
ts_plot = TimeSeries(data=test_all_repairs_prop0, x='dates', y='repairs', color='green',
                     title='Repair pattern for property {0} between 2014 and 2016'.format(property_0), plot_width=600)

# Send bokeh output to the notebook, then render the chart.
output_notebook()
# Disabled: would presumably also save the chart as a standalone HTML file.
#output_file("data/plots/all-years-repair-profile.html")
show(ts_plot)

In [12]:
# Repairs logged in 2014, with 'logged-date' parsed into timestamps.
df_repairs2 = pd.read_csv(
    'data/historical-repairs-2014-merge.csv',
    encoding="utf-8",
    index_col=False,
    parse_dates=['logged-date'],
)

# Daily series over calendar year 2014 for three example properties:
#   property_1 -> property with 1 repair
#   property_2 -> property with 5 repairs
#   property_3 -> property with 10 repairs
# A generator expression is used so that no loop variable leaks into the
# notebook namespace.
repairs_2014_prop1, repairs_2014_prop2, repairs_2014_prop3 = (
    plot_ts_repairs_for_property(df_repairs2, prop_id, '2014-01-01', '2014-12-31')
    for prop_id in (property_1, property_2, property_3)
)

# One shared date axis plus one 'repairs' column per profile.
data_to_plot = {
    'Dates': repairs_2014_prop1['dates'],
    'Profile 1 repair': repairs_2014_prop1['repairs'],
    'Profile 5 repairs': repairs_2014_prop2['repairs'],
    'Profile 10 repairs': repairs_2014_prop3['repairs'],
}

# Draw the three profiles on one chart, each with its own colour and a legend.
ts_plot2 = TimeSeries(
    data_to_plot,
    x='Dates',
    y=['Profile 1 repair', 'Profile 5 repairs', 'Profile 10 repairs'],
    color=['Profile 1 repair', 'Profile 5 repairs', 'Profile 10 repairs'],
    title='Repair pattern for 3 properties in 2014',
    legend=True,
    ylabel='Number of repairs',
    plot_width=1200,
)

# Configure both notebook and HTML-file output before rendering the chart.
output_notebook()
output_file("data/plots/3-repair-patterns-2014.html")
show(ts_plot2)