In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

from my_src import my_graphs

# Keep text in saved SVG files as real text elements rather than converting
# every glyph into a path; the resulting graph files are considerably smaller.
plt.rcParams.update({'svg.fonttype': 'none'})

# Shared settings passed to every my_graphs.graph_violins call below.
violin_plot_grid_size, violin_plot_fig_size = 20, (12, 10)

In [None]:
# Load the raw pickup records from the CSV file, relative to the notebook.
raw_csv_path = 'data/uber-raw-data-aug14.csv'
data = pd.read_csv(raw_csv_path)

In [None]:
# DataFrame.info() writes its summary to stdout itself and returns None, so
# wrapping it in print() only appended a stray "None" line to the output.
data.info()
data.head()

In [None]:
# Convert the 'Date/Time' column to datetime64 so that date parts (month, day,
# hour) can be extracted from it in later cells.
data['Date/Time'] = pd.to_datetime(data['Date/Time'])

# DataFrame.info() prints its own report and returns None; calling it directly
# avoids the extra "None" line that print(data.info()) produced.
data.info()
data.head()

In [None]:
# Get hourly counts data: number of pickups for each hour of the day,
# aggregated over every day in the data set.
# The vectorized .dt accessor replaces a per-row Python lambda; it requires
# 'Date/Time' to already be a datetime column (converted in an earlier cell).
pickup_hours = data['Date/Time'].dt.hour
hour_counts = data['Date/Time'].groupby(pickup_hours).count().sort_index()

In [None]:
# Bar chart of total pickups per hour of day, written to an SVG file and then
# closed so the figure does not linger in memory.
hist_fig, hist_ax = plt.subplots()
hist_ax.bar(x=hour_counts.index, height=hour_counts)
hist_ax.set_title('Hourly Pickup Counts')
hist_ax.set_xlabel('Hour (0 is Midnight)')
hist_ax.set_ylabel('Count')
hist_fig.tight_layout()
hist_fig.savefig('graphs/all_hourly_hist.svg')
plt.close(hist_fig)

![All Hourly Pickups Histogram](files/graphs/all_hourly_hist.svg)

In [None]:
# Split each pickup timestamp into its month, day-of-month and hour.
# The .dt accessors compute each part in one vectorized pass instead of
# building a Python tuple per row; 'Date/Time' must already be datetime.
pickup_times = data['Date/Time']
reduced_date = pd.DataFrame({
    'month': pickup_times.dt.month,
    'day': pickup_times.dt.day,
    'hour': pickup_times.dt.hour,
}).reset_index(drop=True)  # fresh 0..n-1 index, as the list-based build gave
reduced_date.head()

In [None]:
# Count pickups for every (day, hour) pair. After grouping on 'day' and
# 'hour', 'month' is the only remaining column, so its per-group count is the
# number of pickups; it is renamed to 'count' to say so.
pickups_by_day_hour = reduced_date.groupby(['day', 'hour'])
hour_counts = pickups_by_day_hour.count().rename(columns={'month': 'count'})
hour_counts.head()

In [None]:
# Get a graph of the hourly count distribution for all days and hours,
# using a single block that covers all 24 hours.

my_graphs.graph_distribution_by_hour(
    hour_counts,
    hour_blocks=[np.arange(24)],
    fig_size=(15, 6),
    grid_size=60,
)
plt.title('All Hourly Pickup Count Distributions')
plt.tight_layout()
# Saved under its own file name: 'graphs/all_hourly_pickups.svg' is also the
# output path of the violin-plot cell that follows, which would silently
# overwrite this graph.
plt.savefig('graphs/all_hourly_pickup_distributions.svg')
plt.close()
print('Graph saved.')

In [None]:
# Violin plots of the hourly pickup counts over every day in the data set.
# Each returned axes is paired with a time-of-day label for its title.
period_names = ('Mornings', 'Close to Noon', 'Afternoon to Night')
fig, axes = my_graphs.graph_violins(
    hour_counts,
    fig_size=violin_plot_fig_size,
    grid_size=violin_plot_grid_size,
)
for panel, period in zip(axes, period_names):
    panel.set_title('Hourly Pickup Distributions for ' + period)
plt.tight_layout()
plt.savefig('graphs/all_hourly_pickups.svg')
plt.close()

![Violin_plot](files/graphs/all_hourly_pickups.svg)

Let's look at including whether the day is on the weekend or not.

In [None]:
# Weekday code assigned to the 1st of the month.
# NOTE(review): presumably Monday = 0, making 4 a Friday; confirm against the
# calendar for the month being analysed.
initial_day = 4


def _weekday_code(day_and_hour):
    """Map a (day, hour) index entry to a weekday code in the range 0-6."""
    day_of_month = day_and_hour[0]
    return (day_of_month - 1 + initial_day) % 7


day_of_week = hour_counts.index.to_series().apply(_weekday_code)
day_of_week.value_counts().sort_index()

In [None]:
# Rows whose weekday code is above 4 (codes 5 and 6) are treated as weekend.
is_weekend = day_of_week > 4
weekend_counts = hour_counts.loc[is_weekend, ['count']]

# Violin plots restricted to the weekend rows, one title per returned axes.
period_names = ('Mornings', 'Close to Noon', 'Afternoon to Night')
fig, axes = my_graphs.graph_violins(
    weekend_counts,
    fig_size=violin_plot_fig_size,
    grid_size=violin_plot_grid_size,
)
for panel, period in zip(axes, period_names):
    panel.set_title('Weekend Hourly Pickup Distributions for ' + period)
plt.tight_layout()
plt.savefig('graphs/weekend_pickups.svg')
plt.close()

![Weekend Hourly Pickups](files/graphs/weekend_pickups.svg)

In [None]:
# Everything not flagged as weekend (weekday codes 0-4) counts as a weekday.
weekday_counts = hour_counts.loc[~is_weekend, ['count']]

# Violin plots restricted to the weekday rows, one title per returned axes.
period_names = ('Mornings', 'Close to Noon', 'Afternoon to Night')
fig, axes = my_graphs.graph_violins(
    weekday_counts,
    fig_size=violin_plot_fig_size,
    grid_size=violin_plot_grid_size,
)
for panel, period in zip(axes, period_names):
    panel.set_title('Weekday Hourly Pickup Distributions for ' + period)
plt.tight_layout()
plt.savefig('graphs/weekday_pickups.svg')
plt.close()

![Weekday Pickups](files/graphs/weekday_pickups.svg)

In [None]:
# Select the rows whose weekday code is 0, 1, 2 or 3 (both bounds inclusive),
# which the plot titles below label as Monday to Thursday.
is_day = day_of_week.between(0, 3)
day_counts = hour_counts.loc[is_day, ['count']]

# Violin plots restricted to those rows, one title per returned axes.
period_names = ('Mornings', 'Close to Noon', 'Afternoon to Night')
fig, axes = my_graphs.graph_violins(
    day_counts,
    fig_size=violin_plot_fig_size,
    grid_size=violin_plot_grid_size,
)
for panel, period in zip(axes, period_names):
    panel.set_title('Mon-Th Hourly Pickup Distributions for ' + period)
plt.tight_layout()
plt.savefig('graphs/mon_th_pickups.svg')
plt.close()

![Monday to Thursday Pickups](files/graphs/mon_th_pickups.svg)