In [None]:
import pandas as pd # CSV reading library
import matplotlib.pyplot as plt # plotting library
import matplotlib.dates as mdates

from glob import glob

%matplotlib inline 

In [None]:
# Load the exported travel history from disk; the bare name on the last line
# makes the notebook render the resulting frame as this cell's output.
history = pd.read_csv(filepath_or_buffer="travel_history.csv")
history

In [None]:
# Let's create a graph of my Oyster balance

# First, we parse the date column to a proper date object. `date` is reused by
# a later cell to derive the weekday. The `infer_datetime_format` flag is left
# out: it is deprecated since pandas 2.0, where it only produces a warning.
date = pd.to_datetime(history.Date)

# Set up the plot on an explicit figure/axes pair instead of looking up the
# current axes again for every tweak.
fig, ax = plt.subplots()
ax.set_title("Balance")
ax.set_xlabel("Date")
ax.set_ylabel("GBP")

ax.plot(date, history.Balance)

# Tweak the axis a bit to be readable: one labelled major tick per month and
# one unlabelled minor tick per day.
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_minor_locator(mdates.DayLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%d-%m-%Y'))

# Slant the date labels so that neighbouring ticks do not overlap.
fig.autofmt_xdate()

In [None]:
# Narrow the history down to the rows whose description mentions a bus
only_bus = history.loc[history['Journey/Action'].str.contains('Bus')]

# Count how many times each distinct bus entry shows up
only_bus['Journey/Action'].groupby(only_bus['Journey/Action']).count()

# Can you guess what my favourite bus is?

In [None]:
# Let's do the same for the tube, but both from AND to destination!

# Every row that is neither an auto top-up nor a bus journey is treated as a
# tube journey here.
only_tube = history[~history['Journey/Action'].str.contains('Auto|Bus')]

# Split the two stations. n=1 splits on the first ' to ' only, so an entry that
# happens to contain the separator twice can no longer produce a third column
# and break the two-name assignment below.
from_to = only_tube['Journey/Action'].str.split(' to ', n=1, expand=True)
from_to.columns = ['from', 'to']

# Stack origins and destinations into a single Series and count the visits per
# station, busiest first. `ascending` is passed by keyword and the positional
# axis argument is dropped: Series.sort_values is keyword-only in pandas 2.x,
# so the old `sort_values(0, ...)` call raises a TypeError there.
total = pd.concat([from_to['from'], from_to['to']], ignore_index=True)
total.groupby(total).count().sort_values(ascending=False)

# Now you can even guess where I live and work!

In [None]:
# Parse the journey start times; `start_time` is reused by the following cells.
# The deprecated `infer_datetime_format` flag is omitted (it only triggers a
# warning on pandas 2.x).
start_time = pd.to_datetime(history['Start Time'])

# Hour-of-day distribution of the bus journeys. The explicit edges 0..24 give
# exactly one bin per hour; the default of 10 bins would lump the integer hours
# together unevenly. Rows without a start time are skipped.
plt.hist(
    start_time[history['Journey/Action'].str.contains('Bus')].dt.hour.dropna(),
    bins=range(25),
)
plt.title("Bus journeys by start hour")
plt.xlabel("Hour of day")
plt.ylabel("Journeys");

In [None]:
# Start hour of every bus journey.
bus_start = start_time[history['Journey/Action'].str.contains('Bus')].dt.hour

# Start hour of every "A to B" journey. Match the literal ' to ' separator (the
# same one the station split uses) rather than the bare substring 'to', which
# also matches the "to" inside "Auto" and would pull in the auto top-up rows
# that the tube selection deliberately excludes.
tube_start = start_time[history['Journey/Action'].str.contains(' to ', regex=False)].dt.hour

# Stacked histogram with one bin per hour; rows without a start time are
# skipped, and the legend tells the two series apart.
plt.hist(
    [bus_start.dropna(), tube_start.dropna()],
    bins=range(25),
    stacked=True,
    label=['Bus', 'Tube'],
)
plt.title("Journeys by start hour")
plt.xlabel("Hour of day")
plt.ylabel("Journeys")
plt.legend();

In [None]:
# Day of the week for every row (Monday=0 ... Sunday=6), taken from the
# vectorised .dt accessor instead of calling a Python lambda once per row.
weekday = date.dt.weekday

In [None]:
# Start hours of the journeys made Monday to Friday (weekday 0-4). Edges 0..24
# give one bin per hour instead of the default 10 uneven bins; rows without a
# start time are skipped.
plt.hist(start_time[weekday < 5].dt.hour.dropna(), bins=range(25))
plt.title("Journey start hour, weekdays")
plt.xlabel("Hour of day")
plt.ylabel("Journeys");

In [None]:
# Start hours of the journeys made on Saturday or Sunday (weekday 5-6). Edges
# 0..24 give one bin per hour instead of the default 10 uneven bins; rows
# without a start time are skipped.
plt.hist(start_time[weekday >= 5].dt.hour.dropna(), bins=range(25))
plt.title("Journey start hour, weekends")
plt.xlabel("Hour of day")
plt.ylabel("Journeys");

In [None]:
# Read every file in the export folder and stitch them into one frame.
# sorted() makes the read order deterministic (glob gives no ordering
# guarantee), and ignore_index=True assigns fresh row labels so the combined
# frame does not carry each file's own labels repeated over and over.
victor_history = pd.concat(
    [pd.read_csv(csv_file) for csv_file in sorted(glob('travel_history_victor/*'))],
    ignore_index=True,
)

# Parse the dates once, here. The deprecated `infer_datetime_format` flag is
# omitted (it only triggers a warning on pandas 2.x).
victor_history['Date'] = pd.to_datetime(victor_history['Date'])

# Order chronologically. mergesort is stable, so rows sharing a date keep the
# relative order they had in the files.
victor_history = victor_history.sort_values('Date', kind='mergesort')

In [None]:
# Dates for the x axis. The loading cell already parses this column, so the
# conversion here is a safety net; the deprecated `infer_datetime_format` flag
# is omitted (it only triggers a warning on pandas 2.x).
date = pd.to_datetime(victor_history.Date)

# Set up the plot on an explicit figure/axes pair
fig, ax = plt.subplots()
ax.set_title("Balance")
ax.set_xlabel("Date")
ax.set_ylabel("GBP")

ax.plot(date, victor_history.Balance)

# Slant the date labels so that neighbouring ticks do not overlap.
fig.autofmt_xdate()

In [None]:
# Rebind the helper series to the second data set so the histogram cells that
# follow read from it. The deprecated `infer_datetime_format` flag is omitted
# (it only triggers a warning on pandas 2.x).
date = pd.to_datetime(victor_history.Date)

# Day of the week per row (Monday=0 ... Sunday=6) via the vectorised .dt
# accessor rather than a per-row lambda.
weekday = date.dt.weekday
start_time = pd.to_datetime(victor_history['Start Time'])

In [None]:
# Start hours of the journeys made Monday to Friday (weekday 0-4). Edges 0..24
# give one bin per hour instead of the default 10 uneven bins; rows without a
# start time are skipped.
plt.hist(start_time[weekday < 5].dt.hour.dropna(), bins=range(25))
plt.title("Journey start hour, weekdays")
plt.xlabel("Hour of day")
plt.ylabel("Journeys");

In [None]:
# Start hours of the journeys made on Saturday or Sunday (weekday 5-6). Edges
# 0..24 give one bin per hour instead of the default 10 uneven bins; rows
# without a start time are skipped.
plt.hist(start_time[weekday >= 5].dt.hour.dropna(), bins=range(25))
plt.title("Journey start hour, weekends")
plt.xlabel("Hour of day")
plt.ylabel("Journeys");