In [None]:
import pickle

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Read the two pickled simulation outputs from the results folder.
# NOTE(review): unpickling can execute arbitrary code, so only load files from a trusted run.
def _load_pickle(path):
    """Return the object unpickled from the file at ``path``."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


uxsim_data = _load_pickle('results/uxsim_data.pkl')
parked_dict = _load_pickle('results/parked_dict.pkl')

In [None]:
# Area identifiers kept for the analysis; rows of the long-form UXsim frame are
# later filtered on this list (presumably the areas inside the city — confirm).
city_areas = [
    1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
    13, 15, 28, 29, 31, 34, 41, 43, 44, 45,
]

# Pickled input tables, read relative to the notebook's working directory.
trips_by_hour_chances, trip_counts_distribution = (
    pd.read_pickle("../data/trips_by_hour_chances.pickle"),
    pd.read_pickle("../data/trip_counts_distribution.pickle"),
)

### UXsim data processing

In [None]:
# Stack the per-time frames into one frame indexed by (time, area).
# Assumes uxsim_data maps a time key to a DataFrame with an 'area' column — TODO confirm.
combined_df = (
    pd.concat(uxsim_data, names=['time'])
    .reset_index(level=1, drop=True)   # discard the frames' own row index
    .set_index('area', append=True)    # index becomes (time, area)
    .drop(columns='n_links')
)
# Round the time level of the index to 3 decimal places
rounded_times = combined_df.index.levels[0].round(3)
combined_df.index = combined_df.index.set_levels(rounded_times, level=0)
combined_df.head()

In [None]:
# Blank out negative entries: they are replaced by NaN, so the frame keeps its
# shape and no rows are dropped here.
is_negative = combined_df < 0
negative_count = combined_df.where(is_negative).count().sum()
print(f"Number of negative values: {negative_count}")
combined_df = combined_df.where(combined_df >= 0)

In [None]:
# Reshape to long form: one row per (time, area, variable) observation
stacked = combined_df.stack().reset_index()
stacked = stacked.set_axis(['time', 'area', 'variable', 'value'], axis=1)
# Restrict to the areas listed in city_areas
in_city = stacked['area'].isin(city_areas)
long_df = stacked.loc[in_city]
long_df.head(6)

In [None]:
# One panel per UXsim variable, with every area overlaid as a hue-coded line.
variables = long_df['variable'].unique()

# Size the grid from the data instead of assuming exactly six variables: a
# fixed 3x2 grid raises IndexError on a seventh variable and leaves blank
# panels when there are fewer than six. With six variables this reproduces
# the original 3x2, 15x15-inch figure.
n_cols = 2
n_rows = max(1, -(-len(variables) // n_cols))  # ceiling division, at least one row
# squeeze=False keeps axs two-dimensional even for a single row, so flatten() always works
fig, axs = plt.subplots(n_rows, n_cols, figsize=(15, 5 * n_rows), squeeze=False)
axs = axs.flatten()

for ax, variable in zip(axs, variables):
    sns.lineplot(data=long_df[long_df['variable'] == variable], x='time', y='value', hue='area', palette='rocket', ax=ax)
    ax.set_ylim(bottom=0)
    ax.set_title(f"{variable} in different areas")
    ax.set_ylabel(variable)
    ax.set_xlabel('Time of day (hour)')

# Hide any panels left over when the variable count does not fill the grid
for ax in axs[len(variables):]:
    ax.set_visible(False)

# Save as png
plt.savefig('img/uxsim_data.png', dpi=300, bbox_inches='tight')

### Input data visualization (for comparison)

In [None]:
# Simulated time span, truncated to whole numbers via int()
time_values = long_df['time']
start_time, end_time = int(time_values.min()), int(time_values.max())

In [None]:
# Weekday profile: row-wise mean of the first four columns
# (days 0-3, Monday-Thursday according to the original note).
weekday_mean = trips_by_hour_chances.iloc[:, :4].mean(axis=1)

trips_by_hour_chance = (
    weekday_mean
    .drop("Total")                       # discard the summary row
    .loc[start_time:end_time]            # keep only hours covered by the model run (label slice)
    .rename('Chance')                    # name the value column
    .reset_index()                       # Series -> DataFrame with the index as a column
    .assign(Hour=lambda frame: frame['Hour'].astype(int))
)
trips_by_hour_chance.head()

In [None]:
# Bar chart of the per-hour trip chance from trips_by_hour_chance
fig, ax = plt.subplots(figsize=(10, 5))
sns.barplot(x='Hour', y='Chance', data=trips_by_hour_chance, ax=ax)
ax.set(title='Trip chances per Agent per hour', ylabel='Chance of taking trip')
ax.set_xlabel('Time of day (hour)')

### Parking data visualization

In [None]:
# Build a frame from parked_dict, then reshape it to long form indexed by (time, area).
parked_df = pd.DataFrame(parked_dict)
long_parked_df = (
    parked_df.stack()
    .reset_index()
    # stacked row labels are named 'area', the former column labels 'time'
    .set_axis(['area', 'time', 'value'], axis=1)
    .set_index(['time', 'area'])
)

In [None]:
# Line plot of parked cars over time, one hue-coded line per area.
# 'time' and 'area' are index levels of long_parked_df and are referenced by name here.
fig, ax = plt.subplots(figsize=(10, 5))
sns.lineplot(x='time', y='value', hue='area', data=long_parked_df, palette='rocket', ax=ax)
ax.set(
    title='Parked cars in different areas',
    xlabel='Time of day (hour)',
    ylabel='Parked cars',
)

# Write the figure to disk
fig.savefig('img/parked_data.png', dpi=300, bbox_inches='tight')