In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

### Reading the file

In [14]:
# Load the raw errands table from its parquet file
# (the path is relative to the notebook's working directory).
errands_path = 'Data/errands.parquet'
errand_df = pd.read_parquet(errands_path)

# Preview the first rows as the cell output.
errand_df.head()


In [None]:
# List the column labels of the loaded errands table as the cell output.
errand_df.columns 

Some errands appear to have been created as tests (flagged by `is_test_errand`). There is no documentation on them, so, relying on this interpretation, we drop the rows that are test errands.

In [4]:
# Keep only real errands: rows whose is_test_errand flag is anything other
# than 1 (rows with a missing flag therefore stay in).
# The explicit .copy() makes errand_df an independent frame, so the column
# assignments performed in later cells do not trigger SettingWithCopyWarning.
errand_df = errand_df.loc[errand_df['is_test_errand'] != 1].copy()


In [16]:
# Distinct errand categories, as a plain Python list, printed for inspection.
X = errand_df.loc[:, 'errand_category'].unique().tolist()
print(X)

In [6]:
# Parse the creation timestamp so the .dt accessor is available below.
errand_df['created'] = pd.to_datetime(errand_df['created'])

# Monthly period of each errand; used later to group errands by month.
year_month = errand_df['created'].dt.to_period('M')

# Discard a YearMonth column left over from a previous run so this cell can be
# re-executed, then insert the fresh column directly to the right of 'created'.
if 'YearMonth' in errand_df.columns:
    errand_df = errand_df.drop(columns='YearMonth')
created_position = errand_df.columns.get_loc('created')
errand_df.insert(created_position + 1, 'YearMonth', year_month)






In [17]:
# Frequency of each errand category across the whole table.
category_counts = errand_df['errand_category'].value_counts()
total_errands = category_counts.sum()

# Categories that account for less than 2% of all errands get lumped together.
threshold = 0.02 * total_errands
low_frequency_categories = category_counts.loc[category_counts < threshold].index

# NOTE: this overwrites errand_category in errand_df itself, so every later
# cell that reads the column sees the collapsed 'Other' label.
errand_df['errand_category'] = errand_df['errand_category'].replace(low_frequency_categories, 'Other')

# Errands per (month, category) pair ...
category_trends = (
    errand_df
    .groupby(['YearMonth', 'errand_category'])
    .size()
    .reset_index(name='Count')
)

# ... reshaped to one row per month and one column per category;
# month/category pairs with no errands become 0.
pivot_data = category_trends.pivot(
    index='YearMonth',
    columns='errand_category',
    values='Count',
).fillna(0)

# One line per category, styled through the Axes that pandas returns.
trend_ax = pivot_data.plot.line(figsize=(10, 6), marker='o')
trend_ax.set_title('Errand Category Trends Over Time')
trend_ax.set_xlabel('Time (Year-Month)')
trend_ax.set_ylabel('Count of Errands')
trend_ax.legend(title='Errand Category', loc='upper right')
trend_ax.grid(True)

plt.show()

The most common errand type within each errand category

In [18]:
# Number of errands for every (category, type) combination that occurs.
type_counts = (
    errand_df
    .groupby(['errand_category', 'errand_type'])
    .size()
    .reset_index(name='Count')
)

# For each category, pick the row of type_counts holding the largest Count.
top_type_rows = type_counts.groupby('errand_category')['Count'].idxmax()
most_common_types = type_counts.loc[top_type_rows]
print(most_common_types)


In [None]:

# Number of errands for every (category, channel) combination that occurs.
channel_prefs = (
    errand_df
    .groupby(['errand_category', 'errand_channel'])
    .size()
    .reset_index(name='Count')
)

# Categories as rows, channels as columns; missing combinations count as 0.
pivot_data = channel_prefs.pivot(
    index='errand_category',
    columns='errand_channel',
    values='Count',
).fillna(0)

# Divide each row by its own total so a row holds the channel shares of one category.
row_totals = pivot_data.sum(axis=1)
pivot_normalized = pivot_data.div(row_totals, axis=0)

# Heatmap of the per-category channel proportions.
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(
    pivot_normalized,
    annot=True,
    cmap="YlGnBu",
    fmt=".2f",
    cbar_kws={'label': 'Proportion'},
    ax=ax,
)
ax.set_title('Channel Preferences by Errand Category')
ax.set_xlabel('Errand Channel')
ax.set_ylabel('Errand Category')
fig.tight_layout()
plt.show()


In [None]:
def _plot_demand_bar(counts, title, xlabel, color):
    """Draw one bar chart of errand counts and show it.

    Parameters
    ----------
    counts : pandas.Series
        Number of errands per bucket; the index supplies the bar labels,
        in plotting order.
    title : str
        Figure title.
    xlabel : str
        Label for the x axis.
    color : str
        Matplotlib colour used for the bars.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    # rot=0 keeps the bucket labels horizontal.
    counts.plot(kind='bar', color=color, alpha=0.8, rot=0, ax=ax)
    ax.set_title(title, fontsize=14)
    ax.set_xlabel(xlabel, fontsize=12)
    ax.set_ylabel('Number of Errands', fontsize=12)
    ax.grid(axis='y', linestyle='--', alpha=0.6)
    fig.tight_layout()
    plt.show()


# Calendar features derived from the creation timestamp.
errand_df['Hour'] = errand_df['created'].dt.hour
errand_df['Day_of_Week'] = errand_df['created'].dt.day_name()
errand_df['Month'] = errand_df['created'].dt.month
errand_df['Year'] = errand_df['created'].dt.year

# Errands per hour of day. For the chart, every hour 0-23 gets a bar (height 0
# when no errand was created in that hour) so a bar's position always matches
# its hour instead of shifting when an hour is absent from the data.
hourly_trend = errand_df.groupby('Hour').size()
_plot_demand_bar(
    hourly_trend.reindex(range(24), fill_value=0),
    title='Customer Service Demand by Hour',
    xlabel='Hour of Day',
    color='skyblue',
)

# Errands per weekday. groupby sorts the day names alphabetically, so the
# series is put into calendar order for plotting; days without errands get 0.
weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
weekday_trend = errand_df.groupby('Day_of_Week').size()
_plot_demand_bar(
    weekday_trend.reindex(weekday_order, fill_value=0),
    title='Customer Service Demand by Day of the Week',
    xlabel='Day of Week',
    color='lightcoral',
)

In [None]:
# How often each errand action occurs (value_counts orders by frequency, highest first).
action_counts = errand_df['errand_action'].value_counts()

# The ten leading entries of that ranking.
top_actions = action_counts.iloc[:10]
print(top_actions)

In [None]:
# The first label in top_actions is the single most frequent action.
most_frequent_action = top_actions.index[0]

# Restrict the table to errands that ended in that action ...
is_top_action = errand_df['errand_action'].eq(most_frequent_action)
filtered_df = errand_df.loc[is_top_action]

# ... and count how those errands spread across the categories.
distribution = filtered_df['errand_category'].value_counts()
print(distribution)

In [23]:
# NOTE: action_counts and most_frequent_action are also assigned earlier in this
# notebook (as a value_counts Series and a single label); this cell rebinds
# both names to DataFrames.

# Number of errands for every (category, action) combination that occurs.
action_counts = (
    errand_df
    .groupby(["errand_category", "errand_action"])
    .size()
    .reset_index(name="Count")
)

# For each category, pick the row of action_counts holding the largest Count.
top_action_rows = action_counts.groupby("errand_category")["Count"].idxmax()
most_frequent_action = action_counts.loc[top_action_rows]

print("Most Frequent Errand Action by Category:")
print(most_frequent_action)
