In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the November 2021 inbound-supply records into `df`.
# The path is relative, so the CSV is looked up in the kernel's working directory.
file_path = 'sendy_supply_inbounds_NOVEMBER_2021.csv'
try:
    df = pd.read_csv(file_path)
except FileNotFoundError as exc:
    # Fail loudly: every later cell depends on `df`, so continuing without it
    # would only produce confusing NameErrors further down.
    raise FileNotFoundError(
        f"Could not find '{file_path}'. Place the CSV in the working directory "
        "or update `file_path`."
    ) from exc
else:
    # Only report success when the read actually completed.
    print('Dataset loaded successfully!')


In [None]:
# Print the column dtypes, non-null counts and memory usage
df.info()

# Tally the missing entries in each column and display the result
missing_per_column = df.isna().sum()
missing_per_column

In [None]:
# Replace absent transporter names with the placeholder 'Unknown'
transporter_filled = df['transporter_name'].fillna('Unknown')
df['transporter_name'] = transporter_filled

# Re-count the missing entries per column to confirm the fill took effect
remaining_missing = df.isna().sum()
remaining_missing

## Task 2: Basic Data Analysis

In [None]:
# Summary statistics for every column; include='all' covers non-numeric
# columns as well as numeric ones
summary_stats = df.describe(include='all')
summary_stats

In [None]:
# Mean quantity for each hub, computed as a Series indexed by hub ...
mean_quantity_by_hub = df.groupby('hub')['quantity'].mean()

# ... then flattened into a two-column frame (hub, quantity) for display
hub_group = mean_quantity_by_hub.reset_index()
hub_group

## Task 3: Data Visualization

In [None]:
# Parse the 'date' column into datetime values so the x-axis is a time axis
df['date'] = pd.to_datetime(df['date'])

# Total quantity for each distinct date, as a flat (date, quantity) frame
daily_quantity = df.groupby('date', as_index=False)['quantity'].sum()

# Line chart of the daily totals, drawn through the explicit Axes interface
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(daily_quantity['date'], daily_quantity['quantity'], marker='o')
ax.set_title('Daily Quantity Trend')
ax.set_xlabel('Date')
ax.set_ylabel('Total Quantity')
ax.grid(True)
plt.show()

In [None]:
# Bar chart: average quantity per hub.
# seaborn's barplot aggregates with the mean by default, so no custom estimator
# is needed; the previous hand-rolled `sum(x)/len(x)` lambda computed the same
# statistic but ran a Python-level sum for every bootstrap resample.
fig, ax = plt.subplots(figsize=(10, 5))
sns.barplot(x='hub', y='quantity', data=df, ax=ax)
ax.set_title('Average Quantity per Hub')
ax.set_xlabel('Hub')
ax.set_ylabel('Average Quantity')
# Tilt the hub labels so longer names do not overlap
ax.tick_params(axis='x', labelrotation=45)
plt.show()

In [None]:
# Histogram of quantity
plt.figure(figsize=(8,5))
plt.hist(df['quantity'], bins=20, edgecolor='black')
plt.title('Distribution of Quantity')
plt.xlabel('Quantity')
plt.ylabel('Frequency')
plt.show()

In [None]:
# Scatter plot: quantity vs. handle_in_unit_fee
plt.figure(figsize=(8,5))
plt.scatter(df['quantity'], df['handle_in_unit_fee'], alpha=0.6)
plt.title('Quantity vs Handle-in Unit Fee')
plt.xlabel('Quantity')
plt.ylabel('Handle-in Unit Fee')
plt.show()

## Observations

- The dataset contains inbound supply records with quantities and handling fees.
- Missing values in `transporter_name` were replaced with 'Unknown'.
- The average quantity per inbound record varies across hubs, with some hubs receiving larger quantities per record on average.
- The distribution of `quantity` is right-skewed, with most values clustered at smaller amounts.
- The scatter plot shows that the relationship between `quantity` and `handle_in_unit_fee` is fairly flat, suggesting the unit fee is largely independent of the quantity received.