# DIMINOS DELIVERY TIME - EXPLORATORY DATA ANALYSIS (EDA)

**Purpose:** Analyze delivery time performance and SLA compliance

**Dataset:** 15,000 pizza delivery orders from Diminos franchise

**SLA Target:** 95th percentile delivery time should be ≤ 31 minutes

## Section 1: Imports and Setup

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import warnings
warnings.filterwarnings('ignore')

# Notebook-wide plot defaults: seaborn's "whitegrid" theme and a wide
# default canvas for any figure that does not pass its own figsize.
sns.set_style(style="whitegrid")
plt.rcParams["figure.figsize"] = (14, 6)

## Section 2: Data Loading and Initial Exploration

In [None]:
# Read the raw order records from disk
df = pd.read_csv('diminos_data.csv')

# Structural overview of the frame: size, schema, a sample of rows and the
# per-column null counts. Each entry is printed on its own line.
banner = "=" * 80
overview_lines = [
    banner,
    "DIMINOS DELIVERY TIME ANALYSIS - INITIAL EXPLORATION",
    banner,
    f"\nDataset Shape: {df.shape}",
    f"Total Orders: {len(df):,}",
    f"\nColumn Names:\n{df.columns.tolist()}",
    f"\nData Types:\n{df.dtypes}",
    f"\nFirst 5 Rows:\n{df.head()}",
    f"\nMissing Values:\n{df.isnull().sum()}",
]
for overview_line in overview_lines:
    print(overview_line)

## Section 3: Data Preprocessing

In [None]:
# Parse both order timestamps so datetime arithmetic and the .dt accessor work
for ts_col in ('order_placed_at', 'order_delivered_at'):
    df[ts_col] = pd.to_datetime(df[ts_col])

# Delivery duration in minutes: (delivered - placed) in seconds, divided by 60
elapsed = df['order_delivered_at'] - df['order_placed_at']
df['delivery_time_minutes'] = elapsed.dt.total_seconds() / 60

# Calendar features, all derived from the moment the order was placed
placed = df['order_placed_at'].dt
df['hour_placed'] = placed.hour
df['day_of_week'] = placed.day_name()
df['date'] = placed.date
df['month'] = placed.month

# Weekend flag stored as 0/1: 1 when the day name is Saturday or Sunday
weekend_days = ['Saturday', 'Sunday']
df['is_weekend'] = df['day_of_week'].isin(weekend_days).astype(int)

banner = "=" * 80
print("\n" + banner)
print("DATA PREPROCESSING COMPLETE")
print(banner)

fastest = df['delivery_time_minutes'].min()
slowest = df['delivery_time_minutes'].max()
print(f"Delivery Time Range: {fastest:.2f} to {slowest:.2f} minutes")

## Section 4: Descriptive Statistics

In [None]:
# Summary statistics for the computed delivery-time column
delivery = df['delivery_time_minutes']
banner = "=" * 80

print(f"\n{banner}")
print("DESCRIPTIVE STATISTICS - DELIVERY TIME ANALYSIS")
print(banner)

# count / mean / std / min / quartiles / max in one call
stats_dict = delivery.describe()
print(f"\n{stats_dict}")

# Spread and shape measures that describe() does not report
print("\nAdditional Metrics:")
variance = delivery.var()
skew_value = delivery.skew()
kurt_value = delivery.kurtosis()
print(f"Variance: {variance:.2f}")
print(f"Skewness: {skew_value:.4f}")
print(f"Kurtosis: {kurt_value:.4f}")

## Section 5: Percentile Analysis & SLA Compliance

In [None]:
# Percentile table and SLA compliance summary for delivery times.
print("\n" + "=" * 80)
print("PERCENTILE ANALYSIS & SLA COMPLIANCE")
print("=" * 80)

# SLA target in minutes. Every label below is derived from this value so the
# printed text cannot drift from the threshold actually applied.
sla_threshold = 31
percentiles = [10, 25, 50, 75, 90, 95, 99]

# Orders with a missing timestamp have a NaN duration. They are neither
# compliant nor non-compliant, so they are excluded from the denominator;
# otherwise the two printed percentages would not match the printed counts.
valid_times = df['delivery_time_minutes'].dropna()
total_valid = len(valid_times)

print("\nPercentile Analysis:")
print(f"{'Percentile':<15} {'Time (mins)':<20} {'Status':<15}")
print("-" * 50)

for p in percentiles:
    value = valid_times.quantile(p / 100)
    status = "✓ PASS" if value <= sla_threshold else "✗ FAIL"
    print(f"{p}th{'':<10} {value:>10.2f}{'':<10} {status:<15}")

# SLA Compliance: share of orders delivered within the threshold
sla_compliant = (valid_times <= sla_threshold).sum()
sla_non_compliant = (valid_times > sla_threshold).sum()
if total_valid:
    compliance_pct = (sla_compliant / total_valid) * 100
    non_compliance_pct = (sla_non_compliant / total_valid) * 100
else:
    # No usable durations at all: report NaN rather than dividing by zero
    compliance_pct = non_compliance_pct = float('nan')

print(f"\nSLA Compliance (≤ {sla_threshold} minutes):")
print(f"Compliant Orders: {sla_compliant:,} ({compliance_pct:.2f}%)")
print(f"Non-Compliant Orders: {sla_non_compliant:,} ({non_compliance_pct:.2f}%)")

# The SLA itself is defined on the 95th percentile, not on the mean
p95 = valid_times.quantile(0.95)
print(f"\n95th Percentile Delivery Time: {p95:.2f} minutes")
print(f"SLA Status: {'✓ PASS' if p95 <= sla_threshold else '✗ FAIL'}")

## Section 6: Temporal Analysis - Hourly Patterns

In [None]:
# Delivery-time summary per order hour, plus the slowest ("peak") hours.
print("\n" + "=" * 80)
print("TEMPORAL ANALYSIS - HOURLY PATTERNS")
print("=" * 80)

# A list of aggregation names is used instead of a {name: func} dict:
# dict-style renaming on a single grouped column raises SpecificationError
# ("nested renamer is not supported") on pandas >= 1.0.
hour_analysis = (
    df.groupby('hour_placed')['delivery_time_minutes']
    .agg(['mean', 'median', 'std', 'min', 'max', 'count'])
    .round(2)
)

# Display headers, in the same order as the aggregations above
hour_analysis.columns = ['Mean', 'Median', 'Std Dev', 'Min', 'Max', 'Orders']
print(f"\n{hour_analysis}")

# Identify peak hours: hours whose mean delivery time is above the 75th
# percentile of the hourly means
print("\nPeak Hours Analysis:")
peak_threshold = hour_analysis['Mean'].quantile(0.75)
peak_hours = hour_analysis[hour_analysis['Mean'] > peak_threshold].index.tolist()
print(f"Peak Hours (above 75th percentile): {peak_hours}")

for hour in peak_hours:
    mean_time = hour_analysis.loc[hour, 'Mean']
    orders = int(hour_analysis.loc[hour, 'Orders'])
    # int(): the 'd' format rejects floats, and the hour keys may be floats
    # if any placement timestamp failed to parse
    print(f" Hour {int(hour):2d}: Avg {mean_time:6.2f} mins, {orders:4d} orders")

## Section 7: Temporal Analysis - Day of Week Patterns

In [None]:
# Delivery-time summary per weekday and a weekday-vs-weekend comparison.
print("\n" + "=" * 80)
print("TEMPORAL ANALYSIS - DAY OF WEEK PATTERNS")
print("=" * 80)

# Calendar order for display; groupby would otherwise sort names alphabetically
day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# A list of aggregation names is used instead of a {name: func} dict:
# dict-style renaming on a single grouped column raises SpecificationError
# ("nested renamer is not supported") on pandas >= 1.0.
day_analysis = (
    df.groupby('day_of_week')['delivery_time_minutes']
    .agg(['mean', 'median', 'std', 'count'])
    .round(2)
)

# Keep only the days that actually occur, in calendar order
day_analysis = day_analysis.reindex([d for d in day_order if d in day_analysis.index])
day_analysis.columns = ['Mean', 'Median', 'Std Dev', 'Orders']
print(f"\n{day_analysis}")

# Weekday vs Weekend comparison (is_weekend is the 0/1 flag on df)
weekday_mean = df[df['is_weekend'] == 0]['delivery_time_minutes'].mean()
weekend_mean = df[df['is_weekend'] == 1]['delivery_time_minutes'].mean()

# Absolute gap, also expressed relative to the weekday average
mean_gap = abs(weekend_mean - weekday_mean)

print("\nWeekday vs Weekend Comparison:")
print(f"Weekday Average: {weekday_mean:.2f} minutes")
print(f"Weekend Average: {weekend_mean:.2f} minutes")
print(f"Difference: {mean_gap:.2f} minutes ({mean_gap/weekday_mean*100:.1f}%)")

## Section 8: Outlier Detection & Anomalies

In [None]:
# Flag unusually short or long deliveries with the 1.5 x IQR fence rule,
# then report deliveries that took longer than 100 minutes.
banner = "=" * 80
print("\n" + banner)
print("OUTLIER DETECTION & ANOMALIES")
print(banner)

delivery = df['delivery_time_minutes']
n_orders = len(df)

# IQR method: fences sit 1.5 * IQR outside the first and third quartiles
Q1 = delivery.quantile(0.25)
Q3 = delivery.quantile(0.75)
IQR = Q3 - Q1

lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR

below_fence = delivery < lower_bound
above_fence = delivery > upper_bound
outliers = df[below_fence | above_fence]
outlier_share = len(outliers) / n_orders * 100

print("\nInterquartile Range (IQR) Analysis:")
print(f"Q1 (25th percentile): {Q1:.2f} minutes")
print(f"Q3 (75th percentile): {Q3:.2f} minutes")
print(f"IQR: {IQR:.2f} minutes")
print(f"Lower Bound: {lower_bound:.2f} minutes")
print(f"Upper Bound: {upper_bound:.2f} minutes")
print(f"\nOutliers (>1.5×IQR from quartiles): {len(outliers)} orders ({outlier_share:.2f}%)")

# Extreme outliers: anything slower than 100 minutes
extreme_outliers = df[delivery > 100]
extreme_share = len(extreme_outliers) / n_orders * 100
slowest = delivery.max()
print(f"\nExtreme Delays (>100 minutes): {len(extreme_outliers)} orders ({extreme_share:.2f}%)")
print(f"Max Delivery Time: {slowest:.2f} minutes ({slowest/60:.1f} hours)")

## Section 9: Distribution Analysis

In [None]:
# Normality test and shape descriptors for the delivery-time distribution.
print("\n" + "=" * 80)
print("DISTRIBUTION ANALYSIS")
print("=" * 80)

# Normality test (Shapiro-Wilk) on a random sample capped at 5000 points;
# SciPy documents that the p-value may be inaccurate above that size.
# NaN durations are removed first so they cannot turn the statistic into NaN,
# and the sample size is clamped because Series.sample raises when asked for
# more rows than exist (replace=False).
valid_times = df['delivery_time_minutes'].dropna()
sample_size = min(5000, len(valid_times))
sample_data = valid_times.sample(n=sample_size, random_state=42)
stat, p_value = stats.shapiro(sample_data)

print(f"\nShapiro-Wilk Normality Test (sample of {sample_size}):")
print(f"Test Statistic: {stat:.6f}")
print(f"P-value: {p_value:.6e}")
print(f"Result: {'NOT Normal (p < 0.05)' if p_value < 0.05 else 'Normal (p ≥ 0.05)'}")

# Distribution characteristics, computed on the full column
print("\nDistribution Characteristics:")
skewness = df['delivery_time_minutes'].skew()
kurtosis = df['delivery_time_minutes'].kurtosis()
print(f"Skewness: {skewness:.4f} ({'Right-skewed' if skewness > 0 else 'Left-skewed'})")
print(f"Kurtosis: {kurtosis:.4f} ({'Heavy-tailed' if kurtosis > 0 else 'Light-tailed'})")

## Section 10: Visualization - Distribution Analysis

In [None]:
# 2x2 overview figure: histogram, box plot, hourly pattern, weekday pattern.
# Relies on sla_threshold, p95 and day_order defined in earlier cells.
print("\n" + "=" * 80)
print("CREATING VISUALIZATIONS")
print("=" * 80)

fig, axes = plt.subplots(2, 2, figsize=(16, 12))

# NaN durations (orders with a missing timestamp) are dropped before plotting
# so the histogram range stays finite and the box plot is not emptied.
delivery_times = df['delivery_time_minutes'].dropna().to_numpy()

# Computed once, outside the f-string: reusing the enclosing quote character
# inside an f-string expression is a SyntaxError before Python 3.12.
overall_mean = df['delivery_time_minutes'].mean()

# Plot 1: Histogram with density
ax1 = axes[0, 0]
ax1.hist(delivery_times, bins=100, edgecolor='black', alpha=0.7, density=True)
ax1.axvline(sla_threshold, color='red', linestyle='--', linewidth=2.5, label=f'SLA Threshold ({sla_threshold} mins)')
ax1.axvline(p95, color='blue', linestyle='--', linewidth=2.5, label=f'95th Percentile ({p95:.2f} mins)')
ax1.axvline(overall_mean, color='green', linestyle='-', linewidth=2, label=f'Mean ({overall_mean:.2f} mins)')
ax1.set_xlabel('Delivery Time (minutes)', fontsize=11, fontweight='bold')
ax1.set_ylabel('Density', fontsize=11, fontweight='bold')
ax1.set_title('Distribution of Pizza Delivery Times', fontsize=13, fontweight='bold')
ax1.legend(fontsize=10)
ax1.grid(True, alpha=0.3)
# Clip the view to 0-100 minutes so the long right tail does not flatten the bulk
ax1.set_xlim(0, 100)

# Plot 2: Box plot
ax2 = axes[0, 1]
box = ax2.boxplot(delivery_times, vert=True, patch_artist=True)
box['boxes'][0].set_facecolor('lightblue')
ax2.axhline(sla_threshold, color='red', linestyle='--', linewidth=2.5, label='SLA Threshold')
ax2.axhline(p95, color='blue', linestyle='--', linewidth=2.5, label='95th Percentile')
ax2.set_ylabel('Delivery Time (minutes)', fontsize=11, fontweight='bold')
ax2.set_title('Delivery Time Distribution (Box Plot)', fontsize=13, fontweight='bold')
ax2.legend(fontsize=10)
ax2.grid(True, alpha=0.3, axis='y')
ax2.set_ylim(0, 100)

# Plot 3: Hourly pattern (mean delivery time per order hour)
ax3 = axes[1, 0]
hour_mean = df.groupby('hour_placed')['delivery_time_minutes'].mean()
ax3.plot(hour_mean.index, hour_mean.values, marker='o', linewidth=2.5, markersize=6, color='darkblue')
ax3.fill_between(hour_mean.index, hour_mean.values, alpha=0.3)
ax3.axhline(sla_threshold, color='red', linestyle='--', linewidth=2, label='SLA Threshold')
ax3.set_xlabel('Hour of Day', fontsize=11, fontweight='bold')
ax3.set_ylabel('Avg Delivery Time (minutes)', fontsize=11, fontweight='bold')
ax3.set_title('Delivery Time Pattern by Hour of Day', fontsize=13, fontweight='bold')
ax3.set_xticks(range(0, 24, 2))
ax3.legend(fontsize=10)
ax3.grid(True, alpha=0.3)

# Plot 4: Day of week pattern (days absent from the data show up as NaN)
ax4 = axes[1, 1]
day_mean = df.groupby('day_of_week')['delivery_time_minutes'].mean().reindex(day_order)
# Colour by day name rather than by position so weekends stay highlighted
# even if day_order is ever changed
colors = ['coral' if day in ('Saturday', 'Sunday') else 'steelblue' for day in day_mean.index]
bars = ax4.bar(range(len(day_mean)), day_mean.values, color=colors, edgecolor='black', linewidth=1.5)
ax4.axhline(sla_threshold, color='red', linestyle='--', linewidth=2, label='SLA Threshold')
ax4.set_xlabel('Day of Week', fontsize=11, fontweight='bold')
ax4.set_ylabel('Avg Delivery Time (minutes)', fontsize=11, fontweight='bold')
ax4.set_title('Delivery Time Pattern by Day of Week', fontsize=13, fontweight='bold')
ax4.set_xticks(range(len(day_mean)))
ax4.set_xticklabels(day_mean.index, rotation=45, ha='right')
ax4.legend(fontsize=10)
ax4.grid(True, alpha=0.3, axis='y')

# Add value labels on bars
for bar, value in zip(bars, day_mean.values):
    if pd.isna(value):
        # No orders on this day: nothing to label
        continue
    ax4.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.5, f'{value:.1f}',
             ha='center', va='bottom', fontsize=9, fontweight='bold')

plt.tight_layout()
plt.savefig('diminos_eda_distribution.png', dpi=300, bbox_inches='tight')
print("✓ Saved: diminos_eda_distribution.png")
plt.show()

## Section 11: Advanced Visualizations

In [None]:
# 2x2 figure: hour-by-day heatmap, CDF, weekday/weekend box plots, SLA pie.
# Relies on sla_threshold, p95 and day_order defined in earlier cells.
fig, axes = plt.subplots(2, 2, figsize=(16, 12))

# Plot 1: Heatmap - Hour vs Day
ax1 = axes[0, 0]
pivot_data = df.pivot_table(values='delivery_time_minutes', index='hour_placed', columns='day_of_week', aggfunc='mean')
# reindex (not pivot_data[day_order]) so a weekday absent from the data becomes
# an empty column instead of raising KeyError
pivot_data = pivot_data.reindex(columns=day_order)
sns.heatmap(pivot_data, annot=True, fmt='.1f', cmap='RdYlGn_r', ax=ax1, cbar_kws={'label': 'Avg Delivery Time (mins)'})
ax1.set_title('Delivery Time Heatmap: Hour × Day of Week', fontsize=13, fontweight='bold')
ax1.set_xlabel('Day of Week', fontsize=11, fontweight='bold')
ax1.set_ylabel('Hour of Day', fontsize=11, fontweight='bold')

# Plot 2: CDF (Cumulative Distribution Function)
ax2 = axes[0, 1]
# NaN durations are dropped so they neither appear on the x-axis nor inflate
# the denominator of the cumulative percentage
sorted_times = np.sort(df['delivery_time_minutes'].dropna())
cdf = np.arange(1, len(sorted_times) + 1) / len(sorted_times)
ax2.plot(sorted_times, cdf * 100, linewidth=2.5, color='darkblue')
ax2.axvline(sla_threshold, color='red', linestyle='--', linewidth=2.5, label=f'SLA ({sla_threshold} mins)', alpha=0.7)
ax2.axvline(p95, color='blue', linestyle='--', linewidth=2.5, label=f'95th Percentile ({p95:.2f} mins)', alpha=0.7)
# Horizontal guide at 95%: where it meets the curve is the 95th percentile
ax2.axhline(95, color='green', linestyle=':', linewidth=2, alpha=0.7)
ax2.set_xlabel('Delivery Time (minutes)', fontsize=11, fontweight='bold')
ax2.set_ylabel('Cumulative %', fontsize=11, fontweight='bold')
ax2.set_title('Cumulative Distribution Function (CDF)', fontsize=13, fontweight='bold')
ax2.set_xlim(0, 100)
ax2.legend(fontsize=10)
ax2.grid(True, alpha=0.3)

# Plot 3: Weekday vs Weekend comparison
ax3 = axes[1, 0]
weekday_data = df[df['is_weekend'] == 0]['delivery_time_minutes'].dropna()
weekend_data = df[df['is_weekend'] == 1]['delivery_time_minutes'].dropna()
positions = [1, 2]
bp = ax3.boxplot([weekday_data, weekend_data], positions=positions,
                 patch_artist=True, widths=0.6)
# Tick labels are set on the axis rather than via boxplot(labels=...), which
# is deprecated since Matplotlib 3.9
ax3.set_xticklabels(['Weekday', 'Weekend'])

for patch in bp['boxes']:
    patch.set_facecolor('lightblue')

ax3.axhline(sla_threshold, color='red', linestyle='--', linewidth=2, label='SLA Threshold')
ax3.set_ylabel('Delivery Time (minutes)', fontsize=11, fontweight='bold')
ax3.set_title('Weekday vs Weekend Delivery Times', fontsize=13, fontweight='bold')
ax3.legend(fontsize=10)
ax3.grid(True, alpha=0.3, axis='y')
ax3.set_ylim(0, 100)

# Plot 4: Non-compliance analysis
ax4 = axes[1, 1]
compliant = (df['delivery_time_minutes'] <= sla_threshold).sum()
non_compliant = (df['delivery_time_minutes'] > sla_threshold).sum()
# Labels are built from sla_threshold so they always match the split plotted
labels = [f'Compliant\n(≤{sla_threshold} mins)', f'Non-Compliant\n(>{sla_threshold} mins)']
sizes = [compliant, non_compliant]
colors_pie = ['#2ecc71', '#e74c3c']
# Pull the non-compliant wedge slightly out of the pie to emphasise it
explode = (0, 0.1)

wedges, texts, autotexts = ax4.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90,
                                   colors=colors_pie, explode=explode,
                                   textprops={'fontsize': 11, 'fontweight': 'bold'})
ax4.set_title('SLA Compliance Status', fontsize=13, fontweight='bold')

# Add count labels at half radius along each wedge's bisecting angle
for wedge, size in zip(wedges, sizes):
    angle = (wedge.theta2 - wedge.theta1) / 2. + wedge.theta1
    x = np.cos(np.deg2rad(angle))
    y = np.sin(np.deg2rad(angle))
    ax4.text(x*0.5, y*0.5, f'{size:,}\norders', ha='center', va='center', fontsize=10, fontweight='bold', color='white')

plt.tight_layout()
plt.savefig('diminos_eda_advanced.png', dpi=300, bbox_inches='tight')
print("✓ Saved: diminos_eda_advanced.png")
plt.show()