In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.holtwinters import ExponentialSmoothing

: 

In [None]:
# Load data
# Read the sales records from the CSV file in the working directory.
CSV_PATH = "index.csv"
data = pd.read_csv(CSV_PATH)

# Show the first rows as this cell's output.
data.head()

In [None]:
# Data Cleaning
# Replace missing values in 'card' with the placeholder 'cash'. The result is
# assigned back to the column instead of calling fillna(inplace=True) on the
# data['card'] selection: the in-place form acts on an intermediate object,
# triggers a FutureWarning in pandas 2.x, and leaves `data` unchanged once
# Copy-on-Write is active.
data['card'] = data['card'].fillna('cash')

# Parse both timestamp columns so the .dt accessor can be used on them later.
data['date'] = pd.to_datetime(data['date'])
data['datetime'] = pd.to_datetime(data['datetime'])

In [None]:
# Feature Engineering for Time Series EDA
# Build each derived calendar column from the parsed timestamps, then attach
# them to `data` in a fixed order (month, day, hour).
sale_date = data['date'].dt
sale_moment = data['datetime'].dt

calendar_features = {
    # Year-month label formatted as "YYYY-MM"
    'month': sale_date.strftime('%Y-%m'),
    # Weekday number as produced by %w: 0 = Sunday ... 6 = Saturday
    'day': sale_date.strftime('%w').astype(int),
    # Hour of the day on a 24-hour clock
    'hour': sale_moment.strftime('%H').astype(int),
}
for feature_name, feature_values in calendar_features.items():
    data[feature_name] = feature_values

In [None]:
# Task 1: Time Series Exploratory Data Analysis
# Monthly Sales Trends
# Count sales per (month, coffee type), then pivot coffee types into columns;
# month/coffee combinations with no sales become 0.
monthly_sales = (
    data.groupby(['month', 'coffee_name'])['date']
    .count()
    .unstack(fill_value=0)
)

# DataFrame.plot() opens its own figure unless it is handed an Axes, so a
# separate plt.figure() call beforehand is left behind as an empty figure.
# Create the figure/axes once and draw everything onto it.
fig, ax = plt.subplots(figsize=(12, 6))
monthly_sales.plot(kind='line', marker='o', ax=ax)
ax.set_title("Monthly Sales Trends by Coffee Type")
ax.set_xlabel("Month")
ax.set_ylabel("Sales Count")
ax.grid(True)
plt.show()

In [None]:
# Weekly Sales Trends
# The 'day' column is the %w weekday number (0 = Sunday), which is the order
# of the labels below.
WEEKDAY_LABELS = ['Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat']

# Reindex onto all seven weekday numbers so a weekday without any sales still
# gets a zero-height bar. Without this the remaining bars are packed together
# and the fixed tick labels end up attached to the wrong days.
weekday_sales = (
    data.groupby('day')['date']
    .count()
    .reindex(range(len(WEEKDAY_LABELS)), fill_value=0)
)

plt.figure(figsize=(8, 5))
sns.barplot(x=weekday_sales.index, y=weekday_sales.values, color='steelblue')
plt.xticks(ticks=range(len(WEEKDAY_LABELS)), labels=WEEKDAY_LABELS)
plt.title("Sales by Weekday")
plt.xlabel("Day of the Week")
plt.ylabel("Number of Sales")
plt.show()

In [None]:
# Hourly Sales Trends
# Number of sales recorded for each hour value present in the data.
hourly_sales = data.groupby('hour')['date'].count()

# One bar per hour, drawn on an explicitly created axes.
fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(
    x=hourly_sales.index,
    y=hourly_sales.values,
    color='steelblue',
    ax=ax,
)
ax.set_title("Hourly Sales Distribution")
ax.set_xlabel("Hour of the Day")
ax.set_ylabel("Number of Sales")
plt.show()

In [None]:
# Task 2: Next Day/Week/Month Sales Prediction
# Aggregate daily sales: sum every numeric column per date. groupby already
# returns the result indexed by 'date', so no reset_index/set_index round trip
# is needed.
daily_totals = data.groupby('date').sum(numeric_only=True)

# Put the frame on a regular daily frequency. Dates with no recorded sales are
# absent from the grouped result; inserting them with 0 keeps every 7th row on
# the same weekday (which the weekly seasonal model fitted later relies on)
# and gives the index an explicit 'D' frequency for forecasting.
daily_sales = daily_totals.asfreq('D', fill_value=0)

In [None]:
# Fit an exponential smoothing model to the daily 'money' totals, configured
# with an additive trend and an additive seasonal component that repeats
# every 7 observations.
smoothing_options = {
    "trend": "add",
    "seasonal": "add",
    "seasonal_periods": 7,
}
model = ExponentialSmoothing(daily_sales['money'], **smoothing_options)
fit_model = model.fit()

# Forecasting next week: the 7 steps that follow the end of the series.
forecast = fit_model.forecast(steps=7)

In [None]:
# Display Forecast
# Heading on the first line, forecast values starting on the next line.
print("Forecasted Sales for the Next 7 Days:", forecast, sep="\n")

In [None]:

# Plot actual, fitted, and forecasted daily sales on a single chart
fig, ax = plt.subplots(figsize=(12, 6))

# Plot actual sales
ax.plot(
    daily_sales.index, daily_sales['money'],
    label='Actual Sales', color='skyblue', linewidth=2,
)

# Plot fitted values (the model's in-sample estimates)
fitted_sales = fit_model.fittedvalues
ax.plot(
    fitted_sales.index, fitted_sales,
    label='Fitted Sales', color='red', linestyle='--', linewidth=2,
)

# Plot forecasted values. The x positions are consecutive days starting the
# day after the last observed date. The number of dates is taken from the
# forecast itself rather than repeating a literal horizon, so changing the
# number of forecast steps cannot cause an x/y length mismatch here.
forecast_dates = pd.date_range(
    start=daily_sales.index[-1] + pd.Timedelta(days=1),
    periods=len(forecast),
)
ax.plot(
    forecast_dates, forecast,
    label='Forecasted Sales', color='orange', linestyle='-.', linewidth=2, marker='o',
)

# Formatting and readability enhancements
ax.set_title("Daily Sales: Actual, Fitted, and Forecasted", fontsize=16)
ax.set_xlabel("Date", fontsize=14)
ax.set_ylabel("Sales Revenue", fontsize=14)
ax.legend(fontsize=12)
ax.grid(axis='y', linestyle='--', alpha=0.7)
ax.tick_params(axis='x', labelrotation=45)
fig.tight_layout()

# Show the plot
plt.show()

In [None]:
# Task 3: Specific Customer Purchases
if 'card' in data.columns:
    # Per card identifier: total money spent and the most frequently purchased
    # coffee (the top entry of value_counts()).
    customer_purchases = data.groupby('card').agg(
        total_spent=('money', 'sum'),
        favorite_coffee=('coffee_name', lambda names: names.value_counts().idxmax()),
    )

    # 'cash' is the placeholder written into missing card values during
    # cleaning, so that row pools every cash sale rather than describing one
    # customer. Leave it out of the ranking (customer_purchases itself is kept
    # intact) and show exactly the five rows the heading promises.
    card_customers = customer_purchases.drop(index='cash', errors='ignore')

    print("Top 5 Customers by Total Spending:")
    print(card_customers.sort_values(by='total_spent', ascending=False).head(5))
