
# ☕ Coffee Sales Analysis Project

This project analyzes **Coffee Shop Sales Data** to extract insights about customer purchasing patterns
and sales trends, and builds a **basic predictive model** for sales.

### Objectives:
- Data Cleaning & Preparation  
- Exploratory Data Analysis (EDA)  
- Time-based sales insights (daily, weekly, hourly)  
- Machine Learning model to predict sales  
- Business Insights  


In [None]:

# =======================
# 1. Import Libraries
# =======================
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this blanket filter also hides library deprecation warnings
# (e.g. from pandas or seaborn) — consider narrowing it to specific categories.
warnings.filterwarnings("ignore")

# Apply seaborn's "whitegrid" style to all plots drawn below.
sns.set(style="whitegrid")


In [None]:

# =======================
# 2. Load Dataset
# =======================
# Read the raw sales file; the relative path is resolved against the
# notebook's working directory.
csv_path = "coffee_shop_sales.csv"
df = pd.read_csv(csv_path)

# Report the (rows, columns) size, then preview the first rows as the cell output.
row_count, column_count = df.shape
print("Dataset Shape:", (row_count, column_count))
df.head()


In [None]:

# =======================
# 3. Data Cleaning
# =======================
# Parse both timestamp columns so the .dt accessors used in later cells work.
df['date'] = pd.to_datetime(df['date'])
df['datetime'] = pd.to_datetime(df['datetime'])

# Replace missing card values with a placeholder label (presumably these are
# cash purchases — confirm against the data source).
# The result is assigned back rather than calling fillna(inplace=True) on the
# selected column: the in-place form acts on an intermediate object and does
# not update df once pandas Copy-on-Write is active.
df['card'] = df['card'].fillna("Cash_Customer")

# Report how many fully identical rows exist, then drop them.
print("Duplicate rows:", df.duplicated().sum())
df.drop_duplicates(inplace=True)

# Summarize dtypes and non-null counts after cleaning.
df.info()


In [None]:

# =======================
# 4. Feature Engineering
# =======================
# Derive calendar features from the parsed timestamp columns.
date_accessor = df['date'].dt

df['month'] = date_accessor.strftime('%Y-%m')   # year-month label, e.g. "2024-03"
df['day'] = date_accessor.day_name()            # weekday name, e.g. "Monday"
df['hour'] = df['datetime'].dt.hour             # hour of the day of each sale

# Preview the frame with the new columns as the cell output.
df.head()


In [None]:

# =======================
# 5. Exploratory Data Analysis (EDA)
# =======================

# Revenue by coffee product
# Total the `money` column per product and rank products from highest to lowest.
product_totals = df.groupby('coffee_name')['money'].sum()
revenue_data = product_totals.sort_values(ascending=False)

# Horizontal bars: revenue on the x-axis, one bar per product on the y-axis.
fig, ax = plt.subplots(figsize=(10,5))
sns.barplot(x=revenue_data.values, y=revenue_data.index, palette="viridis", ax=ax)
ax.set_title("Revenue by Coffee Product")
ax.set_xlabel("Total Revenue")
ax.set_ylabel("Coffee Type")
plt.show()


In [None]:

# Monthly Sales Trend
# Sum revenue per "YYYY-MM" label; groupby returns the labels in sorted order.
monthly_sales = df.groupby('month')['money'].sum()

# Line chart with a marker on every month.
fig, ax = plt.subplots(figsize=(10,5))
monthly_sales.plot(marker='o', ax=ax)
ax.set_title("Monthly Sales Trend")
ax.set_xlabel("Month")
ax.set_ylabel("Revenue")
ax.grid(True)
plt.show()


In [None]:

# Sales by Day of Week
# Fix the bar order to the calendar week instead of the default ordering.
weekday_order = ["Monday","Tuesday","Wednesday","Thursday","Friday","Saturday","Sunday"]

# estimator=sum makes each bar the total revenue for that weekday.
fig, ax = plt.subplots(figsize=(8,5))
sns.barplot(data=df, x='day', y='money', estimator=sum, order=weekday_order, ax=ax)
ax.set_title("Sales by Day of Week")
plt.show()


In [None]:

# Sales by Hour
# One point per hour of day; estimator=sum plots total revenue for that hour,
# and ci=None turns off the confidence band.
fig, ax = plt.subplots(figsize=(12,5))
sns.lineplot(data=df, x='hour', y='money', estimator=sum, ci=None, marker='o', ax=ax)
ax.set_title("Hourly Sales Pattern")
ax.set_xlabel("Hour of Day")
ax.set_ylabel("Revenue")
plt.show()


In [None]:

# =======================
# 6. Machine Learning - Predict Sales
# =======================
# Work on a copy so the modelling-only columns do not leak into df.
ml_data = df.copy()
date_parts = ml_data['date'].dt
ml_data['day_num'] = date_parts.day        # day of the month
ml_data['weekday'] = date_parts.weekday    # integer weekday, Monday = 0

# Predictors are purely time-based; the target is the per-row `money` value.
feature_columns = ['day_num', 'hour', 'weekday']
X = ml_data[feature_columns]
y = ml_data['money']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit ordinary least squares on the training rows.
model = LinearRegression()
model.fit(X_train, y_train)

# Score the model on the held-out rows.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Mean Squared Error:", mse)
print("R^2 Score:", r2)

# One fitted coefficient per predictor, shown as the cell output.
coefficients = pd.DataFrame({'Coefficient': model.coef_}, index=X.columns)
coefficients


In [None]:

# =======================
# 7. Insights
# =======================
# Relies on revenue_data and monthly_sales computed in the EDA cells, and on
# the `day` / `hour` columns added during feature engineering.
day_totals = df.groupby('day')['money'].sum()
hour_totals = df.groupby('hour')['money'].sum()

# The heading emoji was mis-encoded (UTF-8 bytes decoded as cp1252); restored
# to the intended pushpin character so the output is readable.
print("\n📌 Key Insights:")
# idxmax() returns the label with the largest total revenue in each series.
print("- Top-selling product:", revenue_data.idxmax())
print("- Highest revenue month:", monthly_sales.idxmax())
print("- Peak sales day:", day_totals.idxmax())
print("- Peak sales hour:", hour_totals.idxmax())
