# ☕ Coffee Chain Inventory Prediction
This project aims to predict inventory levels for a fictional U.S.-based coffee chain using historical sales, marketing, product, and financial data.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import xgboost as xgb

In [None]:
# Read the raw dataset; the relative path is resolved against the current
# working directory.
csv_path = "Coffee_chain.csv"
df = pd.read_csv(csv_path)

# Preview the first rows (rendered as the cell's output).
df.head()

In [None]:
# Column names, dtypes and non-null counts. info() prints its report itself.
df.info()

# Summary statistics. A notebook cell only renders the value of its LAST bare
# expression, so this result has to be printed explicitly or it is discarded.
print(df.describe())

# Missing-value count per column. Printed explicitly so the output does not
# depend on this statement being the last line of the cell.
print(df.isnull().sum())

In [None]:
# Pairwise correlations between the numeric columns only.
corr_matrix = df.corr(numeric_only=True)

# Rank columns by absolute correlation with the target and keep the ten
# strongest. "Inventory" itself takes part in the ranking (self-correlation).
inventory_corr = corr_matrix["Inventory"].abs()
top_corr = inventory_corr.sort_values(ascending=False).head(10)
top_corr_features = top_corr.index

# Heatmap of the correlations among just those selected columns.
top_corr_frame = df[top_corr_features]
plt.figure(figsize=(10, 6))
sns.heatmap(top_corr_frame.corr(), annot=True, cmap="coolwarm")
plt.title("Top Feature Correlations with Inventory")
plt.tight_layout()
plt.show()

In [None]:
# Drop the raw Date column. errors='ignore' keeps this cell re-runnable when
# the column is already gone (or was never present).
df.drop(columns=["Date"], inplace=True, errors='ignore')

# Integer-encode every object-dtype column in place. Each fitted encoder is
# kept, keyed by column name, so the category <-> integer mapping can be
# inspected (`encoders[col].classes_`), inverted, or applied to new rows at
# prediction time instead of being thrown away.
# NOTE: re-running this cell on the already-encoded df finds no object
# columns and resets `encoders` to {}; reload df first in that case.
cat_cols = df.select_dtypes(include='object').columns
encoders = {}
for col in cat_cols:
    encoders[col] = LabelEncoder()
    df[col] = encoders[col].fit_transform(df[col])

# Separate the target from the features, then hold out 20% of the rows for
# evaluation. The fixed random_state makes the split reproducible.
X = df.drop("Inventory", axis=1)
y = df["Inventory"]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:
# Candidate regressors, keyed by the display name used in the report below.
# Fixed random_state values keep the tree-based models reproducible.
models = {
    "Linear Regression": LinearRegression(),
    "Random Forest": RandomForestRegressor(n_estimators=100, random_state=42),
    "XGBoost": xgb.XGBRegressor(random_state=42)
}

# Fit each model on the training split and report its error on the held-out
# test split.
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    mae = mean_absolute_error(y_test, y_pred)
    # RMSE is taken as sqrt(MSE) rather than via the `squared=False` keyword:
    # that keyword was deprecated in scikit-learn 1.4 and later removed, so
    # passing it raises TypeError on current releases. This form works on
    # every version.
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)

    print(f"{name}\nMAE: {mae:.2f} | RMSE: {rmse:.2f} | R²: {r2:.4f}\n")

In [None]:
# Pull the Random Forest entry out of the models dict and read its
# per-feature importances.
rf_model = models["Random Forest"]
importances = rf_model.feature_importances_

# Label each importance with its feature column, order from most to least
# important, and keep the top 15.
feat_imp = pd.Series(importances, index=X.columns)
feat_imp = feat_imp.sort_values(ascending=False).head(15)

plt.figure(figsize=(10, 6))
feat_imp.plot.barh()
plt.title("Top 15 Feature Importances - Random Forest")
# A horizontal bar chart draws the first row at the bottom; flip the axis so
# the most important feature ends up on top.
plt.gca().invert_yaxis()
plt.tight_layout()
plt.show()

### ✅ Summary
- `Cogs`, `Marketing`, and `Target COGS` are strong predictors of inventory.
- Random Forest and XGBoost gave the best prediction scores.
- These models can help coffee chains optimize stock levels and reduce both overstocking and understocking.