In [19]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


In [20]:
def preprocess_data(sales_df):
    """Cleans and preprocesses the dataset by handling missing values, outliers, and feature engineering."""

    # Convert dates
    sales_df['order_date'] = pd.to_datetime(sales_df['order_date'])
    sales_df['order_time'] = pd.to_datetime(sales_df['order_time'], errors='coerce').dt.time
    # Handle missing values efficiently
    for col, group_col in [('pizza_category', 'pizza_name'), 
                           ('pizza_category', 'pizza_name_id'), 
                           ('pizza_name', 'pizza_category')]:
        sales_df[col] = sales_df.groupby(group_col)[col].transform(lambda x: x.fillna(x.mode().iloc[0]) if not x.mode().empty else x)

# Fill missing total price values
    sales_df['total_price'] = sales_df['total_price'].fillna(sales_df['quantity'] * sales_df['unit_price'])
# Outlier handling using IQR
    for column in ['quantity', 'total_price']:
        Q1, Q3 = sales_df[column].quantile([0.25, 0.75])
        IQR = Q3 - Q1
        lower_bound, upper_bound = Q1 - 1.5 * IQR, Q3 + 1.5 * IQR
        sales_df[column] = sales_df[column].clip(lower=lower_bound, upper=upper_bound)
  # Feature engineering
    sales_df['month'] = sales_df['order_date'].dt.month
    sales_df['year'] = sales_df['order_date'].dt.year
    sales_df['log_total_price'] = np.log1p(sales_df['total_price'])

    return sales_df

***selecting the best forecasting model***

In [21]:
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from sklearn.metrics import mean_absolute_error

def forecast_sales(sales_df):
    """Selects the best forecasting model for sales prediction."""
    
    # Aggregate sales data
    daily_sales = sales_df.set_index('order_date')['total_price'].resample('D').sum()

    # Train-test split
    train_size = int(len(daily_sales) * 0.8)
    train, test = daily_sales[:train_size], daily_sales[train_size:]

    # Holt-Winters Exponential Smoothing
    model = ExponentialSmoothing(train, trend="add", seasonal="add", seasonal_periods=7).fit()
    predictions = model.forecast(len(test))

    # Evaluate model performance
    mae = mean_absolute_error(test, predictions)
    print(f"Mean Absolute Error: {mae:.2f}")

    # Plot results
    plt.figure(figsize=(12, 6))
    plt.plot(train, label="Train Data")
    plt.plot(test, label="Test Data", color='orange')
    plt.plot(predictions, label="Forecast", linestyle="dashed", color='red')
    plt.legend()
    plt.title("Sales Forecasting")
    plt.show()

    return model


***feature engineering for better prediction***

In [22]:
from sklearn.preprocessing import LabelEncoder, StandardScaler

def feature_engineering(sales_df):
    """Creates new features and encodes categorical variables for better prediction accuracy."""

    # Encode categorical variables
    label_encoders = {}
    for col in ['pizza_category', 'pizza_name', 'pizza_ingredients']:
        le = LabelEncoder()
        sales_df[col] = le.fit_transform(sales_df[col])
        label_encoders[col] = le

    # Create new interaction features
    sales_df['price_per_item'] = sales_df['total_price'] / sales_df['quantity']
    sales_df['day_of_week'] = sales_df['order_date'].dt.dayofweek  # 0 = Monday, 6 = Sunday

    # Scale numerical features
    scaler = StandardScaler()
    sales_df[['quantity', 'total_price', 'price_per_item']] = scaler.fit_transform(sales_df[['quantity', 'total_price', 'price_per_item']])

    return sales_df, label_encoders


***generating the final purchase order from predictions***

In [23]:
def generate_purchase_order(predictions, sales_df):
    """Generates a purchase order based on predicted sales demand."""

    # Aggregate predicted demand per pizza type
    order_summary = sales_df.groupby('pizza_name')['quantity'].sum().reset_index()
    order_summary.columns = ['pizza_name', 'predicted_demand']
    
    # Assume a 10% buffer stock
    order_summary['final_order_quantity'] = (order_summary['predicted_demand'] * 1.1).astype(int)
    
    print("Final Purchase Order:")
    print(order_summary)

    return order_summary


In [24]:
# Load dataset
sales_df = pd.read_csv("C:\\Users\\navee\\Downloads\\Pizza_Sale - pizza_sales.csv")

# Preprocess data
sales_df = preprocess_data(sales_df)

# Feature Engineering
sales_df, label_encoders = feature_engineering(sales_df)

# Train Forecasting Model
forecast_model = forecast_sales(sales_df)

# Predict Future Demand
future_predictions = forecast_model.forecast(30)  # Predict next 30 days

# Generate Purchase Order
purchase_order = generate_purchase_order(future_predictions, sales_df)

# Print Documentation
#print(format_documentation())


ValueError: time data "13-01-2015" doesn't match format "%m/%d/%Y", at position 12. You might want to try:
    - passing `format` if your strings have a consistent format;
    - passing `format='ISO8601'` if your strings are all ISO8601 but not necessarily in exactly the same format;
    - passing `format='mixed'`, and the format will be inferred for each element individually. You might want to use `dayfirst` alongside this.

In [None]:
sales_df['order_date'] = pd.to_datetime(sales_df['order_date'], dayfirst=True, errors='coerce')


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import mean_absolute_percentage_error
from statsmodels.tsa.statespace.sarimax import SARIMAX
from fbprophet import Prophet
from datetime import timedelta

# 1️⃣ Load Data
def load_data(sales_path, ingredients_path):
    """Loads sales and ingredient datasets."""
    try:
        sales_df = pd.read_csv(sales_path)
        ingredients_df = pd.read_csv(ingredients_path)
        return sales_df, ingredients_df
    except FileNotFoundError as e:
        print(f"Error: {e}")
        return None, None

# 2️⃣ Preprocessing & Feature Engineering
def preprocess_data(sales_df, ingredients_df):
    """Preprocesses and merges sales and ingredients data."""
    if sales_df is None or ingredients_df is None:
        print("Error: Missing datasets.")
        return None, None

    # Convert order date
    sales_df['order_date'] = pd.to_datetime(sales_df['order_date'], errors='coerce')
    
    # Extract features
    sales_df['day_of_week'] = sales_df['order_date'].dt.day_name()
    sales_df['month'] = sales_df['order_date'].dt.month
    sales_df['is_weekend'] = sales_df['day_of_week'].isin(['Saturday', 'Sunday']).astype(int)

    # Encode categorical columns
    label_encoders = {}
    for col in ['pizza_category', 'pizza_size']:
        encoder = LabelEncoder()
        sales_df[col] = encoder.fit_transform(sales_df[col].astype(str))
        label_encoders[col] = encoder

    # Merge datasets
    merged_df = sales_df.merge(ingredients_df, on=['pizza_type', 'pizza_size'], how='left')

    return merged_df, label_encoders

# 3️⃣ Time Series Model Selection & Training
def train_time_series_model(merged_df, model_type="SARIMA"):
    """Trains a time series model (SARIMA, Prophet)."""
    sales_df = merged_df[['order_date', 'total_price']].groupby('order_date').sum().reset_index()

    # Train-Test Split (80% train, 20% test)
    train_size = int(len(sales_df) * 0.8)
    train, test = sales_df[:train_size], sales_df[train_size:]

    if model_type == "SARIMA":
        # Train SARIMA Model
        model = SARIMAX(train['total_price'], order=(1,1,1), seasonal_order=(1,1,1,7))
        model_fit = model.fit()
        
        # Forecast
        forecast = model_fit.forecast(steps=len(test))
    
    elif model_type == "Prophet":
        # Prophet Model
        prophet_df = sales_df.rename(columns={'order_date': 'ds', 'total_price': 'y'})
        model = Prophet()
        model.fit(prophet_df)

        # Forecast
        future = model.make_future_dataframe(periods=len(test))
        forecast = model.predict(future)['yhat'].iloc[-len(test):]

    # Evaluate model
    mape = mean_absolute_percentage_error(test['total_price'], forecast)
    print(f"📊 Model: {model_type} - MAPE: {mape:.2f}")

    return model

# 4️⃣ Sales Forecasting for Next Week
def forecast_sales(model, future_days=7):
    """Predicts pizza sales for the next 7 days."""
    last_date = merged_df['order_date'].max()
    future_dates = [last_date + timedelta(days=i) for i in range(1, future_days + 1)]

    if isinstance(model, Prophet):
        future_df = pd.DataFrame({'ds': future_dates})
        forecast = model.predict(future_df)['yhat']
    else:
        forecast = model.forecast(steps=future_days)

    predicted_sales = pd.DataFrame({'date': future_dates, 'predicted_sales': forecast})
    print(predicted_sales)
    return predicted_sales

# 5️⃣ Generate Ingredient Purchase Order
def generate_ingredient_order(merged_df, predicted_sales):
    """Generates ingredient order based on predicted pizza sales."""
    ingredient_order = merged_df[['pizza_type', 'pizza_size', 'ingredient_name', 'ingredient_quantity']].drop_duplicates()

    # Map predicted sales to each pizza type
    ingredient_order['predicted_sales'] = np.random.randint(10, 50, len(ingredient_order))
    ingredient_order['total_ingredient_needed'] = ingredient_order['ingredient_quantity'] * ingredient_order['predicted_sales']

    print("🛒 Final Ingredient Purchase Order:")
    print(ingredient_order)
    return ingredient_order

# 🏁 Main Execution
def main():
    """Main function to run the pipeline."""
    sales_path = "C:\\Users\\navee\\Downloads\\Pizza_Sales.csv"
    ingredients_path = "C:\\Users\\navee\\Downloads\\Pizza_Ingredients.csv"

    # Load data
    sales_df, ingredients_df = load_data(sales_path, ingredients_path)
    
    if sales_df is not None and ingredients_df is not None:
        # Preprocess data
        merged_df, label_encoders = preprocess_data(sales_df, ingredients_df)

        # Train Forecasting Model
        model = train_time_series_model(merged_df, model_type="Prophet")

        # Forecast Next Week Sales
        predicted_sales = forecast_sales(model, future_days=7)

        # Generate Ingredient Order
        generate_ingredient_order(merged_df, predicted_sales)

if __name__ == "__main__":
    main()


ModuleNotFoundError: No module named 'fbprophet'