In [None]:
!pip install fpdf

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from fpdf import FPDF
import nbformat as nbf
import pickle
import os

# 1. Data Loading and Preparation
def load_and_prepare_data():
    """Simulate two years of daily restaurant sales and persist them as CSV.

    The global NumPy RNG is seeded with 42, so repeated calls return the same
    frame. One row is produced per calendar day from 2023-01-01 through
    2024-12-31 with per-dish sales, promotion/event flags, a temperature
    reading, the summed ``total_demand`` and the calendar ``month``.

    Side effects:
        Creates the ``data`` directory if needed and (over)writes
        ``data/restaurant_sales_data.csv``.

    Returns:
        pandas.DataFrame: the simulated dataset.
    """
    np.random.seed(42)
    days = pd.date_range(start='2023-01-01', end='2024-12-31', freq='D')
    n_days = len(days)

    frame = pd.DataFrame({'date': days})

    # Draw order matters: it must stay A, B, C, promotions, events,
    # temperature so the seeded random stream yields the same values.
    dish_rates = (('dish_A_sales', 50), ('dish_B_sales', 70), ('dish_C_sales', 40))
    for column, rate in dish_rates:
        frame[column] = np.random.poisson(lam=rate, size=n_days)
    frame['promotions'] = np.random.choice([0, 1], size=n_days, p=[0.8, 0.2])
    frame['special_event'] = np.random.choice([0, 1], size=n_days, p=[0.95, 0.05])
    frame['temperature'] = np.random.uniform(low=15, high=35, size=n_days)

    dish_columns = [column for column, _ in dish_rates]
    frame['total_demand'] = frame[dish_columns].sum(axis=1)
    frame['month'] = frame['date'].dt.month

    os.makedirs('data', exist_ok=True)
    frame.to_csv('data/restaurant_sales_data.csv', index=False)
    return frame

# 2. Exploratory Data Analysis
def exploratory_data_analysis(data):
    """Render three exploratory charts of the sales data as PNG files.

    Args:
        data: DataFrame providing the ``date``, ``total_demand`` and
            ``dish_A_sales`` / ``dish_B_sales`` / ``dish_C_sales`` columns.

    Side effects:
        Creates the ``visualizations`` directory if needed and writes
        ``total_demand_distribution.png``, ``total_demand_over_time.png``
        and ``sales_per_dish.png`` into it.
    """
    os.makedirs('visualizations', exist_ok=True)

    def _finish(title, x_label, y_label, target):
        # Shared tail of every chart: annotate, tighten, save, release.
        plt.title(title, fontsize=16)
        plt.xlabel(x_label, fontsize=12)
        plt.ylabel(y_label, fontsize=12)
        plt.tight_layout()
        plt.savefig(target)
        plt.close()

    # Histogram (with KDE overlay) of the daily total demand.
    plt.figure(figsize=(10, 6))
    sns.histplot(data['total_demand'], kde=True, color='skyblue')
    _finish('Distribution of Total Demand', 'Total Demand', 'Frequency',
            'visualizations/total_demand_distribution.png')

    # Daily total demand as a time series.
    plt.figure(figsize=(10, 6))
    sns.lineplot(data=data, x='date', y='total_demand', color='green')
    _finish('Total Demand Over Time', 'Date', 'Total Demand',
            'visualizations/total_demand_over_time.png')

    # Side-by-side box plots, one per dish.
    per_dish = data[['dish_A_sales', 'dish_B_sales', 'dish_C_sales']]
    plt.figure(figsize=(10, 6))
    sns.boxplot(data=per_dish, palette='Set2')
    _finish('Sales Distribution per Dish', 'Dish', 'Sales',
            'visualizations/sales_per_dish.png')

# 3. Model Training
def train_model(data):
    """Fit a random-forest regressor on the sales data and evaluate it.

    The data is split 80/20 (``random_state=42``), a 100-tree
    ``RandomForestRegressor`` is fitted on the training part, pickled to
    ``models/demand_forecasting_model.pkl``, scored on the held-out part and
    its predictions are plotted against the true values.

    Args:
        data: DataFrame with the feature columns listed below and the
            ``total_demand`` target column.

    Side effects:
        Creates ``models`` and ``visualizations`` if needed; writes the
        pickled model and ``visualizations/model_predictions.png``.

    Returns:
        tuple: ``(mse, r2)`` computed on the test split.
    """
    # NOTE(review): when `data` comes from load_and_prepare_data,
    # total_demand is exactly dish_A_sales + dish_B_sales + dish_C_sales, so
    # these features contain the target's addends (target leakage) and the
    # reported metrics will look far better than a real forecast would.
    # Left unchanged here because dropping them alters the saved model's
    # input schema - confirm the intended feature set.
    feature_columns = ['dish_A_sales', 'dish_B_sales', 'dish_C_sales',
                       'promotions', 'special_event', 'temperature', 'month']
    X = data[feature_columns]
    y = data['total_demand']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    os.makedirs('models', exist_ok=True)
    with open('models/demand_forecasting_model.pkl', 'wb') as file:
        pickle.dump(model, file)

    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    # Create the output directory here as well, so this function does not
    # depend on exploratory_data_analysis having been called beforehand.
    os.makedirs('visualizations', exist_ok=True)

    plt.figure(figsize=(10, 6))
    plt.plot(y_test.values, label='True Demand', marker='o', color='blue')
    plt.plot(y_pred, label='Predicted Demand', marker='x', color='red')
    plt.title('Model Prediction vs True Demand', fontsize=16)
    plt.xlabel('Test Samples', fontsize=12)
    plt.ylabel('Total Demand', fontsize=12)
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.savefig('visualizations/model_predictions.png')
    plt.close()

    return mse, r2

# 4. Generate PDF Report
def generate_pdf_report(mse, r2, output_path='Demand_Forecasting_Report.pdf'):
    """Write a short PDF summary of the demand-forecasting project.

    Args:
        mse: Mean squared error of the model; printed with two decimals.
        r2: R-squared score of the model; printed with two decimals.
        output_path: Destination of the PDF. Defaults to the file name that
            was previously hard-coded, so existing callers are unaffected.
    """
    pdf = FPDF()
    pdf.add_page()

    def section_heading(text):
        # Bold 14pt heading line, then switch back to the 12pt body font.
        pdf.set_font("Arial", style='B', size=14)
        pdf.cell(0, 10, text, ln=True)
        pdf.set_font("Arial", size=12)

    # Title. Width 0 makes the cell span from the left to the right margin,
    # so align='C' centres the text on the page. The former fixed width of
    # 200 started at the left margin and ran past the right one, which
    # shifted the "centred" title to the right.
    pdf.set_font("Arial", style='B', size=16)
    pdf.cell(0, 10, "Project 15: Demand Forecasting in a Restaurant Chain", ln=True, align='C')
    pdf.ln(10)

    pdf.set_font("Arial", size=12)
    pdf.multi_cell(0, 10, "This project simulates demand forecasting in a restaurant chain. "
                          "The goal is to predict demand for different dishes and optimize purchase planning.")

    section_heading("Methodology:")
    pdf.multi_cell(0, 10, "- Data collection: Simulated sales data for dishes, promotions, and events.")
    pdf.multi_cell(0, 10, "- Model: RandomForestRegressor to predict total demand.")
    pdf.multi_cell(0, 10, "- Evaluation: Mean Squared Error and R-squared metrics.")

    section_heading("Analysis and Results:")
    pdf.multi_cell(0, 10, f"The model achieved the following metrics:\n- Mean Squared Error (MSE): {mse:.2f}\n"
                          f"- R-squared (R²): {r2:.2f}")

    section_heading("Recommendations:")
    pdf.multi_cell(0, 10, "- Incorporate additional features like holidays, weather conditions, etc.")
    pdf.multi_cell(0, 10, "- Experiment with advanced models like Gradient Boosting or Neural Networks.")
    pdf.multi_cell(0, 10, "- Deploy the model in a production environment for real-time predictions.")

    pdf.output(output_path)

# 5. Generate README.md
def generate_readme():
    """Create (or overwrite) a brief ``README.md`` in the working directory.

    The file contains the project title followed by Description, Objectives,
    How to Run and Results sections.
    """
    readme_lines = (
        "# Project 15: Demand Forecasting in a Restaurant Chain\n\n",
        "## Description\n",
        "This project predicts the demand for different dishes in a restaurant chain.\n",
        "## Objectives\n",
        "- Analyze and forecast total demand.\n",
        "- Use RandomForest for modeling.\n",
        "## How to Run\n",
        "Run the main script: 'python demand_forecasting.py'\n",
        "## Results\n",
        "MSE and R-squared indicate model performance.\n",
    )
    with open('README.md', 'w') as readme:
        readme.writelines(readme_lines)

# Main Execution
# Runs the whole pipeline in order: simulate data, plot it, train and score
# the model, then write the PDF report and the README.
if __name__ == '__main__':
    data = load_and_prepare_data()
    # EDA runs before training; it creates the visualizations/ directory
    # that train_model also saves its prediction plot into.
    exploratory_data_analysis(data)
    # The test-split metrics are passed on to the report generator.
    mse, r2 = train_model(data)
    generate_pdf_report(mse, r2)
    generate_readme()


Collecting fpdf
  Downloading fpdf-1.7.2.tar.gz (39 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: fpdf
  Building wheel for fpdf (setup.py) ... [?25l[?25hdone
  Created wheel for fpdf: filename=fpdf-1.7.2-py2.py3-none-any.whl size=40704 sha256=3f77abd434d2ddc3d40435eb07dac42a2bde79b3895f52a5bee4bdca2b05e6c9
  Stored in directory: /root/.cache/pip/wheels/f9/95/ba/f418094659025eb9611f17cbcaf2334236bf39a0c3453ea455
Successfully built fpdf
Installing collected packages: fpdf
Successfully installed fpdf-1.7.2


In [None]:
def generate_readme():
    """Create (or overwrite) the detailed ``README.md`` in the working directory.

    The document has Description, Objectives, Data, Methodology, Results,
    How to Run, Recommendations and Requirements sections.

    The file is written explicitly as UTF-8: the text contains the
    non-ASCII character "²", and relying on the platform's default encoding
    can yield a README that is not valid UTF-8 on systems whose locale
    encoding differs.
    """
    readme_lines = (
        "# Project 15: Demand Forecasting in a Restaurant Chain\n\n",
        "## Description\n",
        "This project aims to predict the demand for different dishes in a restaurant chain, "
        "helping to optimize purchase planning and reduce waste.\n\n",

        "## Objectives\n",
        "- Analyze historical sales data for dishes, promotions, and special events.\n",
        "- Use machine learning models to forecast demand for different dishes.\n",
        "- Provide insights and recommendations for optimizing restaurant operations.\n\n",

        "## Data\n",
        "The dataset contains simulated sales data from January 2023 to December 2024, including:\n",
        "- Sales data for three dishes (Dish A, Dish B, Dish C)\n",
        "- Information about promotions and special events\n",
        "- Temperature data to account for potential weather-related demand variations\n\n",

        "## Methodology\n",
        "1. **Data Preparation:** Load and preprocess the data, adding relevant features for analysis.\n",
        "2. **Exploratory Data Analysis:** Analyze sales trends, distributions, and correlations.\n",
        "3. **Modeling:** Train a RandomForestRegressor model to predict total demand.\n",
        "4. **Evaluation:** Use metrics like Mean Squared Error (MSE) and R-squared (R²) to evaluate model performance.\n",
        "5. **Visualization:** Generate visualizations for demand trends and model predictions.\n\n",

        "## Results\n",
        "The model achieved the following performance metrics:\n",
        "- **Mean Squared Error (MSE):** Measures the average squared difference between predicted and actual values.\n",
        "- **R-squared (R²):** Indicates the proportion of variance in the dependent variable explained by the model.\n\n",

        "## How to Run\n",
        "1. Ensure all dependencies are installed:\n",
        "   ```bash\n",
        "   pip install pandas numpy matplotlib seaborn scikit-learn fpdf nbformat\n",
        "   ```\n",
        "2. Run the main script to execute the project:\n",
        "   ```bash\n",
        "   python demand_forecasting_project_final_retry_v7.py\n",
        "   ```\n\n",

        "## Recommendations\n",
        "- Incorporate additional features like holidays and external events for better prediction accuracy.\n",
        "- Experiment with more advanced models, such as Gradient Boosting or Neural Networks.\n",
        "- Deploy the model in a production environment for real-time forecasting and decision-making.\n\n",

        "## Requirements\n",
        "- Python 3.x\n",
        "- pandas\n",
        "- numpy\n",
        "- matplotlib\n",
        "- seaborn\n",
        "- scikit-learn\n",
        "- fpdf\n",
        "- nbformat\n\n",
    )
    with open('README.md', 'w', encoding='utf-8') as file:
        file.writelines(readme_lines)

# Run the function to generate README.md
generate_readme()


In [None]:
from fpdf import FPDF

def generate_detailed_pdf_report(mse, r2, output_path='Detailed_Demand_Forecasting_Report.pdf'):
    """Write the detailed PDF report of the demand-forecasting project.

    The report contains a title, an introduction and the sections
    Methodology, Analysis and Results, Visualizations, Conclusions and
    Recommendations, and Final Notes.

    Args:
        mse: Mean squared error of the model; printed with two decimals.
        r2: R-squared score of the model; printed with two decimals.
        output_path: Destination of the PDF. Defaults to the file name that
            was previously hard-coded, so existing callers are unaffected.
    """
    pdf = FPDF()
    pdf.add_page()

    def section_heading(text):
        # Bold 14pt heading line, then switch back to the 12pt body font.
        pdf.set_font("Arial", style='B', size=14)
        pdf.cell(0, 10, text, ln=True)
        pdf.set_font("Arial", size=12)

    # Title of the report. Width 0 makes the cell span from the left to the
    # right margin, so align='C' centres the text on the page. The former
    # fixed width of 200 started at the left margin and ran past the right
    # one, which shifted the "centred" title to the right.
    pdf.set_font("Arial", style='B', size=16)
    pdf.cell(0, 10, "Project 15: Demand Forecasting in a Restaurant Chain", ln=True, align='C')
    pdf.ln(10)

    # Introduction section
    pdf.set_font("Arial", size=12)
    pdf.multi_cell(0, 10, "Introduction:\nThis project aims to forecast demand in a restaurant chain, "
                          "helping optimize purchase planning and reduce waste. "
                          "The analysis focuses on predicting the demand for different dishes based on historical data, "
                          "including sales, promotions, and special events.")
    pdf.ln(10)

    # Methodology section
    section_heading("Methodology:")
    pdf.multi_cell(0, 10, "- Data Collection: Simulated sales data from January 2023 to December 2024, including:\n"
                          "  * Sales of three dishes (Dish A, Dish B, Dish C)\n"
                          "  * Information on promotions and special events\n"
                          "  * Temperature data to consider weather-related demand variations\n"
                          "- Data Preparation: Data cleaning, feature engineering, and creation of relevant variables.\n"
                          "- Model: RandomForestRegressor to predict total demand.\n"
                          "- Evaluation Metrics: Mean Squared Error (MSE) and R-squared (R²).")
    pdf.ln(10)

    # Analysis and Results section
    section_heading("Analysis and Results:")
    pdf.multi_cell(0, 10, "The model achieved the following performance metrics:\n"
                          f"- Mean Squared Error (MSE): {mse:.2f}\n"
                          f"- R-squared (R²): {r2:.2f}\n")
    pdf.multi_cell(0, 10, "The results indicate that the model effectively captures demand trends, "
                          "but further improvements could be made by including additional features, "
                          "such as holidays and external events.")
    pdf.ln(10)

    # Visualizations section
    section_heading("Visualizations:")
    pdf.multi_cell(0, 10, "- Distribution of Total Demand: Shows the distribution of demand over the period.\n"
                          "- Total Demand Over Time: Illustrates demand trends over the analyzed period.\n"
                          "- Sales Distribution per Dish: Provides an overview of sales per dish.\n"
                          "- Model Predictions vs True Demand: Compares the model's predictions with actual demand.")
    pdf.ln(10)

    # Conclusions and Recommendations section
    section_heading("Conclusions and Recommendations:")
    pdf.multi_cell(0, 10, "- The model can be further enhanced by including more granular data, such as hourly sales.\n"
                          "- Advanced models like Gradient Boosting or Neural Networks could be explored.\n"
                          "- Deploying the model in a production environment could help with real-time decision-making.\n"
                          "- Regular updates to the model with new data are recommended to maintain accuracy.")
    pdf.ln(10)

    # Final Notes section
    section_heading("Final Notes:")
    pdf.multi_cell(0, 10, "This report provides a comprehensive overview of demand forecasting in a restaurant chain. "
                          "The analysis demonstrates the potential of machine learning models to support decision-making "
                          "and operational efficiency in the food service industry.")

    # Save the PDF report
    pdf.output(output_path)

# Demo invocation with placeholder metrics (an illustrative MSE and R²);
# substitute the values obtained from your own model.
mse_example, r2_example = 500.23, 0.85
generate_detailed_pdf_report(mse_example, r2_example)


In [None]:
import os
import shutil
import tempfile

from google.colab import files

# Zip the current directory and offer the archive as a browser download.
archive_name = "15.zip"

# Drop any archive left over from an earlier run so it is not packed into
# the new one (the original call overwrote it anyway).
if os.path.exists(archive_name):
    os.remove(archive_name)

# Build the zip in a scratch directory outside the tree being archived:
# writing it straight into "." lets make_archive pick up the half-written
# archive as one of its own members.
with tempfile.TemporaryDirectory() as staging_dir:
    built_archive = shutil.make_archive(os.path.join(staging_dir, "15"), 'zip', ".")
    shutil.move(built_archive, archive_name)

files.download(archive_name)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>