# AI Applications Project: Report Generation

In [2]:
#Importing the generative ai
%pip install -U -q "google-genai>=1.4.0"

Note: you may need to restart the kernel to use updated packages.


### Setting up API Key

In [4]:
from google import genai

GOOGLE_API_KEY = input("Enter your Google API key:")
client = genai.Client(api_key=GOOGLE_API_KEY)

Enter your Google API key: AIzaSyAWSpYEUyThH2TGJwll1k-JYIQAeo1GjOE


In [5]:
MODEL_ID = "gemini-2.5-flash" # @param ["gemini-2.5-flash-lite","gemini-2.5-flash","gemini-2.5-pro","gemini-2.0-flash"] {"allow-input":true, isTemplate: true}

# Data

## Database Integration

In [8]:
import numpy as np
import pandas as pd
import os
import sys

sys.path.append('../../')

from Database.db import SessionLocal
from Database_Table import Inventory, Order

def getDbContent():
    session = SessionLocal()
    inventory_records = session.query(Inventory).all()
    order_records = session.query(Order).all()
    session.close()
    return inventory_records, order_records

inventory, order = getDbContent()

2025-08-18 18:42:14,654 INFO sqlalchemy.engine.Engine SELECT DATABASE()
2025-08-18 18:42:14,654 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-08-18 18:42:14,655 INFO sqlalchemy.engine.Engine SELECT @@sql_mode
2025-08-18 18:42:14,656 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-08-18 18:42:14,656 INFO sqlalchemy.engine.Engine SELECT @@lower_case_table_names
2025-08-18 18:42:14,657 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-08-18 18:42:14,657 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-08-18 18:42:14,659 INFO sqlalchemy.engine.Engine SELECT `Inventory`.`ItemId` AS `Inventory_ItemId`, `Inventory`.`ItemName` AS `Inventory_ItemName`, `Inventory`.`ItemCategory` AS `Inventory_ItemCategory`, `Inventory`.`ItemQuantity` AS `Inventory_ItemQuantity`, `Inventory`.`UnitsSold` AS `Inventory_UnitsSold`, `Inventory`.`Weight` AS `Inventory_Weight`, `Inventory`.`Size` AS `Inventory_Size`, `Inventory`.`Priority` AS `Inventory_Priority`, `Inventory`.`Location` AS `Inventory_Location`, `In

In [9]:
def dbtoList(records):
    output_list = []
    for r in records:
        if isinstance(r, Inventory):
            data = {
                "ItemId": r.ItemId, 
                "ItemName": r.ItemName,
                "Category": r.ItemCategory,
                "Quantity": r.ItemQuantity, 
                "UnitsSold": r.UnitsSold,
                "Weight": r.Weight, 
                "Size": r.Size,
                "Priority": r.Priority, 
                "Location": r.Location,
                "Date": r.Date, 
                "Dispose": r.Dispose                
            }
        elif isinstance(r, Order):
            data = {
                "OrderId": r.OrderId,
                "ItemId": r.ItemId, 
                "OrderQuantity": r.OrderQuantity, 
                "Sales": r.Sales, 
                "Price": r.Price, 
                "Discount": r.Discount,
                "Profit": r.Profit, 
                "DateOrdered": r.DateOrdered,
                "DateReceived": r.DateReceived,
                "CustomerSegment": r.CustomerSegment
            }
        else:
            continue
        output_list.append(data)
    
    return output_list

inventoryData = dbtoList(inventory)
orderData = dbtoList(order)

## Supervised Models

In [None]:
import pickle
import joblib
from datetime import datetime
from IPython.display import Markdown

class Supervised_Models:
    # Location Prediction Model
    def predict_location(input_data):
        with open('../../Supervised_models/Samuel/storage_prediction_model.pkl', 'rb') as file:
            storage_prediction_model = pickle.load(file)
            
        categorical_features = {
            'Priority': ['High','Low','Medium'],
            'Product_Type': ['Clothing','Technology','Other','Sports and Fitness'],
            'Size': ['Large','Medium','Small']
        }
        numerical_features = ['Order_Quantity', 'Weight']
        one_hot_columns = []
        
        for feature, values in categorical_features.items():
            for value in values:
                one_hot_columns.append(f"{feature}_{value}")
            
        # Combine with numerical features to get all feature names
        all_feature_names = one_hot_columns + numerical_features

        features_dict = {col: 0 for col in all_feature_names}
    
        # Set one-hot encoded features
        for feature, values in categorical_features.items():
            if feature in input_data:
                selected_value = input_data[feature]
                one_hot_col = f"{feature}_{selected_value}"
                if one_hot_col in features_dict:
                    features_dict[one_hot_col] = 1
    
        # Set numerical features
        for feature in numerical_features:
            if feature in input_data:
                features_dict[feature] = float(input_data[feature])
        
        # Convert to array in the correct order
        features_array = np.array([features_dict[col] for col in all_feature_names]).reshape(1, -1)

        prediction = storage_prediction_model.predict(features_array)
        return prediction

    def demand_forecast_preprocessor(order_data, inventory_data):
        # Create Dataframe
        order = pd.DataFrame(order_data)
        inventory = pd.DataFrame(inventory_data)

        # Ensure dates are in datetime format
        order['DateOrdered'] = pd.to_datetime(order['DateOrdered'])
        
        # Extract Month (period or string, depending on preference)
        order['OrderMonth'] = order['DateOrdered'].dt.to_period('M')
        
        # Merge with Category lookup table
        merged_df = order.merge(inventory, on='ItemId', how='left')
        
        # Group and aggregate
        result_df = (
            merged_df
            .groupby(['OrderMonth', 'Category', 'CustomerSegment'], as_index=False)
            .agg(
                AveragePrice=('Price', 'mean'),
                AverageDiscount=('Discount', 'mean')
            )
        )
        
        return result_df

    # Demand forecast model
    def predict_demand_forecast(input_data):
        demand_forecast_model = joblib.load('../../Supervised_models/ShernFai/model/salesforecast(categories).pkl')
        with open('../../Supervised_models/ShernFai/model/salesforecast_preprocessor.pkl', 'rb') as f:
            preprocessor_data = pickle.load(f)

        categories = {
            "Clothing" : [
                "Cleats",
                "Men's Footwear",
                "Women's Apparel"
            ],
            "Technology": [
                "Electronics",
                "Video Games",
                "Cameras",
                "Computers",
            ],
            "Sports and Fitness": [
                "Cardio Equipment",
                "Indoor/Outdoor Games",
                "Water Sports",
                "Shop By Sport",
                "Camping & Hiking",
                "Fishing"
            ],
            "Other": [
                "Garden",
                "Pet Supplies"
            ]
        }

        cat_keys = list(categories.keys())

        # Extract preprocessor components
        le_category = preprocessor_data['label_encoder_category']
        reference_date = preprocessor_data['reference_date']
        unique_categories = preprocessor_data['unique_categories']
        feature_columns = preprocessor_data['feature_columns']

        # Get data
        category_name = input_data['category']
        future_month = input_data['month']
        avg_price = float(input_data['avg_price'])
        customer_segment = input_data['customer_segment']
        discount_rate = float(input_data['discount_rate'])
        
        # Parse the future date
        future_date = pd.to_datetime(future_month)
        
        # Calculate time features for the future date
        months_since_start = ((future_date - reference_date).days / 30.44)
        
        # Create test data with numerical time features
        test_data = {
            'Category Name': category_name,
            'Average Product Price': avg_price,
            'Customer Segment': customer_segment,
            'Order Item Discount Rate': discount_rate,
            # Time features (numerical - can handle ANY future date!)
            'Year': future_date.year,
            'Month': future_date.month,
            'Quarter': future_date.quarter,
            'Months_Since_Start': int(months_since_start),
            'Month_Sin': np.sin(2 * np.pi * future_date.month / 12),
            'Month_Cos': np.cos(2 * np.pi * future_date.month / 12),
            'Year_Trend': future_date.year - reference_date.year
        }
        
        # Create DataFrame
        test_df = pd.DataFrame([test_data])
        
        # Handle unknown category
        if category_name not in cat_keys:
            print(f"Unknown category '{category_name}' - using default: {cat_keys[0]}")
            test_df['Category Name'] = cat_keys[0]
            category_name = cat_keys[0]
        
        # One-hot encode customer segment
        test_df = pd.get_dummies(test_df, columns=['Customer Segment'], drop_first=True)
        
        # Ensure same columns as training (crucial!)
        test_df = test_df.reindex(columns=feature_columns, fill_value=0)
        
        # Make prediction
        total = 0
        num = len(categories[category_name])
        for subclass in categories[category_name]:
            test_df['Category Name'] = subclass
            test_df['Category Name'] = le_category.transform(test_df['Category Name'])
            total += demand_forecast_model.predict(test_df)
        
        avg_demand = total / num
        
        return avg_demand

    def detect_anomalies(inventory_list):
        anomalies_detected = []
        for item in inventory_list:
            current_location = item['Location']
            predicted_location = Supervised_Models.predict_location({
                "Priority": item['Priority'],
                "Product_Type": item['Category'],
                "Size": item['Size'],
                "Order_Quantity": item['Quantity'],
                "Weight": item['Weight']
            })[0]

            #print(f"\nCurrent Location: {current_location}")
            #print(f"Predicted Location: {predicted_location}")
            if current_location != predicted_location:
                #print(f"Anomaly detected! Item id:{item['ItemId']} is stored at location {current_location} while it should be stored at {predicted_location}.")
                anomalies_detected.append({'ItemId': item['ItemId'], 'ItemName': item['ItemName'], 'CurrentLocation': current_location, 'PredictedLocation': predicted_location})

        return anomalies_detected

s = Supervised_Models

# Testing

print(s.predict_location({
    "Priority": "Medium",
    "Product_Type": "Sports and Fitness",
    "Size": "Medium",
    "Order_Quantity": 12,
    "Weight": 10.78
}))

print(s.detect_anomalies(inventoryData))

result = s.demand_forecast_preprocessor(orderData, inventoryData)
result

print(s.predict_demand_forecast({
    'category': "Clothing",
    'month': "2025-05",
    'avg_price': 10.0,
    'customer_segment': "Consumer",
    'discount_rate': 0.12
}))

# Sections

## Products Overview

In [20]:
# Product Overview: This section summarizes the overall performance of products
product_overview = client.models.generate_content(
    model=MODEL_ID,
    contents='''
Generate an overview of products based on the following:

1. **Top Products**: Identify the top-selling products by sales volume and revenue over the past month.
2. **Product Performance**: Analyze the performance of each product category in terms of sales, demand, and revenue.
3. **Sales by Product**: Present a summary of sales for each product, including revenue, volume sold, and average price.

Data:
- Sales Data: {sales_data}
- Product Categories: {product_categories_data}
- Product Sales Volume: {sales_volume_data}
- Product Revenue: {product_revenue_data}
'''
)

display(Markdown(product_overview.text))


To generate a precise and detailed overview, I would need the actual data contained within `{sales_data}`, `{product_categories_data}`, `{sales_volume_data}`, and `{product_revenue_data}`.

However, I can provide the *structure* and *explain how this overview would be generated*, along with illustrative examples of what the output would look like once the data is provided and processed.

---

## Product Performance Overview (Past Month)

This overview analyzes product performance across various dimensions to identify key trends, top performers, and areas for strategic focus.

### 1. Top Products by Sales Volume & Revenue

**Methodology:**
Products would be ranked based on their total sales volume (units sold) and total revenue generated over the past month, drawing directly from `{sales_volume_data}` and `{product_revenue_data}`. The top 5-10 products in each category would typically be highlighted.

**Illustrative Example Output:**

| Rank | Product Name        | Category     | Sales Volume (Units) | Revenue (USD) |
| :--- | :------------------ | :----------- | :------------------- | :------------ |
| 1    | Ultra HD Smart TV   | Electronics  | 1,500                | $1,500,000    |
| 2    | Premium Coffee Maker| Home Goods   | 2,200                | $220,000      |
| 3    | Ergonomic Office Chair| Furniture    | 800                  | $160,000      |
| 4    | Wireless Earbuds    | Electronics  | 3,000                | $150,000      |
| 5    | Organic Cotton T-Shirt| Apparel      | 5,000                | $100,000      |
| ...  | *(Additional Top Products)* | ...          | ...                  | ...           |

**Key Insights:**
*   Identify high-value items (e.g., Ultra HD Smart TV) that drive significant revenue despite lower unit sales.
*   Recognize high-volume, potentially lower-margin items (e.g., Organic Cotton T-Shirt) that indicate broad appeal.
*   Understand which categories dominate the top-performer list.

### 2. Product Performance by Category

**Methodology:**
Aggregate sales data, sales volume, and revenue for all products within each defined category (from `{product_categories_data}` linked with `{sales_data}`, `{sales_volume_data}`, and `{product_revenue_data}`). Demand can be inferred from sales volume and velocity.

**Illustrative Example Output:**

| Product Category | Total Revenue (USD) | Total Volume (Units) | Average Price Per Unit | Demand Trend (Inferred) | Key Observations                                     |
| :--------------- | :------------------ | :------------------- | :--------------------- | :---------------------- | :--------------------------------------------------- |
| **Electronics**  | $2,100,000          | 6,700                | $313.43                | High & Growing          | Strong revenue driver, high average price points.    |
| **Apparel**      | $350,000            | 12,500               | $28.00                 | Steady                  | High volume, lower revenue per unit. Consistent sales.|
| **Home Goods**   | $450,000            | 3,500                | $128.57                | Moderate                | Stable performance, good margin potential.           |
| **Furniture**    | $300,000            | 1,200                | $250.00                | Moderate                | Fewer units, but high value. Seasonally influenced?  |
| **Books**        | $80,000             | 4,000                | $20.00                 | Declining               | Lowest revenue, consistent volume but slowing down.  |
| ...              | *(Additional Categories)* | ...                  | ...                    | ...                     | ...                                                  |

**Key Insights:**
*   **Electronics** are the primary revenue generator.
*   **Apparel** drives significant unit sales, indicating broad market reach.
*   **Books** show a concerning demand trend, potentially requiring promotional strategies or review.
*   Average price per unit helps understand pricing strategy and market positioning across categories.

### 3. Sales by Product (Detailed Summary)

**Methodology:**
For each individual product, retrieve and summarize its total revenue from `{product_revenue_data}`, total volume sold from `{sales_volume_data}`, and calculate the average price (Revenue / Volume).

**Illustrative Example Output:**

| Product ID | Product Name            | Category     | Total Revenue (USD) | Volume Sold (Units) | Average Price (USD) |
| :--------- | :---------------------- | :----------- | :------------------ | :------------------ | :------------------ |
| P001       | Ultra HD Smart TV       | Electronics  | $1,500,000          | 1,500               | $1,000.00           |
| P002       | Premium Coffee Maker    | Home Goods   | $220,000            | 2,200               | $100.00             |
| P003       | Ergonomic Office Chair  | Furniture    | $160,000            | 800                 | $200.00             |
| P004       | Wireless Earbuds        | Electronics  | $150,000            | 3,000               | $50.00              |
| P005       | Organic Cotton T-Shirt  | Apparel      | $100,000            | 5,000               | $20.00              |
| P006       | Classic Fiction Novel   | Books        | $15,000             | 1,000               | $15.00              |
| P007       | Smart Home Hub          | Electronics  | $400,000            | 1,200               | $333.33             |
| ...        | *(All Products Listed)* | ...          | ...                 | ...                 | ...                 |

**Key Insights:**
*   Provides a granular view of each product's contribution.
*   Helps identify individual products that might be underperforming within a strong category, or surprisingly overperforming in a weak one.
*   Critical for inventory management, procurement, and targeted marketing campaigns for specific SKUs.

---

**Next Steps (with actual data):**

Once the actual data is provided, I can process it to:
1.  Generate precise tables and summaries as outlined above.
2.  Provide specific, data-driven recommendations for inventory, marketing, and pricing strategies.
3.  Identify any outliers or anomalies in sales performance that warrant further investigation.

## Category Distribution

In [22]:
# Category Distribution: This section analyzes the distribution of sales across categories
category_distribution = client.models.generate_content(
    model=MODEL_ID,
    contents='''
Generate insights on the distribution of sales across categories based on the following:

1. **Category-wise Sales Volume**: Summarize the total sales volume across all product categories.
2. **Category-wise Revenue**: Display the total revenue generated from each product category over the past month.
3. **Category-wise Performance**: Provide insights on which product categories are performing the best in terms of sales, revenue, and customer demand.

Data:
- Sales Volume by Category: {sales_volume_by_category_data}
- Revenue by Category: {revenue_by_category_data}
- Product Category Performance: {category_performance_data}
'''
)

display(Markdown(category_distribution.text))


To provide precise insights on the distribution of sales across categories, the actual data for `sales_volume_by_category_data`, `revenue_by_category_data`, and `category_performance_data` is required.

However, I can outline *how* these insights would be generated and what *types* of conclusions could be drawn once the data is provided. Please replace the placeholders with your actual sales data to get the specific analysis.

---

**Insights on Sales Distribution Across Categories (Framework for Analysis)**

**Data Required:**

*   **`sales_volume_by_category_data`**: e.g., `{"Electronics": 1500, "Apparel": 2200, "Home Goods": 800, "Books": 1000, "Food & Beverage": 3500}`
*   **`revenue_by_category_data`**: e.g., `{"Electronics": 350000, "Apparel": 120000, "Home Goods": 90000, "Books": 25000, "Food & Beverage": 70000}`
*   **`category_performance_data`**: This would ideally be a more detailed structure, possibly including trends, growth rates, and specific demand metrics.
    *   Example for a single category: `{"Electronics": {"SalesGrowthMonthOverMonth": "10%", "RevenueGrowthMonthOverMonth": "15%", "CustomerDemandScore": "High (4.5/5 reviews, frequent searches)"}}`

---

**Analysis & Insights:**

Once the data is populated, we would perform the following analysis:

---

### 1. Category-wise Sales Volume

**Analysis:**
We would sum the total sales volume across all product categories to get an overall picture of transactional activity. Then, we would break down the units sold by each category.

**Expected Insights:**
*   **High Volume Categories:** Identify which categories move the most units. This indicates products with high transactional frequency or broad appeal. (e.g., if `Food & Beverage` has the highest volume, it suggests many small, frequent purchases).
*   **Low Volume Categories:** Pinpoint categories with fewer unit sales. These might be niche products, higher-priced items, or items with longer purchase cycles.
*   **Volume Distribution:** Understand if sales volume is concentrated in a few categories or evenly distributed across many.

---

### 2. Category-wise Revenue

**Analysis:**
We would display the total revenue generated from each product category over the past month, allowing for a clear understanding of the monetary contribution of each segment.

**Expected Insights:**
*   **Revenue Drivers:** Clearly identify the "cash cow" categories that bring in the most money. (e.g., if `Electronics` has the highest revenue, it's a key financial pillar).
*   **Revenue vs. Volume Discrepancy:**
    *   **High Revenue, Moderate/Low Volume:** This suggests high-value or high-margin products (e.g., `Electronics` might have lower volume than `Food & Beverage` but significantly higher revenue). These are often critical for profitability.
    *   **High Volume, Moderate/Low Revenue:** Indicates commodity-like products or items with lower price points/margins (e.g., `Food & Beverage` might have high volume but lower total revenue compared to `Electronics`). These contribute to customer traffic and basket size.
*   **Profitability Potential:** Revenue figures are crucial for assessing the financial health and potential of each category.

---

### 3. Category-wise Performance (Sales, Revenue, Customer Demand)

**Analysis:**
This section would combine the volume and revenue data with specific performance metrics related to customer demand and growth trends.

**Expected Insights:**

*   **Best Performing Categories:**
    *   **Definition:** These categories would show a strong positive correlation across high sales volume, high revenue, and robust customer demand.
    *   **Characteristics:**
        *   **High Sales/Revenue Growth:** Consistently increasing unit sales and monetary value month-over-month.
        *   **Strong Customer Demand:** Indicated by high search interest, good conversion rates, positive customer reviews, low return rates, and potentially high repeat purchase rates.
    *   **Example:** If `Electronics` shows high revenue, steady sales growth, and a high "Customer Demand Score" (reflecting strong interest and satisfaction), it would be a top performer.

*   **Underperforming Categories:**
    *   **Definition:** Categories with stagnant or declining sales volume, low revenue contribution, and/or waning customer interest.
    *   **Characteristics:**
        *   Low or negative sales/revenue growth.
        *   Low customer engagement, poor reviews, high return rates, or declining search trends.
    *   **Example:** If `Books` have low volume, low revenue, and "Customer Demand Score" indicating declining interest, it would be an underperformer.

*   **Emerging or Niche Performers:**
    *   **Definition:** Categories that might not be top in absolute volume or revenue yet but show significant positive trends in growth or demand.
    *   **Characteristics:** Rapid month-over-month growth in sales/revenue, sudden spikes in customer interest or searches, positive early feedback.
    *   **Example:** A new category like "Sustainable Living" might have lower absolute numbers but show a "200% MoM growth" in demand and sales, marking it as an emerging opportunity.

---

**Overall Insights & Key Takeaways (Upon Data Provision):**

Once the actual data is provided, the combined analysis would allow us to:

1.  **Identify Core Business Drivers:** Pinpoint the 2-3 categories that are the absolute backbone of your sales and revenue.
2.  **Uncover Hidden Gems:** Discover categories that, despite lower volume, yield high revenue (high-margin products).
3.  **Spot Areas for Improvement:** Clearly see which categories are lagging and require intervention (e.g., marketing boost, pricing adjustment, product line review, inventory reduction).
4.  **Understand Customer Preferences:** Gain a clearer picture of where genuine customer interest and purchasing power lie.
5.  **Inform Strategic Decisions:** Guide decisions on inventory management, marketing budget allocation, product development, and resource deployment across categories.

---

**To get specific, actionable insights, please provide your sales, revenue, and performance data for each category.**

## Product Usage Forecast

In [24]:
# Product Usage Forecast: This section forecasts the future usage of products based on historical sales
product_usage_forecast = client.models.generate_content(
    model=MODEL_ID,
    contents='''
Generate a product usage forecast for the upcoming period based on historical data and trends:

1. **Sales Forecast by Product**: Predict the sales volume for each product in the next quarter/year.
2. **Demand Forecast**: Provide a demand forecast for products based on historical sales, usage probabilities, and seasonal patterns.
3. **Future Stock Levels**: Estimate the required stock levels for each product to meet forecasted demand.

Data:
- Historical Sales Data: {historical_sales_data}
- Product Usage Data: {usage_probabilities}
- Seasonal Patterns: {seasonal_sales_patterns_data}
- Current Stock Levels: {current_inventory_data}
'''
)

display(Markdown(product_usage_forecast.text))


To generate a comprehensive product usage forecast, I first need to define and simulate the `historical_sales_data`, `usage_probabilities`, `seasonal_sales_patterns_data`, and `current_inventory_data` as these were provided as placeholders.

**Disclaimer:** The following forecast is based on *simulated data* and a simplified forecasting methodology. For real-world applications, actual granular data and more sophisticated time-series analysis (e.g., ARIMA, Prophet, exponential smoothing) would be required.

---

### Executive Summary

This report provides a product usage forecast for the upcoming quarter (Q1 2024), covering sales predictions, overall demand, and recommended stock levels. Based on simulated historical data, Product A is expected to maintain its lead in sales and demand, followed by Product B and Product C, which shows significant growth potential but lower individual usage probability. Strategic ordering is recommended to ensure optimal stock levels and avoid both stockouts and excessive inventory.

---

### 1. Simulated Data Used for Forecast

To demonstrate the methodology, I've created plausible data sets:

**A. Historical Sales Data (Monthly for 2023)**

| Month | Product A (Units) | Product B (Units) | Product C (Units) |
| :---- | :------------------ | :------------------ | :------------------ |
| Jan-23 | 95                  | 48                  | 18                  |
| Feb-23 | 98                  | 47                  | 19                  |
| Mar-23 | 102                 | 50                  | 22                  |
| Apr-23 | 105                 | 52                  | 23                  |
| May-23 | 108                 | 51                  | 24                  |
| Jun-23 | 110                 | 53                  | 26                  |
| Jul-23 | 107                 | 50                  | 25                  |
| Aug-23 | 106                 | 49                  | 24                  |
| Sep-23 | 103                 | 48                  | 22                  |
| Oct-23 | 112                 | 54                  | 27                  |
| Nov-23 | 115                 | 55                  | 28                  |
| Dec-23 | 118                 | 56                  | 30                  |

**B. Product Usage Probabilities**
*(Interpretation: The probability that a sold unit of a product will be actively used or consumed within the forecast period.)*

*   **Product A:** 0.95 (95% of sold units are typically used)
*   **Product B:** 0.85 (85% of sold units are typically used)
*   **Product C:** 0.70 (70% of sold units are typically used)

**C. Seasonal Sales Patterns Data (Quarterly Multipliers)**
*(Based on general observation, Q1 is often slower, Q2/Q3 stable, Q4 peak.)*

*   **Q1 (Jan-Mar):** 0.90 (10% lower than average)
*   **Q2 (Apr-Jun):** 1.05 (5% higher than average)
*   **Q3 (Jul-Sep):** 1.00 (Average)
*   **Q4 (Oct-Dec):** 1.15 (15% higher than average)

**D. Current Stock Levels (As of End of Dec 2023)**

*   **Product A:** 200 units
*   **Product B:** 100 units
*   **Product C:** 50 units

---

### 2. Assumptions for Forecasting

*   **Forecasting Period:** Q1 2024 (January, February, March)
*   **Forecasting Method:**
    *   **Base Sales:** Average monthly sales from the preceding year (2023) are used as a baseline.
    *   **Trend:** A conservative overall growth trend of **+5%** year-over-year (applied to the quarterly average) is assumed for all products, reflecting general market expansion or internal initiatives.
    *   **Seasonality:** The predefined quarterly multipliers are applied to adjust for seasonal fluctuations.
*   **Safety Stock:** A **15% safety stock** buffer is added to the forecasted demand to mitigate against unforeseen demand spikes or supply chain delays.
*   **Lead Time:** Assumed to be short enough that stock can be acquired to meet the Q1 demand by the start of the period.
*   **Usage Probability Interpretation:** Directly applied as a multiplier to the sales forecast to derive effective usage demand.

---

### 3. Forecasting Methodology & Calculations

#### Step 1: Calculate Average Monthly/Quarterly Historical Sales (2023)

*   **Product A:** Sum = 1279 units. Average Monthly = 1279 / 12 = 106.58 units. Average Quarterly = 106.58 * 3 = **319.75 units.**
*   **Product B:** Sum = 603 units. Average Monthly = 603 / 12 = 50.25 units. Average Quarterly = 50.25 * 3 = **150.75 units.**
*   **Product C:** Sum = 288 units. Average Monthly = 288 / 12 = 24.00 units. Average Quarterly = 24.00 * 3 = **72.00 units.**

---

### 1. Sales Forecast by Product (Q1 2024)

**Formula:** `(Average Quarterly Sales 2023) * (1 + Annual Growth Trend) * Seasonal Multiplier (Q1)`

*   **Product A:**
    *   319.75 * (1 + 0.05) * 0.90
    *   319.75 * 1.05 * 0.90 = **302.16 units ≈ 302 units**
*   **Product B:**
    *   150.75 * (1 + 0.05) * 0.90
    *   150.75 * 1.05 * 0.90 = **142.46 units ≈ 142 units**
*   **Product C:**
    *   72.00 * (1 + 0.05) * 0.90
    *   72.00 * 1.05 * 0.90 = **68.04 units ≈ 68 units**

**Sales Forecast Summary (Q1 2024)**

| Product   | Forecasted Sales (Units) |
| :-------- | :----------------------- |
| Product A | 302                      |
| Product B | 142                      |
| Product C | 68                       |

---

### 2. Demand Forecast (Q1 2024)

**Formula:** `Sales Forecast * Product Usage Probability`

*   **Product A:**
    *   302 units (Sales Forecast) * 0.95 (Usage Probability) = **286.9 units ≈ 287 units**
*   **Product B:**
    *   142 units (Sales Forecast) * 0.85 (Usage Probability) = **120.7 units ≈ 121 units**
*   **Product C:**
    *   68 units (Sales Forecast) * 0.70 (Usage Probability) = **47.6 units ≈ 48 units**

**Demand Forecast Summary (Q1 2024)**

| Product   | Forecasted Demand (Units) |
| :-------- | :------------------------ |
| Product A | 287                       |
| Product B | 121                       |
| Product C | 48                        |

---

### 3. Future Stock Levels (Required to meet Q1 2024 Demand)

**Formulas:**
*   `Safety Stock = Forecasted Demand * 0.15`
*   `Required Stock Level = Forecasted Demand + Safety Stock`
*   `Units to Order = Required Stock Level - Current Stock Level` (If positive, order; if zero or negative, no order needed, or potentially surplus to manage).

*Self-correction: The "Future Stock Levels" typically refers to the *target* stock level to have at the *start* of the period to cover the period's demand. The "Units to Order" is the action needed.*

**A. Product A:**
*   Demand: 287 units
*   Safety Stock: 287 * 0.15 = 43.05 units ≈ 43 units
*   **Required Stock Level (Target):** 287 + 43 = **330 units**
*   Current Stock: 200 units
*   **Units to Order:** 330 - 200 = **130 units**

**B. Product B:**
*   Demand: 121 units
*   Safety Stock: 121 * 0.15 = 18.15 units ≈ 18 units
*   **Required Stock Level (Target):** 121 + 18 = **139 units**
*   Current Stock: 100 units
*   **Units to Order:** 139 - 100 = **39 units**

**C. Product C:**
*   Demand: 48 units
*   Safety Stock: 48 * 0.15 = 7.2 units ≈ 7 units
*   **Required Stock Level (Target):** 48 + 7 = **55 units**
*   Current Stock: 50 units
*   **Units to Order:** 55 - 50 = **5 units**

**Future Stock Levels & Ordering Summary (Q1 2024)**

| Product   | Forecasted Demand (Units) | Safety Stock (Units) | Required Stock Level (Target) | Current Stock (Units) | Units to Order (Approx.) |
| :-------- | :------------------------ | :------------------- | :---------------------------- | :-------------------- | :----------------------- |
| Product A | 287                       | 43                   | 330                           | 200                   | 130                      |
| Product B | 121                       | 18                   | 139                           | 100                   | 39                       |
| Product C | 48                        | 7                    | 55                            | 50                    | 5                        |

---

### 4. Recommendations and Next Steps

1.  **Prioritize Product A:** It continues to be the highest demand product. Ensure its supply chain is robust to meet the forecasted 130 units needed for Q1.
2.  **Monitor Product C:** While its forecasted sales and demand are lower, it has a lower usage probability (0.70), suggesting some units sold might not translate to immediate active use. The current stock is nearly sufficient, but 5 units are still needed. Its historical data shows strong growth, so continued monitoring for a higher growth trend in future forecasts might be warranted.
3.  **Review Safety Stock Levels:** The 15% safety stock is a general assumption. For critical products or those with high demand variability or long lead times, a more dynamic safety stock calculation (e.g., based on forecast error and lead time variability) would be beneficial.
4.  **Refine Forecasting Model:** For more accuracy, consider:
    *   **More Granular Data:** Weekly or daily sales data can capture short-term trends.
    *   **Advanced Models:** Time series analysis methods (ARIMA, Exponential Smoothing, Prophet) can better capture complex trends, seasonality, and cycles.
    *   **External Factors:** Incorporate market trends, marketing campaigns, competitor activities, economic indicators, and holiday effects.
    *   **Product Lifecycle:** Account for new product introductions, mature products, and products nearing end-of-life.
5.  **Iterate and Adjust:** Forecasting is an ongoing process. Regularly review actual sales and demand against forecasts and adjust the model parameters accordingly for improved accuracy in subsequent periods.

This structured forecast provides a solid basis for operational planning for Q1 2024.

## Sales Insights

In [26]:
from datetime import datetime
from dateutil.relativedelta import relativedelta

# Sales Data
sales_data = orderData

# Sales Predictions
current_date = datetime.now()
next_month_date = current_date + relativedelta(months=1)
next_month_yearmonth = next_month_date.strftime("%Y-%m")

sales_pred_input = s.demand_forecast_preprocessor(orderData, inventoryData)
sales_predictions = []

for index, row in sales_pred_input.iterrows():
    sales_predictions.append({
        'Category Name': row.Category,
        'Customer Segment': row.CustomerSegment,
        'Predicted Demand for next month': s.predict_demand_forecast({
            'category': row.Category,
            'month': next_month_yearmonth,
            'avg_price': row.AveragePrice,
            'customer_segment': row.CustomerSegment,
            'discount_rate': row.AverageDiscount
        })[0]
    })

# Product Categories
product_categories = ["Clothing","Technology","Sports and Fitness","Other"]

# Current Inventory
current_inventory = inventoryData

# Usage Probabilities
usage_probabilities = "Currently empty. Please ignore this section for now."

configuration generated by an older version of XGBoost, please export the model by calling
`Booster.save_model` from that version first, then load it back in current version. See:

    https://xgboost.readthedocs.io/en/stable/tutorials/saving_model.html

for more details about differences between saving model and serializing.

  setstate(state)
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [27]:
# Sales Insights Section
section_sales_insights = client.models.generate_content(
    model=MODEL_ID,
    contents=
f'''Generate a sales insights report that describes information for the following areas:

1. Sales Trends: Summarize sales based on the provided data. Provide insights on which product categories are seeing the highest demand.
2. Product Performance: Analyze the best-selling product categories by quantity. Highlight the top 3 performing categories.
3. Product Demand Forecast: Based on the historical sales and usage probability, forecast the demand for the next month.
4. Restocking or Discontinuation: Recommend which products should be restocked and which should be discontinued, based on sales trends and inventory levels.

Data:
- Historical sales data: {sales_data}
- Sales volume predictions for next month: {sales_predictions}
- Product categories: {product_categories}
- Current inventory levels: {current_inventory}
- Usage probabilities: {usage_probabilities}'''
)

display(Markdown(section_sales_insights.text))

## Storage Optimizations

In [30]:
location_predictions = []
for item in inventoryData:
    location_predictions.append({
        'Item Id': item['ItemId'],
        'Current Location': item['Location'],
        'Predicted Location': s.predict_location({
            'Priority': item['Priority'],
            'Product_Type': item['Category'],
            'Size': item['Size'],
            'Order_Quantity': item['Quantity'],
            'Weight': item['Weight']
        })[0]
    })

section_storage_optimizations = client.models.generate_content(
    model=MODEL_ID,
    contents=
f'''Provide detailed storage optimization recommendations based on:

1. Current storage utilization metrics
2. Model-predicted optimal locations vs current locations
3. List of items flagged for relocation, including:
   - Current location
   - Recommended location

Data:
{inventoryData}
{location_predictions}
'''
)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


display(Markdown(section_storage_optimizations.text))

## Anomalies Detected

In [38]:
section_anomalies_detected = client.models.generate_content(
    model=MODEL_ID,
    contents=
f'''Generate an anomalies section that lists all detected storage anomalies detected in a table. Include each item's current location, predicted location, item id, and name.
Include the reason for each anomaly.

Data:
{s.detect_anomalies(inventoryData)}
'''
)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


display(Markdown(section_anomalies_detected.text))

## Summary

In [41]:
# Read Markdown content
monthly_report = f'''<h1 style="text-align:center;">Monthly Report</h1><br>

# Products Overview:
section_products_overview.text

# Category Distribution:
section_category_distribution.text

# Product Usage Forecast:
section_product_usage.text

# Sales Insights:
{section_sales_insights.text}

# Storage Optimizations:
{section_storage_optimizations.text}

# Anomalies Detected:
{section_anomalies_detected.text}'''

In [42]:
section_summary = client.models.generate_content(
    model=MODEL_ID,
    contents=
f'''Provide a brief and concise summary of the provided report, covering all the key points highlighted by each section. Highlight the important information for each section in a bullet list, with the final paragraph providing general insight on overall performance.

Report:
{monthly_report}'''
)

display(Markdown(section_summary.text))

# PDF File Generation

In [45]:
current_date = datetime.now()
monthly_report = f"""# <h1 style="text-align:center;">Monthly Report ({current_date.date()})</h1><br>

# Products Overview:
section_products_overview.text

# Category Distribution:
section_category_distribution.text

# Product Usage Forecast:
section_product_usage.text

# Sales Insights:
{section_sales_insights.text}

# Storage Optimizations:
{section_storage_optimizations.text}

# Anomalies Detected:
{section_anomalies_detected.text}

# Summary:
{section_summary.text}"""

In [46]:
from markdown_pdf import MarkdownPdf, Section

pdf = MarkdownPdf(toc_level=1)
pdf.add_section(Section(monthly_report)) # Add Section(md_content, user_css=css_content) for custom CSS
pdf.save(f"MonthlyReport_({current_date.date()}).pdf")