In [1]:
from sqlalchemy import create_engine
import pymysql
import pandas as pd

engine = create_engine("mysql+pymysql://myuser:yourpassword@localhost/F&V company")

df = pd.read_sql("SELECT * FROM SalesOrders", engine)
#print(df.head())



In [83]:
# Ensure DeliveryDate is in datetime format
df['DeliveryDate'] = pd.to_datetime(df['DeliveryDate'])
df['Month'] = df['DeliveryDate'].dt.to_period('M').dt.to_timestamp() 

# Group by DeliveryDate (assuming you want to group by entire date)
grouped_df = df.drop(columns=['OrderID', 'CustomerID', 'DeliveryDate'])
grouped_df = grouped_df.groupby(['Month', 'ProductID', 'PlantID' ]).sum()  # or you can use .mean(), .count(), etc.
grouped_df = grouped_df.rename(columns={'OrderVolume': 'Volume'})
#
grouped_df['month_for_forecast'] = grouped_df.index.get_level_values('Month') + pd.DateOffset(months=12)

print(grouped_df.head())
grouped_df.to_csv("grouped_df.csv", index=False)

                               Volume month_for_forecast
Month      ProductID PlantID                            
2020-05-01 1         1        4280.73         2021-05-01
                     2         189.03         2021-05-01
                     3        1788.45         2021-05-01
           2         1         757.31         2021-05-01
                     2         539.08         2021-05-01


In [84]:
import plotly.express as px
from ipywidgets import interact, IntSlider

# Make sure you're working with the correct DataFrame
grouped_df_chart = grouped_df.reset_index()
grouped_df_chart['Month'] = pd.to_datetime(grouped_df_chart['Month'])


# Interactive function
def plot_volume(plant_id=1, product_id=1):
    filtered_df = grouped_df_chart[
        (grouped_df_chart['PlantID'] == plant_id) & (grouped_df_chart['ProductID'] == product_id)
    ]
    
    if filtered_df.empty:
        print("No data for selected PlantID and ProductID.")
        return

    fig = px.line(
        filtered_df,
        x='Month',
        y='Volume',
        title=f'Volume for Plant {plant_id}, Product {product_id}'
    )
    fig.update_traces(mode='lines+markers')
    fig.show()

# Interactive widgets
interact(
    plot_volume,
    plant_id=IntSlider(min=grouped_df_chart['PlantID'].min(), max=grouped_df_chart['PlantID'].max(), step=1, value=1),
    product_id=IntSlider(min=grouped_df_chart['ProductID'].min(), max=grouped_df_chart['ProductID'].max(), step=1, value=1)
)


interactive(children=(IntSlider(value=1, description='plant_id', max=5, min=1), IntSlider(value=1, description…

<function __main__.plot_volume(plant_id=1, product_id=1)>

In [107]:
#Calculatig std() of volume per each combination of plant material and month in the past 5yrs
# Ensure 'Month' is a datetime and reset index if needed
grouped_df2 = grouped_df.reset_index()
grouped_df2['Month'] = pd.to_datetime(grouped_df2['Month'])

# Add MonthNum
grouped_df2['MonthNum'] = grouped_df2['Month'].dt.month

# For clarity, let's retain the first Month for each group as a reference
grouped_df2['Month'] = grouped_df2.groupby(['MonthNum', 'PlantID', 'ProductID'])['Month'].transform('min')

# Now group and calculate std
std_df = grouped_df2.groupby(['MonthNum', 'PlantID', 'ProductID']).agg({
    'Volume': 'std',
}).reset_index()

std_df = std_df[['MonthNum', 'PlantID', 'ProductID', 'Volume']]
std_df = std_df.rename(columns={'Volume': 'Volume_std'})
std_df = std_df.set_index(['MonthNum', 'PlantID', 'ProductID'])
std_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Volume_std
MonthNum,PlantID,ProductID,Unnamed: 3_level_1
1,1,1,3089.260076
1,1,2,
1,1,3,2198.269542
1,1,4,468.039390
1,1,5,1634.643603
...,...,...,...
12,5,146,4127.212576
12,5,147,529.101483
12,5,148,1895.593164
12,5,149,2924.094330


In [86]:
# Check the latest date and define first forecast month
max_month = df['Month'].max() 
first_forecast_month = max_month + pd.DateOffset(months=1)
last_forecast_month = first_forecast_month + pd.DateOffset(months=10)
#print(max_month)
#print(first_forecast_month)
#print(last_forecast_month)



In [87]:
# Create forecast months
all_forecast_months = pd.date_range(start=max_month, end=last_forecast_month, freq='MS')  # 'MS' stands for Month Start
#all_forecast_months



In [138]:
# import products and plant to create the base
products_df = pd.read_sql("SELECT ProductID FROM Products", engine)
plant_df = pd.read_sql("SELECT PlantID FROM Plant", engine)

# Create forecast dataframe
forecast_df = []

for month in all_forecast_months:
    for _, plant in plant_df.iterrows():
        for _, product in products_df.iterrows():
            forecast_df.append({
                'Month': month,
                'PlantID': plant['PlantID'],
                'ProductID': product['ProductID']
                
            })

#Creating dataframe
forecast_df = pd.DataFrame(forecast_df)
forecast_df['month_for_forecast'] = forecast_df['Month'] + pd.DateOffset(months=-12)

forecast_df


Unnamed: 0,Month,PlantID,ProductID,month_for_forecast
0,2025-05-01,1,1,2024-05-01
1,2025-05-01,1,2,2024-05-01
2,2025-05-01,1,3,2024-05-01
3,2025-05-01,1,4,2024-05-01
4,2025-05-01,1,5,2024-05-01
...,...,...,...,...
8995,2026-04-01,5,146,2025-04-01
8996,2026-04-01,5,147,2025-04-01
8997,2026-04-01,5,148,2025-04-01
8998,2026-04-01,5,149,2025-04-01


In [139]:
#Merging to get forecast
forecast_df = forecast_df.merge(grouped_df, how='left', on=['month_for_forecast', 'PlantID', 'ProductID'])
forecast_df = forecast_df.drop(columns='month_for_forecast')

#Merging to get monthly std
forecast_df['MonthNum'] = forecast_df['Month'].dt.month
forecast_df = forecast_df.merge(std_df, how='left', on=['MonthNum', 'PlantID', 'ProductID'])




In [140]:
forecast_df['Volume_std'].max()
forecast_df

Unnamed: 0,Month,PlantID,ProductID,Volume,MonthNum,Volume_std
0,2025-05-01,1,1,,5,2024.093984
1,2025-05-01,1,2,6989.07,5,2735.121393
2,2025-05-01,1,3,575.07,5,463.788238
3,2025-05-01,1,4,1699.67,5,2309.533024
4,2025-05-01,1,5,2380.12,5,1759.370177
...,...,...,...,...,...,...
8995,2026-04-01,5,146,186.89,4,1325.647138
8996,2026-04-01,5,147,1436.00,4,294.565160
8997,2026-04-01,5,148,1508.32,4,4010.399638
8998,2026-04-01,5,149,,4,1938.568596


In [141]:
import numpy as np

min_val = -forecast_df['Volume_std'] / 100
max_val = forecast_df['Volume_std'] / 100

# Generate random float per row
random_float = np.random.uniform(0, 1, size=len(forecast_df)) * (max_val - min_val) + min_val

forecast_df['Volume'] = np.where(
    forecast_df['Volume'] + random_float < 0,
    0,
    forecast_df['Volume'] + random_float
)

forecast_df = forecast_df.drop(['Volume_std', 'MonthNum'], axis=1)
forecast_df['Volume'] = forecast_df['Volume'].fillna(0)

forecast_df

Unnamed: 0,Month,PlantID,ProductID,Volume
0,2025-05-01,1,1,0.000000
1,2025-05-01,1,2,6987.494854
2,2025-05-01,1,3,574.880375
3,2025-05-01,1,4,1716.425227
4,2025-05-01,1,5,2377.272237
...,...,...,...,...
8995,2026-04-01,5,146,184.560464
8996,2026-04-01,5,147,1437.403058
8997,2026-04-01,5,148,1520.862017
8998,2026-04-01,5,149,0.000000


In [142]:
#Setting indexes
forecast_df = forecast_df.set_index(['Month', 'PlantID', 'ProductID'])



forecast_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Volume
Month,PlantID,ProductID,Unnamed: 3_level_1
2025-05-01,1,1,0.0
2025-05-01,1,2,6987.494854
2025-05-01,1,3,574.880375
2025-05-01,1,4,1716.425227
2025-05-01,1,5,2377.272237


In [143]:
# Convert DataFrame rows to SQL VALUES format
values = [
    f"('{row.Month.date()}', {row.PlantID}, {row.ProductID}, {row.Volume:.2f})"
    for _, row in forecast_df.reset_index().iterrows()
]

# Write to SQL file
with open("Forecast.sql", "w") as f:
    f.write("DROP TABLE IF EXISTS Forecast;\n")
    f.write("CREATE TABLE Forecast(\n    Month DATE,\n    PlantID INT,\n    ProductID INT,\n    Volume DECIMAL(20,2)\n);\n")
    f.write("INSERT INTO Forecast (Month, PlantID, ProductID, Volume)\nVALUES\n")
    f.write(",\n".join(values))
    f.write(";\n")