In [1]:
from sqlalchemy import create_engine
import pymysql
import pandas as pd

engine = create_engine("mysql+pymysql://myuser:yourpassword@localhost/F&V company")

df = pd.read_sql("SELECT * FROM SalesOrders", engine)
print(df.head())



ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject

In [107]:
# Ensure DeliveryDate is in datetime format
df['DeliveryDate'] = pd.to_datetime(df['DeliveryDate'])
df['Month'] = df['DeliveryDate'].dt.to_period('M').dt.to_timestamp() 

# Group by DeliveryDate (assuming you want to group by entire date)
grouped_df = df.drop(columns=['OrderID', 'CustomerID', 'DeliveryDate'])
grouped_df = grouped_df.groupby(['Month', 'ProductID', 'PlantID' ]).sum()  # or you can use .mean(), .count(), etc.
grouped_df = grouped_df.rename(columns={'OrderVolume': 'Volume'})
#
grouped_df['month_for_forecast'] = grouped_df.index.get_level_values('Month') + pd.DateOffset(months=12)

print(grouped_df.head())

                               Volume month_for_forecast
Month      ProductID PlantID                            
2020-05-01 1         1        4280.73         2021-05-01
                     2         189.03         2021-05-01
                     3        1788.45         2021-05-01
           2         1         757.31         2021-05-01
                     2         539.08         2021-05-01


In [96]:
# Check the latest date and define first forecast month
max_month = df['Month'].max() 
first_forecast_month = max_month + pd.DateOffset(months=1)
last_forecast_month = first_forecast_month + pd.DateOffset(months=10)
print(max_month)
print(first_forecast_month)
print(last_forecast_month)



2025-05-01 00:00:00
2025-06-01 00:00:00
2026-04-01 00:00:00


In [97]:
# Create forecast months
all_forecast_months = pd.date_range(start=max_month, end=last_forecast_month, freq='MS')  # 'MS' stands for Month Start
all_forecast_months



DatetimeIndex(['2025-05-01', '2025-06-01', '2025-07-01', '2025-08-01',
               '2025-09-01', '2025-10-01', '2025-11-01', '2025-12-01',
               '2026-01-01', '2026-02-01', '2026-03-01', '2026-04-01'],
              dtype='datetime64[ns]', freq='MS')

In [131]:

# import products and plant to create the base
products_df = pd.read_sql("SELECT ProductID FROM Products", engine)
plant_df = pd.read_sql("SELECT PlantID FROM Plant", engine)

# Create forecast dataframe
forecast_df = []

for month in all_forecast_months:
    for _, plant in plant_df.iterrows():
        for _, product in products_df.iterrows():
            forecast_df.append({
                'Month': month,
                'PlantID': plant['PlantID'],
                'ProductID': product['ProductID']
                
            })

#Creating dataframe
forecast_df = pd.DataFrame(forecast_df)
forecast_df['month_for_forecast'] = forecast_df['Month'] + pd.DateOffset(months=-12)


#Merging to get forecast
forecast_df = forecast_df.merge(grouped_df, how='left', on=['month_for_forecast', 'PlantID', 'ProductID'])
forecast_df['Volume'] = forecast_df['Volume'].fillna(0)

#Setting indexes
forecast_df = forecast_df.drop(columns='month_for_forecast')
forecast_df = forecast_df.set_index(['Month', 'PlantID', 'ProductID'])


forecast_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Volume
Month,PlantID,ProductID,Unnamed: 3_level_1
2025-05-01,1,1,0.0
2025-05-01,1,2,6989.07
2025-05-01,1,3,575.07
2025-05-01,1,4,1699.67
2025-05-01,1,5,2380.12


In [134]:
# Convert DataFrame rows to SQL VALUES format
values = [
    f"('{row.Month.date()}', {row.PlantID}, {row.ProductID}, {row.Volume:.2f})"
    for _, row in forecast_df.reset_index().iterrows()
]

# Write to SQL file
with open("Forecast.sql", "w") as f:
    f.write("DROP TABLE IF EXISTS Forecast;\n")
    f.write("CREATE TABLE Forecast(\n    Month DATE,\n    PlantID INT,\n    ProductID INT,\n    Volume DECIMAL(20,2)\n);\n")
    f.write("INSERT INTO Forecast (Month, PlantID, ProductID, Volume)\nVALUES\n")
    f.write(",\n".join(values))
    f.write(";\n")