In [18]:
import pandas as pd
import numpy as np

# Load the wide-format sales table (update the filename as needed).
# The reshape below requires the columns 't', 'Year' and 'Month' to exist;
# every other column is treated as one SKU's sales series.
df = pd.read_csv('sales_top10_skus.csv')

# Melt to long format: one row per (t, Year, Month, SKU) with the value in 'Sales'.
id_vars = ['t', 'Year', 'Month']
sku_columns = [col for col in df.columns if col not in id_vars]
df_long = df.melt(
    id_vars=id_vars,
    value_vars=sku_columns,  # explicit: exactly the non-id columns
    var_name='SKU',
    value_name='Sales',
)

# Remove possible '$' / ',' formatting from string amounts and convert to float.
# The pattern is a raw string so that '\$' reaches the regex engine unchanged
# instead of being parsed as an (invalid) Python string escape.
df_long['Sales'] = df_long['Sales'].replace(r'[\$,]', '', regex=True).astype(float)


In [19]:
# Add a 'Date' column truncated to the first day of each (Year, Month)

month_map = {
    'January': 1, 'February': 2, 'March': 3, 'April': 4, 'May': 5, 'June': 6,
    'July': 7, 'August': 8, 'September': 9, 'October': 10, 'November': 11, 'December': 12
}

# Normalise surrounding whitespace and letter case before the lookup so that
# values such as ' january' still match a key of month_map.
month_names = df_long['Month'].astype(str).str.strip().str.title()
month_numbers = month_names.map(month_map)

# Fail early with a readable message: an unmapped name would otherwise become
# NaN, turn the column into floats and break the date construction below.
unmapped = month_numbers.isna()
if unmapped.any():
    bad_months = sorted(df_long.loc[unmapped, 'Month'].astype(str).unique())
    raise ValueError(f"Unrecognised month name(s) in 'Month' column: {bad_months}")

df_long['Month_num'] = month_numbers.astype('int64')

# Assemble the timestamp from numeric components (day fixed to 1) rather than
# parsing a concatenated string.
df_long['Date'] = pd.to_datetime(
    pd.DataFrame({'year': df_long['Year'], 'month': df_long['Month_num'], 'day': 1})
)


# Show the first 5 rows
df_long.head()

Unnamed: 0,t,Year,Month,SKU,Sales,Month_num,Date
0,1,2021,January,6HSD4J,811306.11,1,2021-01-01
1,2,2021,February,6HSD4J,660086.35,2,2021-02-01
2,3,2021,March,6HSD4J,849879.16,3,2021-03-01
3,4,2021,April,6HSD4J,1379693.06,4,2021-04-01
4,5,2021,May,6HSD4J,759690.8,5,2021-05-01


In [22]:
# Forecast each SKU with Simple Exponential Smoothing (SES)

from statsmodels.tsa.holtwinters import SimpleExpSmoothing

FORECAST_HORIZON = 6   # number of months forecast past each SKU's last observation
MIN_OBSERVATIONS = 4   # SKUs with fewer usable observations are skipped

results = []
sku_list = df_long['SKU'].unique()

for sku in sku_list:
    # Keep only months that have a sales figure and order them chronologically.
    # Dropping missing values first means a series with a few gaps is still
    # fitted on its observed points instead of passing NaN into the model.
    sku_data = (
        df_long[df_long['SKU'] == sku]
        .dropna(subset=['Sales'])
        .sort_values('Date')
    )
    y = sku_data['Sales'].values

    # Skip SKUs that are too short (this also covers all-NaN series, which are
    # empty after the dropna above) or that never recorded a sale.
    if len(y) < MIN_OBSERVATIONS or (y == 0).all():
        continue

    # Fit SES; the smoothing level and the initial level are both estimated.
    model = SimpleExpSmoothing(y, initialization_method="estimated")
    fit = model.fit(optimized=True)
    forecast = fit.forecast(FORECAST_HORIZON)

    # Forecast months start at the month following the last observed one.
    last_date = sku_data['Date'].max()
    forecast_dates = pd.date_range(
        last_date + pd.offsets.MonthBegin(1), periods=FORECAST_HORIZON, freq='MS'
    )
    for d, val in zip(forecast_dates, forecast):
        results.append({'SKU': sku, 'Forecast_Month': d.strftime('%Y-%m'), 'Forecasted_Sales': val})

# Collect the results; the explicit column list keeps the frame well-formed
# (and usable by later cells) even when every SKU was skipped.
forecast_df = pd.DataFrame(results, columns=['SKU', 'Forecast_Month', 'Forecasted_Sales'])
forecast_df



       SKU Forecast_Month  Forecasted_Sales
0   6HSD4J        2022-02      7.522817e+05
1   6HSD4J        2022-03      7.522817e+05
2   6HSD4J        2022-04      7.522817e+05
3   6HSD4J        2022-05      7.522817e+05
4   6HSD4J        2022-06      7.522817e+05
5   6HSD4J        2022-07      7.522817e+05
6   7XL27C        2022-02      2.184225e+05
7   7XL27C        2022-03      2.184225e+05
8   7XL27C        2022-04      2.184225e+05
9   7XL27C        2022-05      2.184225e+05
10  7XL27C        2022-06      2.184225e+05
11  7XL27C        2022-07      2.184225e+05
12  BJ30D6        2022-02      9.793043e+04
13  BJ30D6        2022-03      9.793043e+04
14  BJ30D6        2022-04      9.793043e+04
15  BJ30D6        2022-05      9.793043e+04
16  BJ30D6        2022-06      9.793043e+04
17  BJ30D6        2022-07      9.793043e+04
18   FJD6B        2022-02      2.102086e+06
19   FJD6B        2022-03      2.102086e+06
20   FJD6B        2022-04      2.102086e+06
21   FJD6B        2022-05      2

In [30]:
# Persist the long-format forecasts (one row per SKU and forecast month) for
# further analysis; index=False keeps the DataFrame's row index out of the file.
forecast_df.to_csv(path_or_buf="forecast_next6m.csv", index=False)


In [44]:
# Optional: also write the forecasts in wide format, which is easier to read

# Reshape so that each Forecast_Month is one row and each SKU is one column
# holding its Forecasted_Sales value.
wide_df = (
    forecast_df
    .set_index(['Forecast_Month', 'SKU'])['Forecasted_Sales']
    .unstack('SKU')
    .reset_index()
)

# Write the wide table to CSV without the row index
wide_df.to_csv(path_or_buf='forecast_next6m_wide.csv', index=False)