In [109]:
#Previsão de demanda

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import matplotlib.pyplot as plt

In [110]:
df = pd.read_csv('plan/train.csv')

df['date'] = pd.to_datetime(df['date'])

df = df.filter(["date", "item", "sales"]).groupby([pd.Grouper(key="date", freq="W-MON"), "item"]).sum().reset_index()

df

Unnamed: 0,date,item,sales
0,2013-01-07,1,894
1,2013-01-07,2,2320
2,2013-01-07,3,1444
3,2013-01-07,4,834
4,2013-01-07,5,664
...,...,...,...
13045,2018-01-01,46,3182
13046,2018-01-01,47,1166
13047,2018-01-01,48,2655
13048,2018-01-01,49,1546


In [111]:

# Defina uma função para aplicar o deslocamento de vendas por item
def add_shift_sale(group):
    group['shift_sale'] = group['sales'].shift(1)
    return group.iloc[1:]

# Agrupar o DataFrame por 'item' e aplicar a função para cada grupo
df = df.groupby('item').apply(add_shift_sale).reset_index(drop=True)

df

Unnamed: 0,date,item,sales,shift_sale
0,2013-01-14,1,863,894.0
1,2013-01-21,1,867,863.0
2,2013-01-28,1,816,867.0
3,2013-02-04,1,969,816.0
4,2013-02-11,1,920,969.0
...,...,...,...,...
12995,2017-12-04,50,4503,5517.0
12996,2017-12-11,50,3829,4503.0
12997,2017-12-18,50,3856,3829.0
12998,2017-12-25,50,3900,3856.0


In [112]:
models = []
predicted_sales = []
metric_mse = []
metric_mae = []
metric_r2 = []
item_df = []

n = df['item'].nunique()

for item in range(1, n + 1):
    item_data = df[df['item'] == item]

    X = item_data[['sales', 'shift_sale']]
    y = item_data['sales']

    # Normalização dos dados
    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    y = np.ravel(y)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

    model = RandomForestRegressor(n_estimators=100, random_state=0)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)


    item_models[item] = model
    item_predictions[item] = y_pred[0]
    item_metric[item] = [mse, mae, r2]

    item_df.append(item)
    predicted_sales.append(y_pred[0])
    metric_mse.append(mse)
    metric_mae.append(mae)
    metric_r2.append(r2)


df_final = pd.DataFrame({'Item': item_df, 'Previsão de Vendas': predicted_sales, 'MSE': metric_mse, 'MAE': metric_mae, 'R2':  metric_r2})

df_final




Unnamed: 0,Item,Previsão de Vendas,MSE,MAE,R2
0,1,2137.79,29.530154,3.813462,0.999746
1,2,5648.69,1154.425221,15.099423,0.99854
2,3,3534.74,155.296058,7.196538,0.999486
3,4,2160.58,82.128633,5.332115,0.999272
4,5,1812.28,34.167125,3.358654,0.999581
5,6,5694.52,1081.371117,15.100192,0.998599
6,7,5658.0,367.017696,13.189615,0.999529
7,8,7565.03,1382.91121,20.088654,0.998991
8,9,4957.51,347.636121,9.409423,0.999412
9,10,7128.81,779.350063,17.729038,0.999343
