In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)


In [None]:
# Load the M5 evaluation sales table from the Kaggle input directory.
df = pd.read_csv("/kaggle/input/m5-forecasting-accuracy/sales_train_evaluation.csv")

# Draw a reproducible sample of 300 rows (fixed seed), index them by 'id' and
# keep only the trailing 1941 columns.
# NOTE(review): presumably these are the daily sales columns -- confirm against the CSV header.
datax = (
    df.sample(n=300, random_state=123)
    .set_index('id')
    .iloc[:, -1941:]
)


In [None]:
def train_test_split(data, n_test):
    """Split a 2-D array into leading training rows and trailing test rows.

    Parameters
    ----------
    data : 2-D array
        Anything supporting ``len()`` and ``data[rows, :]`` indexing
        (e.g. a ``numpy.ndarray``).
    n_test : int
        Number of rows, counted from the end, to hold out as the test part.

    Returns
    -------
    tuple
        ``(train, test)`` where ``test`` holds the last ``n_test`` rows and
        ``train`` holds everything before them.

    Raises
    ------
    ValueError
        If ``n_test`` is negative.
    """
    if n_test < 0:
        raise ValueError(f"n_test must be non-negative, got {n_test}")
    # Use an explicit boundary: with negative-index slicing, ``data[:-0]`` is
    # empty and ``data[-0:]`` is the whole array, which would swap the two
    # parts when n_test == 0.
    split = max(len(data) - n_test, 0)
    return data[:split, :], data[split:, :]

def calculate_zeros_percentage(arr):
    """Return the percentage (0-100) of elements in ``arr`` that equal zero.

    Parameters
    ----------
    arr : array-like
        Values to inspect; converted with ``np.asarray`` so plain lists work
        as well as arrays.

    Returns
    -------
    float
        ``100 * (#zeros / #elements)``, or ``0.0`` when ``arr`` is empty.
    """
    values = np.asarray(arr)
    total_elements = values.size
    # An empty input has no zeros to report; avoid dividing by zero.
    if total_elements == 0:
        return 0.0
    num_zeros = np.count_nonzero(values == 0)
    return (num_zeros / total_elements) * 100
    
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a time series as a supervised-learning table of shifted copies.

    The input is wrapped in a DataFrame and concatenated column-wise with
    shifted versions of itself: first the lags ``t-n_in, ..., t-1``, then the
    leads ``t, t+1, ..., t+n_out-1``.

    Parameters
    ----------
    data : list or array-like
        The series; anything ``pd.DataFrame`` accepts (1-D or 2-D).
    n_in : int, default 1
        Number of lagged copies (input columns per variable).
    n_out : int, default 1
        Number of forward copies, starting with the unshifted series.
    dropnan : bool, default True
        Drop the rows containing NaN (the shifting leaves NaN at the edges).

    Returns
    -------
    numpy.ndarray
        The concatenated table as a 2-D array.
    """
    frame = pd.DataFrame(data)
    # Oldest lag first so columns read left-to-right in time order.
    lagged = [frame.shift(lag) for lag in range(n_in, 0, -1)]
    leads = [frame.shift(-step) for step in range(n_out)]
    agg = pd.concat(lagged + leads, axis=1)
    if dropnan:
        agg = agg.dropna()
    return agg.values

import seaborn as sns

import matplotlib.pyplot as plt
def prediction_plot(testY, test_predict):
    """Plot actual values against predictions on one figure and show it.

    Both series are drawn against their position (0, 1, 2, ...); the
    prediction is truncated to the length of ``testY``.
    """
    n_steps = len(testY)
    steps = list(range(n_steps))

    plt.figure(figsize=(8, 4))
    plt.plot(steps, testY[:n_steps], marker='.', label="actual")
    plt.plot(steps, test_predict[:n_steps], 'r', label="prediction")
    plt.tight_layout()
    sns.despine(top=True)
    plt.subplots_adjust(left=0.07)
    plt.xlabel('Time step', size=15)
    plt.ylabel('Pred Trend', size=15)
    plt.legend(fontsize=15)
    plt.show()
    
def smape(a, f):
    """Symmetric mean absolute percentage error, expressed in percent.

    Computes ``100 / n * sum(2 * |f - a| / (|a| + |f|))``.

    Parameters
    ----------
    a : iterable
        Actual values; each element must be convertible with ``float()``.
    f : iterable
        Forecast values; each element must be convertible with ``float()``.

    Returns
    -------
    float
        The sMAPE in percent. Positions where actual and forecast are both
        zero contribute 0 (a perfect forecast) rather than NaN.

    Raises
    ------
    ValueError
        If ``a`` is empty.
    """
    a = np.array([float(val) for val in a])
    f = np.array([float(val) for val in f])
    if a.size == 0:
        raise ValueError("smape requires at least one observation")

    numerator = 2 * np.abs(f - a)
    denominator = np.abs(a) + np.abs(f)

    # 0/0 happens exactly when actual == forecast == 0; count that term as 0
    # so a single such position does not turn the whole score into NaN.
    ratio = np.divide(
        numerator,
        denominator,
        out=np.zeros_like(numerator),
        where=denominator != 0,
    )
    return 100 / len(a) * np.sum(ratio)

In [None]:
# LightGBM training parameters passed to lgb.train.
params = dict(
    objective="poisson",
    metric="poisson",
    learning_rate=0.09,
    sub_feature=0.9,       # LightGBM alias of feature_fraction
    sub_row=0.75,          # LightGBM alias of bagging_fraction
    bagging_freq=1,
    lambda_l2=0.1,
    verbosity=1,
    # Number of boosting rounds; when present in params, LightGBM uses this
    # value in preference to a num_boost_round argument given to lgb.train.
    num_iterations=2000,
    num_leaves=32,
    min_data_in_leaf=50,
)


In [None]:
import lightgbm as lgb
from sklearn.metrics import mean_squared_error

# Fit one LightGBM model per sampled series and score it on the last 28 steps.
smapeList = []   # sMAPE of each series, in iteration order
predsList = []   # 28-step prediction array of each series, in iteration order

# Iterate over every row of `datax` rather than a hard-coded count so the loop
# stays in step with the sample size.
for i in range(len(datax)):
    print("-----------------------",i,"-------------------------------")

    # One series as an (n_steps, 1) column vector.
    values = datax.iloc[i].values.reshape(-1, 1)

    # Supervised framing: 6 lagged values as features, current value as target.
    data = series_to_supervised(values, n_in=6)
    train, test = train_test_split(data, 28)
    trainX, trainY = train[:, :-1], train[:, -1]
    testX, testY = test[:, :-1], test[:, -1]

    train_data = lgb.Dataset(trainX, label=trainY)
    # NOTE(review): the held-out window is also the early-stopping validation
    # set, so the stopping point is tuned on the data being scored and the
    # reported sMAPE is optimistic. Consider a separate validation window.
    valid_data = lgb.Dataset(testX, label=testY)

    # Early stopping and periodic logging are passed as callbacks: the
    # `early_stopping_rounds=` / `verbose_eval=` keywords were removed from
    # lgb.train in LightGBM 4.0. The number of boosting rounds comes from
    # params["num_iterations"], which LightGBM prefers over num_boost_round,
    # so that keyword is not passed here.
    m_lgb = lgb.train(
        params,
        train_data,
        valid_sets=[train_data, valid_data],
        callbacks=[
            lgb.early_stopping(stopping_rounds=30),
            lgb.log_evaluation(period=20),
        ],
    )

    preds = m_lgb.predict(testX)

    smapeList.append(smape(testY, preds))
    predsList.append(preds)

    print('----------------------predic test----------------------------')
    prediction_plot(testY, preds)

In [None]:
# Series ids in the same order as the rows of `datax`.
idsx = datax.index.tolist()
ids = idsx

# Build a DataFrame with one row of predictions per series, indexed by id.
df = (
    pd.DataFrame(predsList)
    .assign(id=ids)
    .set_index('id')
)
# Save the DataFrame to a CSV file, writing the id index as the first column.
df.to_csv('preds_lgbm_300sOrginal.csv', index=True)

In [None]:
# Arithmetic mean of the collected sMAPE scores, rounded to two decimals for display.
p = sum(smapeList) / len(smapeList)
sp = round(p, 2)
print('sMape promedio: ', sp)

In [None]:
# Persist the collected sMAPE scores as a single-column CSV (default integer index).
dfr = pd.DataFrame(data=smapeList)
dfr.to_csv("Results_lgbm_300s.csv")