<a href="https://colab.research.google.com/github/LucasMirandaVS/estudos_python/blob/main/V1_Forecast_Rest_(Beta).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from datetime import datetime, timedelta

# Load the raw order history
path = '/content/Result_1.csv'
df = pd.read_csv(path)

# Parse the 'DAY' column into datetimes
df['DAY'] = pd.to_datetime(df['DAY'])

# Derive calendar features from the date column
df = df.assign(
    year=lambda frame: frame['DAY'].dt.year,
    month=lambda frame: frame['DAY'].dt.month,
    day=lambda frame: frame['DAY'].dt.day,
    dayofweek=lambda frame: frame['DAY'].dt.dayofweek,
    weekofyear=lambda frame: frame['DAY'].dt.isocalendar().week,
    is_weekend=lambda frame: frame['DAY'].dt.dayofweek >= 5,
)

# Holiday flag (example calendar).
# NOTE: the list below only contains 2024 dates, so any row dated outside
# 2024 is never flagged as a holiday.
feriados = pd.to_datetime([
    '2024-01-01', '2024-04-21', '2024-05-01', '2024-06-15', '2024-08-11',
    '2024-09-07', '2024-10-12', '2024-11-02', '2024-11-15', '2024-12-25',
])
df['is_holiday'] = df['DAY'].isin(feriados)

# Feature matrix (X) and target (y); 'TIER' is kept in X so rows can be
# filtered per tier later on.
X = df[[
    'year',
    'month',
    'day',
    'dayofweek',
    'weekofyear',
    'is_weekend',
    'is_holiday',
    'TIER',
]]
y = df['ORDERS']

# Train one model for a single tier
def train_model(tier, X, y):
    """Fit a gradient-boosting pipeline on the rows belonging to one tier.

    Args:
        tier: Value of the ``TIER`` column selecting the rows to train on.
        X: Feature frame with the columns ``year``, ``month``, ``day``,
            ``dayofweek``, ``weekofyear``, ``is_weekend``, ``is_holiday``
            and ``TIER``.
        y: Target series sharing its index with ``X``.

    Returns:
        The fitted ``Pipeline``, or ``None`` (after printing a message) when
        the tier has fewer than two rows and therefore cannot be split into
        train and test sets.
    """
    # Compute the row mask once and reuse it for features and target.
    tier_mask = X['TIER'] == tier
    X_tier = X[tier_mask]
    y_tier = y[tier_mask]

    # Zero rows means there is nothing to train on, and a single row makes
    # train_test_split raise because the train set would be empty. Both cases
    # are reported as "not enough data" instead of crashing the caller's loop.
    if X_tier.shape[0] < 2:
        print(f"Sem dados suficientes para treinar para o tier: {tier}")
        return None

    # NOTE: the 20% hold-out is not evaluated anywhere in this function; the
    # split is kept so the fitted model is unchanged for a given random_state.
    X_train, _X_test, y_train, _y_test = train_test_split(
        X_tier, y_tier, test_size=0.2, random_state=42
    )

    numeric_features = ['year', 'month', 'day', 'dayofweek', 'weekofyear']
    categorical_features = ['is_weekend', 'is_holiday']

    numeric_transformer = StandardScaler()
    # Pin both boolean levels explicitly. With inferred categories, a tier
    # whose training rows contain no holiday (or no weekend) would make the
    # encoder reject True as an unknown category at prediction time.
    categorical_transformer = OneHotEncoder(
        categories=[[False, True]] * len(categorical_features),
        drop='first',
    )

    # Columns not listed here (e.g. 'TIER') are dropped by the default
    # remainder setting of ColumnTransformer.
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', numeric_transformer, numeric_features),
            ('cat', categorical_transformer, categorical_features),
        ])

    pipeline = Pipeline(steps=[
        ('preprocessor', preprocessor),
        ('regressor', GradientBoostingRegressor(random_state=42)),
    ])

    pipeline.fit(X_train, y_train)

    return pipeline

# Train one model per tier.
# Rows with a missing TIER are excluded up front: NaN never compares equal to
# itself, so iterating over it could only ever select zero rows and produce a
# misleading "not enough data" message for tier "nan".
tiers = X['TIER'].dropna().unique()
models = {}

for tier in tiers:
    model = train_model(tier, X, y)
    # train_model returns None when the tier cannot be trained; skip those.
    if model is not None:
        models[tier] = model

# Forecast the next 365 days for every trained tier
def predict_next_365_days(models, tiers, start_date, *, holidays=None):
    """Predict daily orders for 365 consecutive days, for each trained tier.

    Args:
        models: Mapping of tier -> fitted model exposing ``predict(DataFrame)``.
        tiers: Iterable of tiers to forecast; tiers absent from ``models``
            are skipped.
        start_date: First day of the forecast (anything ``pd.Timestamp``
            accepts, e.g. a ``datetime``). Any time-of-day part is ignored.
        holidays: Optional iterable of holiday dates. Defaults to the
            module-level ``feriados`` calendar.

    Returns:
        A DataFrame with the columns ``DAY`` (``YYYY-MM-DD`` string), ``TIER``
        and ``ORDERS``, holding 365 rows per forecast tier in date order.
        When no tier can be forecast the frame is empty but keeps the columns.
    """
    if holidays is None:
        # Fall back to the notebook-wide holiday calendar.
        holidays = feriados
    holiday_index = pd.to_datetime(list(holidays)).normalize()

    first_day = pd.Timestamp(start_date)
    if first_day.tzinfo is not None:
        # Keep the wall-clock date, matching what ``start_date.date()`` gave.
        first_day = first_day.tz_localize(None)
    days = pd.date_range(first_day.normalize(), periods=365, freq='D')

    # The calendar features are identical for every tier, so build them once.
    # Holiday membership is tested Timestamp-vs-Timestamp via ``isin``; a plain
    # ``datetime.date in DatetimeIndex`` check does not match, which left
    # ``is_holiday`` False for every forecast day.
    calendar = pd.DataFrame({
        'year': days.year,
        'month': days.month,
        'day': days.day,
        'dayofweek': days.dayofweek,
        'weekofyear': days.isocalendar().week.to_numpy(dtype='int64'),
        'is_weekend': days.dayofweek >= 5,
        'is_holiday': days.isin(holiday_index),
    })
    day_labels = days.strftime('%Y-%m-%d')

    frames = []
    for tier in tiers:
        if tier not in models:
            continue

        # One batched predict call per tier instead of 365 single-row calls.
        features = calendar.assign(TIER=tier)
        frames.append(pd.DataFrame({
            'DAY': day_labels,
            'TIER': tier,
            'ORDERS': models[tier].predict(features),
        }))

    if not frames:
        return pd.DataFrame(columns=['DAY', 'TIER', 'ORDERS'])
    return pd.concat(frames, ignore_index=True)

# Forecast start date: 1 July 2024
start_date = datetime(year=2024, month=7, day=1)

# Forecast the next 365 days for every tier, beginning at start_date
predictions_next_365_days = predict_next_365_days(
    models=models,
    tiers=tiers,
    start_date=start_date,
)

# Stamp the output file name with today's date (day_month_year)
output_date = datetime.now().strftime('%d_%m_%Y')
output_path = fr'predictions_next_365_days({output_date}).csv'

# Write the forecast to CSV at output_path, without the index column
predictions_next_365_days.to_csv(path_or_buf=output_path, index=False)


Sem dados suficientes para treinar para o tier: nan
