In [None]:
import itertools
from datetime import timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.statespace.sarimax import SARIMAX

def prepare_time_series_data(combined_data):
    """Build a gap-free daily series of post counts.

    Args:
        combined_data: DataFrame with a datetime-like ``createdPost`` column
            (the ``.dt`` accessor is used on it).

    Returns:
        A Series indexed by a ``DatetimeIndex`` with daily frequency, running
        from the first to the last observed date. Each value is the number of
        rows created on that day; days without any row are reported as 0.
    """
    # Count rows per calendar day.
    daily_posts = combined_data.groupby(combined_data['createdPost'].dt.date).size()
    daily_posts.index = pd.DatetimeIndex(daily_posts.index)

    # groupby only yields days that have at least one post. Without filling
    # the gaps the index carries no frequency, so a "7-step" seasonal lag
    # would not correspond to one week and statsmodels warns on every model
    # it builds. Days with no rows genuinely had zero posts.
    return daily_posts.asfreq('D', fill_value=0)

def find_best_parameters(data, seasonality, *,
                         p_range=range(0, 4), d_range=range(0, 3),
                         q_range=range(0, 4), P_range=range(0, 3),
                         D_range=range(0, 2), Q_range=range(0, 3)):
    """
    Find the SARIMA orders with the lowest AIC via an exhaustive grid search.

    Every combination of the candidate orders is fitted with ``SARIMAX``
    (stationarity and invertibility constraints disabled) and the one with
    the smallest AIC is kept. With the default ranges this fits
    4 * 3 * 4 * 3 * 2 * 3 = 1296 models, so narrow the ranges when a quicker
    search is enough.

    Args:
        data: Time series passed unchanged to ``SARIMAX``.
        seasonality: Seasonal period, used as the last element of the
            seasonal order.
        p_range, d_range, q_range: Candidate non-seasonal AR, differencing
            and MA orders.
        P_range, D_range, Q_range: Candidate seasonal AR, differencing and
            MA orders.

    Returns:
        ``((p, d, q), (P, D, Q, seasonality))`` for the best model, or
        ``None`` if no candidate could be fitted.
    """
    best_aic = np.inf
    best_params = None

    # product() iterates with the last range varying fastest, i.e. in the
    # same order as six nested loops, so ties on AIC resolve identically.
    grid = itertools.product(p_range, d_range, q_range,
                             P_range, D_range, Q_range)
    for p, d, q, P, D, Q in grid:
        order = (p, d, q)
        seasonal_order = (P, D, Q, seasonality)
        try:
            results = SARIMAX(data,
                              order=order,
                              seasonal_order=seasonal_order,
                              enforce_stationarity=False,
                              enforce_invertibility=False).fit(disp=False)
        except Exception:
            # Best effort: some order combinations cannot be fitted on a
            # given series, so skip them. Catching Exception (rather than a
            # bare except) keeps Ctrl-C / SystemExit able to stop the search.
            continue

        if results.aic < best_aic:
            best_aic = results.aic
            best_params = (order, seasonal_order)

    return best_params

def train_sarima_model(data, order=(2,1,2), seasonal_order=(1,1,1,7)):
    """Fit a SARIMA model on ``data`` and return the fitted results.

    Args:
        data: Time series passed unchanged to ``SARIMAX``.
        order: Non-seasonal ``(p, d, q)`` order.
        seasonal_order: Seasonal ``(P, D, Q, s)`` order.

    Returns:
        The object returned by ``SARIMAX(...).fit(disp=False)``.
    """
    # Stationarity / invertibility constraints are switched off, so the
    # optimiser is free to pick any coefficients.
    sarimax_options = {
        'order': order,
        'seasonal_order': seasonal_order,
        'enforce_stationarity': False,
        'enforce_invertibility': False,
    }
    return SARIMAX(data, **sarimax_options).fit(disp=False)

def make_predictions(model, steps=180):
    """Forecast ``steps`` periods ahead with a fitted model.

    Args:
        model: Object exposing ``get_forecast(steps=...)``, whose result has
            a ``predicted_mean`` attribute and a ``conf_int()`` method.
        steps: Number of periods to forecast (default 180).

    Returns:
        A ``(predictions, confidence_intervals)`` tuple: the forecast's
        ``predicted_mean`` and the value returned by its ``conf_int()``.
    """
    horizon = model.get_forecast(steps=steps)
    return horizon.predicted_mean, horizon.conf_int()

def plot_predictions(historical_data, predictions, confidence_intervals):
    """Plot the historical series, the forecast and its confidence band.

    Args:
        historical_data: Object with ``index`` and ``values``; drawn as a
            solid black line.
        predictions: Object with ``index`` and ``values``; drawn as a dashed
            blue line.
        confidence_intervals: Object supporting ``.iloc``; column 0 is used
            as the lower bound and column 1 as the upper bound of the shaded
            band.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    _figure, axis = plt.subplots(figsize=(15, 8))

    lower_bound = confidence_intervals.iloc[:, 0]
    upper_bound = confidence_intervals.iloc[:, 1]

    # Observed history.
    axis.plot(historical_data.index, historical_data.values,
              color='black', label='Datos históricos')

    # Forecast mean.
    axis.plot(predictions.index, predictions.values,
              color='blue', linestyle='--', label='Predicciones')

    # Shaded band between the two interval bounds. The "95%" in the label is
    # fixed text, not derived from the interval data.
    axis.fill_between(confidence_intervals.index, lower_bound, upper_bound,
                      alpha=0.2, color='blue',
                      label='Intervalo de confianza 95%')

    axis.set_title('Predicción de posts para los próximos 6 meses')
    axis.set_xlabel('Fecha')
    axis.set_ylabel('Número de posts')
    axis.legend()

    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

def main(combined_data, seasonality=7, steps=180):
    """Run the full forecasting pipeline on the raw post data.

    Builds the daily post-count series, grid-searches SARIMA orders, fits
    the final model, forecasts, plots the result and aggregates the forecast
    by calendar month.

    Args:
        combined_data: DataFrame accepted by ``prepare_time_series_data``.
        seasonality: Seasonal period handed to the parameter search
            (default 7, i.e. weekly on daily data).
        steps: Number of periods to forecast (default 180).

    Returns:
        ``(model, predictions, confidence_intervals, monthly_pred,
        best_params)`` where ``monthly_pred`` is the mean of the forecast
        values within each calendar month.

    Raises:
        ValueError: If the parameter search could not fit any model.
    """
    # Build the daily series.
    print("Preparando datos...")
    daily_posts = prepare_time_series_data(combined_data)

    # NOTE: a seasonal decomposition used to be computed here, but its result
    # was never read (the period was fixed regardless), so it was dropped.

    # Search for the best orders.
    print("Buscando mejores parámetros...")
    best_params = find_best_parameters(daily_posts, seasonality)
    if best_params is None:
        # Fail with a clear message instead of a TypeError on best_params[0].
        raise ValueError(
            "No se pudo ajustar ningún modelo SARIMA con los datos proporcionados"
        )
    print(f"Mejores parámetros encontrados: {best_params}")

    # Fit the final model with the selected orders.
    print("Entrenando modelo final...")
    order, seasonal_order = best_params
    model = train_sarima_model(daily_posts,
                               order=order,
                               seasonal_order=seasonal_order)

    # Forecast.
    print("Generando predicciones...")
    predictions, confidence_intervals = make_predictions(model, steps=steps)

    # Visualise history and forecast.
    plot_predictions(daily_posts, predictions, confidence_intervals)

    # Average of the forecast values within each calendar month.
    monthly_pred = predictions.groupby(predictions.index.to_period("M")).mean()

    return model, predictions, confidence_intervals, monthly_pred, best_params

# Run the full analysis
combined_data = pd.read_csv('../data/processed_data.csv', parse_dates=['createdPost'])  # createdPost is parsed as datetimes; make sure the data loads correctly
model, predictions, confidence_intervals, monthly_predictions, best_params = main(combined_data)

# Show the monthly aggregation of the forecast
print("\nPredicciones mensuales para los próximos 6 meses:")
print(monthly_predictions)

# Show the orders of the selected model
print("\nParámetros del modelo:")
print(f"SARIMA{best_params[0]}{best_params[1]}")


Preparando datos...
Buscando mejores parámetros...


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._