In [38]:

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")


In [39]:
import statsmodels.api as sm
import pandas as pd

from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.graphics.tsaplots import plot_acf
from pmdarima.utils import decomposed_plot
from statsmodels.tsa.seasonal import seasonal_decompose
from pmdarima.arima import auto_arima
from statsmodels.tsa.stattools import acf
from sklearn.preprocessing import MinMaxScaler


def convertir_rango_edad_a_numero(rango_edad):
    # Dividir el rango de edad por 3 y tomar la parte entera del resultado
    return int(rango_edad // 3)


# Aplicar la función a la serie
def mean_absolute_percentage_error(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100



def calculo_metricas(results):
    residuals = results.resid

    mae = np.mean(np.abs(residuals))
    result = acorr_ljungbox(residuals,period=52)
    mse = np.mean(residuals**2)
    rmse = np.sqrt(mse)
    p_value=round(result.lb_pvalue.mean(),3)

    return(mae,mse,rmse,results.aic,results.bic,p_value)

def summary_table(results):
    summary_table = results.summary().tables[1]

    # Convert the summary table to a DataFrame
    summary_df = pd.DataFrame(summary_table.data[1:], columns=summary_table.data[0])

    # Clean up the DataFrame
    summary_df = summary_df.apply(pd.to_numeric, errors='ignore')

    # Rename columns for better access
    summary_df.columns = ['variable', 'coef', 'std_err', 'z', 'P>|z|', '[0.025', '0.975]']

    return(summary_table )

In [41]:
df_f=pd.read_csv("Bases//Base_Limpia_Added.csv")

In [43]:
df_f["provincia"].unique()

array(['Buenos Aires', 'CABA', 'Chaco', 'Cordoba', 'Formosa', 'Jujuy',
       'La Pampa', 'Misiones', 'Salta', 'Santa Fe', 'Tucuman', 'San Luis',
       'Catamarca', 'Entre Rios', 'San Juan', 'Tierra del Fuego',
       'Corrientes', 'Neuquen', 'Chubut', 'Santiago del Estero',
       'Mendoza', 'Santa Cruz', 'Rio Negro', 'La Rioja'], dtype=object)

In [44]:
df_c0=pd.read_csv("Bases//Base_Casos_Act.csv")


cols_clima=['salud',"densidad_estimada","index obesity","TEMP","HUM","PNM","DD","FF"]

cols_pobreza=['% de hogares sin acceso a red cloacal','% de población en situación de pobreza crónica','% de población sin obra social ni prepaga'] # Mejor
cols_pobreza=['% de hogares sin acceso a red cloacal','% de población en situación de pobreza crónica']
cols_pobreza=[ 'Nivel de incidencia de pobreza crónica','% de hogares con hacinamiento crítico','% de hogares en vivienda deficitaria','% de hogares sin acceso a red cloacal','% de población en situación de pobreza crónica','% de población sin obra social ni prepaga']

cols_pobreza=['% de hogares sin acceso a red cloacal','% de población en situación de pobreza crónica','% de población sin obra social ni prepaga'] # Mejor
cols_pobreza=[ 'Nivel de incidencia de pobreza crónica','% de hogares con hacinamiento crítico','% de hogares en vivienda deficitaria','% de hogares sin acceso a red cloacal','% de población en situación de pobreza crónica','% de población sin obra social ni prepaga']

cols_pobreza=[]
cols_pred=cols_clima+cols_pobreza


l_provincias=[]

l_provincias=["Santa Fe"]

l_provincias=["CABA"]

l_provincias=["Salta","Jujuy"]

l_provincias=["Buenos Aires","CABA"]
def pre_process(l_p,df_c0,df_f,cols_pred):
    if len(l_provincias)==0:
        df_cc=df_c0.copy()
        df_f2=df_f.copy()[cols_pred+["fecha"]]
    else:
        df_cc=df_c0.loc[df_c0["provincia"].isin(l_p)]
        df_f2=df_f.loc[df_f["provincia"].isin(l_p)][cols_pred+["fecha"]]

    df_cc.drop(["provincia"],axis=1,inplace=True)
    

    columns_to_normalize = df_f2.columns.difference(['fecha'])

    scaler = MinMaxScaler()
    df_f2 = df_f2.copy()  # Crear una copia del DataFrame original
    df_f2[columns_to_normalize] = scaler.fit_transform(df_f2[columns_to_normalize])
    df_f2['fecha'] = pd.to_datetime(df_f2['fecha'])


    promedio_por_dia=df_f2.groupby(df_f2['fecha'].dt.to_period('D'))[cols_pred].mean()
    promedio_por_semana = df_f2.groupby(df_f2['fecha'].dt.to_period('W-Mon'))[cols_pred].mean()


    df_cc['fecha'] = pd.to_datetime(df_cc['fecha'])
    incidencia_por_semana2 = df_cc.groupby(df_cc['fecha'].dt.to_period('W-Mon'))['casos_corr_2'].sum()
    incidencia_por_semana2 = incidencia_por_semana2.reset_index()

    df_f2 = pd.merge(pd.DataFrame(incidencia_por_semana2, columns=["fecha", "casos_corr_2"]), 
                    pd.DataFrame(promedio_por_semana), on="fecha", how="left")
    df_f2=df_f2.rename(columns={"casos_corr_2":"Casos"})

    df_f2['fecha'] = pd.to_datetime(df_f2['fecha'].astype(str).str.split('/').str[0])


    df_f2 = df_f2.interpolate()
    return(df_f2,promedio_por_dia,promedio_por_semana )



In [45]:

def easy_models(df_f2,cols_pred):
    model = sm.tsa.ARIMA(df_f2['Casos'], order=(1, 1, 1))
    results = model.fit()
    print("ARIMA results")
    arima_results=summary_table(results)
    # print(results.summary())

    results.fittedvalues[0]=results.fittedvalues.mean() 

    arima_metrics=calculo_metricas(results)
    print(arima_metrics)
    model = sm.tsa.ARIMA(df_f2['Casos'], exog=df_f2[cols_pred], order=(1, 1, 1))
    results = model.fit()
    
    print("ARIMAX results")
    arimax_results=summary_table(results)
    print(results.summary())
    results.fittedvalues[0]=results.fittedvalues.mean()    

    arimax_metrics=calculo_metricas(results)
    print(arimax_metrics)
    print("Auto model results")
    auto_model = auto_arima(df_f2['Casos'],max_p=8, max_d=8  , max_q=8) 

    print(auto_model.summary())
    p_best = auto_model.order[0]
    d_best  = auto_model.order[1]
    q_best  = auto_model.order[2]

    order = (p_best, d_best, q_best)  # Parámetros ARIMA (p, d, q)

    model = sm.tsa.SARIMAX(df_f2['Casos'], order=order)
    results = model.fit()
    sarima_results=summary_table(results)
    print("SARIMA results")

    results.fittedvalues[0]=results.fittedvalues.mean()    

    sarima_metrics=calculo_metricas(results)
    print(sarima_metrics)
    order = (p_best, d_best, q_best) 
    pred1=df_f2[cols_pred]

    model = sm.tsa.SARIMAX(df_f2['Casos'], order=order,exog=pred1)
    results = model.fit()

    sarimax_results=summary_table(results)
    print("SARIMAX results")
    print(results.summary())

    results.fittedvalues[0]=results.fittedvalues.mean()    

    sarimax_metrics=calculo_metricas(results)
    print(sarimax_metrics)
    l_metrics=[]
    for m in [arima_metrics,
    arimax_metrics,
    sarima_metrics,
    sarimax_metrics]:
        l_metrics.append(m)


    SARIMA_name="SARIMAX ("+str(p_best)+", "+str(d_best)+", "+str(q_best)+")"
    SARIMAX_name="SARIMAX ("+str(p_best)+", "+str(d_best)+", "+str(q_best)+")"
    names=["ARIMA (1,1,1)","ARIMA(1,1,1)",SARIMA_name,SARIMAX_name]
    df_metrics=pd.DataFrame(l_metrics,columns=["mae","mse","rmse","AIC","BIC","p-value"])
    df_metrics["Model Name"]=names

    return(arima_results,arimax_results,sarima_results,sarimax_results,df_metrics)


def grif_search(df_f2,best_rmse,model_="SARIMAX"):

    l_metrics=[]
    p,d,q=4,2,4
    print("max",p,d,q)
    for p_ in range(p):
        for d_ in range(d):
            for q_ in range(q):
                order = (p_, d_, q_)  # Parámetros ARIMA (p, d, q)

                try:
                    if model_=="SARIMAX":
                        model = sm.tsa.SARIMAX(df_f2['Casos'], exog=df_f2[cols_pred], order=order)
                    elif model_=="ARIMA":
                        model = sm.tsa.ARIMA(df_f2['Casos'], exog=df_f2[cols_pred], order=order)                       
                    results = model.fit()
                    # 
                    results.fittedvalues[0]=results.fittedvalues.mean()  
            
                    mae,mse,rmse,aic,bic,p_value=calculo_metricas(results)  
                    if rmse<=best_rmse and p_value<0.1:
                        # best_rmse=rmse

                        best_results=summary_table(results)
                        print(p_,d_,q_)                      
                        if p_value<0.06:
                            print(results.summary())
                        print(mae,mse,rmse,aic,bic,p_value)
                        values=(p_,d_,q_,mae,mse,rmse,aic,bic,p_value,best_results)
                        l_metrics.append(values)
                    
                except:
                    print("error")
    
    df_best=pd.DataFrame(l_metrics)
    df_best["NAME"]="SARIMAX ("+df_best[0].astype(str)+", "+df_best[1].astype(str)+", "+df_best[2].astype(str)+")"
    df_best.drop([0,1,2],axis=1,inplace=True)
    df_best.columns=["mae","mse","rmse","AIC","BIC","p-value","results","Model Name"]
    return(df_best)
    

def best_sarimax(df_metrics,model_):
    display(df_metrics[df_metrics["p-value"]<0.05])
    best_rmse=df_metrics.loc[df_metrics["p-value"]<0.05]["rmse"].min()

    display(best_rmse)
    df_best=grif_search(df_f2,best_rmse,model_)    
    
    best_sarimax=df_best.loc[df_best["p-value"]<0.06].sort_values(by="rmse").iloc[0]

    df_metrics.loc[-1]=best_sarimax
    df_metrics=df_metrics.reset_index(drop=True).round(2)
    return(df_metrics,df_best)

for l_p in [l_provincias]:
    print(l_p)
    df_f2,promedio_dia,promedio_por_semana =pre_process(l_provincias,df_c0,df_f,cols_pred)

    arima_results,arimax_results,sarima_results,sarimax_results,df_metrics=easy_models(df_f2,cols_pred)
df_metrics

['Buenos Aires', 'CABA']
ARIMA results
(3.2811015473091207, 17.86447668909888, 4.226638935265098, 4173.663039661218, 4187.442173263646, 0.003)
ARIMAX results
                               SARIMAX Results                                
Dep. Variable:                  Casos   No. Observations:                  731
Model:                 ARIMA(1, 1, 1)   Log Likelihood               -2028.248
Date:                Wed, 26 Jun 2024   AIC                           4078.497
Time:                        19:11:40   BIC                           4129.020
Sample:                             0   HQIC                          4097.989
                                - 731                                         
Covariance Type:                  opg                                         
                        coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------------
salud                 5.8034      6.85

Unnamed: 0,mae,mse,rmse,AIC,BIC,p-value,Model Name
0,3.281102,17.864477,4.226639,4173.66304,4187.442173,0.003,"ARIMA (1,1,1)"
1,2.99613,15.265166,3.907066,4078.496877,4129.020367,0.001,"ARIMA(1,1,1)"
2,3.250109,17.510684,4.184577,4161.006245,4179.378423,0.03,"SARIMAX (2, 1, 1)"
3,2.942861,14.686746,3.832329,4053.005875,4108.122409,0.032,"SARIMAX (2, 1, 1)"


In [46]:
best_rmse=df_metrics.loc[df_metrics["p-value"]<0.05]["rmse"].min()
best_rmse

3.832329072205976

In [47]:
def simple_table_to_df(simple_table):
    data = simple_table.data[1:]  # Skip the header row
    headers = simple_table.data[0]  # Use the first row as headers
    df = pd.DataFrame(data, columns=headers)
    df = df.apply(pd.to_numeric, errors='ignore')
    return df

def best_variables(arimax_results,sarimax_results,best_sarimax):
    df1 = simple_table_to_df(arimax_results)[["","coef","P>|z|"]].add_suffix('_ARIMAX')
    df2 = simple_table_to_df(sarimax_results)[["","coef","P>|z|"]].add_suffix('_SARIMAX')
    display(best_sarimax)
    df3 = simple_table_to_df(best_sarimax["results"])[["","coef","P>|z|"]].add_suffix('_SARIMAX_BEST')

    df1 = df1.rename(columns={'_ARIMAX': 'variable'})
    df2 = df2.rename(columns={'_SARIMAX': 'variable'})
    df3 = df3.rename(columns={'_SARIMAX_BEST': 'variable'})
    df_final=pd.merge(df1,df2,on="variable",how="left")
    df_final=pd.merge(df_final,df3,on="variable",how="left")[:-3]
    df_final=df_final.loc[(df_final["P>|z|_ARIMAX"]<0.15)|(df_final["P>|z|_SARIMAX"]<0.05)|(df_final["P>|z|_SARIMAX_BEST"]<0.15)]
    return(df_final)



In [48]:
sarimax_true=df_metrics.loc[(df_metrics["Model Name"].str[:7]=="SARIMAX")&(df_metrics["p-value"]<=0.1)]

if len(sarimax_true)>0:
    model_best="SARIMAX"
else:
    model_best="ARIMA"

In [49]:
df_metrics,df_best=best_sarimax(df_metrics,model_best)

Unnamed: 0,mae,mse,rmse,AIC,BIC,p-value,Model Name
0,3.281102,17.864477,4.226639,4173.66304,4187.442173,0.003,"ARIMA (1,1,1)"
1,2.99613,15.265166,3.907066,4078.496877,4129.020367,0.001,"ARIMA(1,1,1)"
2,3.250109,17.510684,4.184577,4161.006245,4179.378423,0.03,"SARIMAX (2, 1, 1)"
3,2.942861,14.686746,3.832329,4053.005875,4108.122409,0.032,"SARIMAX (2, 1, 1)"


3.832329072205976

max 4 2 4
1 1 3
                               SARIMAX Results                                
Dep. Variable:                  Casos   No. Observations:                  731
Model:               SARIMAX(1, 1, 3)   Log Likelihood               -2012.302
Date:                Wed, 26 Jun 2024   AIC                           4050.604
Time:                        19:12:10   BIC                           4110.314
Sample:                             0   HQIC                          4073.641
                                - 731                                         
Covariance Type:                  opg                                         
                        coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------------
salud                 7.1993      6.505      1.107      0.268      -5.550      19.949
densidad_estimada    21.9009      4.119      5.317      0.000      13.828      29.973
index ob

In [51]:
df_metrics.to_csv("Resultados\\Tseries_Metricas"+str(l_provincias)+".csv",index=False)

df_variables=pd.DataFrame(df_best.iloc[-1]["results"])
df_variables[0].iloc[0]="variable"
df_variables.columns=df_variables.iloc[0]

df_variables=df_variables[1:]

df_variables.to_csv("Resultados\\Tseries_Variables"+str(l_provincias)+".csv",index=False)

In [52]:
df_variables=pd.read_csv("Resultados\\Tseries_Variables"+str(l_provincias)+".csv").round(2)

if "Unnamed: 0" in df_variables.columns:
    df_variables.drop("Unnamed: 0",axis=1,inplace=True)

df_metrics=pd.read_csv("Resultados\\Tseries_Metricas"+str(l_provincias)+".csv").round(2)

if "Unnamed: 0" in df_metrics.columns:
    df_metrics.drop("Unnamed: 0",axis=1,inplace=True)


for s in ["AIC","BIC"]:
    df_metrics[s]=df_metrics[s].astype(int)

df_aux=df_metrics.copy()
df_metrics.drop("Model Name",axis=1,inplace=True)

df_metrics=pd.concat([df_aux[["Model Name"]],df_metrics],axis=1)
print(l_provincias)
df_metrics

['Buenos Aires', 'CABA']


Unnamed: 0,Model Name,mae,mse,rmse,AIC,BIC,p-value
0,"ARIMA (1,1,1)",3.28,17.86,4.23,4173,4187,0.0
1,"ARIMA(1,1,1)",3.0,15.27,3.91,4078,4129,0.0
2,"SARIMAX (2, 1, 1)",3.25,17.51,4.18,4161,4179,0.03
3,"SARIMAX (2, 1, 1)",2.94,14.69,3.83,4053,4108,0.03
4,"SARIMAX (3, 1, 2)",2.93,14.6,3.82,4053,4117,0.03


In [55]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Dense
from sklearn.metrics import mean_squared_error
from sklearn.impute import SimpleImputer
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler


df_cc=df_c0.loc[df_c0["provincia"].isin(l_p)]
df_f2['fecha']=pd.to_datetime(df_f2['fecha'])
# df_daily = df_f2.groupby('fecha').size().reset_index(name='cases')
df_daily = df_f2.groupby(df_f2['fecha'].dt.to_period('W-Mon'))['Casos'].sum().reset_index(name='cases')





scaler = MinMaxScaler(feature_range=(0, 1))
df_daily['cases_scaled'] = scaler.fit_transform(df_daily['cases'].values.reshape(-1, 1))

scaled_data = scaler.fit_transform(df_daily['cases'].values.reshape(-1, 1))
prom_dia_variables=pd.DataFrame(promedio_dia.to_records())
prom_dia_variables=pd.DataFrame(promedio_por_semana.to_records())




prom_dia_variables["fecha"]=prom_dia_variables["fecha"].astype(str)
df_ccc=df_cc[["fecha","casos_corr_2"]]
df_ccc["fecha"]=df_ccc["fecha"].astype(str)
df_data=pd.merge(prom_dia_variables,df_ccc,on=["fecha"],how="left")

df_data.index=df_data["fecha"]
df_data=pd.DataFrame(df_data.drop("fecha",axis=1))


def create_sequences(df, seq_length):
    xs = []
    ys = []
    for i in range(len(df) - seq_length):
        x = df.iloc[i:(i + seq_length), 1:].values  # Los predictores (x1, x2, ..., xn)
        y = df.iloc[i + seq_length, 0]              # La serie temporal (y)
        xs.append(x)
        ys.append(y)
    return np.array(xs), np.array(ys)

# Seleccionar tus predictores (todas las columnas excepto "casos_corr_2")

seq_length=10
# Crear las secuencias
imputer_X = SimpleImputer(strategy='mean')
X_imputed = imputer_X.fit_transform(df_data.drop(columns=["casos_corr_2"]).values)

# Imputar NaN en y
imputer_y = SimpleImputer(strategy='mean')
y_imputed = imputer_y.fit_transform(df_data["casos_corr_2"].values.reshape(-1, 1))

# Crear las secuencias
X_seq, y_seq = create_sequences(pd.DataFrame(X_imputed), seq_length)

# División en entrenamiento y prueba
split_ratio = 0.8
split = int(len(X_seq) * split_ratio)
X_train, X_test = X_seq[:split], X_seq[split:]
y_train, y_test = y_seq[:split], y_seq[split:]

# Ajustar las dimensiones de entrada para la LSTM
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], X_train.shape[2]))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], X_test.shape[2]))

# Función para definir el modelo RNN
def rnn_model(X_train):
    model = Sequential()
    model.add(LSTM(100, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Dropout(0.2))
    model.add(LSTM(100, return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(100, return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(100))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')

    model.fit(X_train, y_train, epochs=30, batch_size=32, validation_data=(X_test, y_test))

    return model,X_train,X_test,y_train,y_test

load=0

model,X_train,X_test,y_train,y_test = rnn_model(X_train)



model.summary()

# Evaluación del modelo con los datos de prueba
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')


Epoch 1/30
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 52ms/step - loss: 0.0182 - val_loss: 0.0026
Epoch 2/30
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - loss: 0.0023 - val_loss: 8.2783e-04
Epoch 3/30
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - loss: 0.0017 - val_loss: 7.4448e-04
Epoch 4/30
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.0015 - val_loss: 7.0001e-04
Epoch 5/30
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: 0.0015 - val_loss: 7.1834e-04
Epoch 6/30
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - loss: 0.0017 - val_loss: 0.0012
Epoch 7/30
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - loss: 0.0015 - val_loss: 7.4272e-04
Epoch 8/30
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.0016 - val_loss: 9.6298e-04
Epoch 9/30
[1m18/18[0m

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 133ms/step
Mean Squared Error: 0.0007758029994051187


In [56]:
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error
import tensorflow.keras.backend as K

def calculate_log_likelihood(mse, n):
    return -n/2 * np.log(2 * np.pi * mse) - n/2


y_pred_2 = scaler.inverse_transform(y_pred)
y_test_2 = scaler.inverse_transform([y_test])

def count_parameters(model):
    return sum([np.prod(K.get_value(w).shape) for w in model.trainable_weights])

# Number of parameters in the model
num_params = count_parameters(model)

# Number of observations
n = len(y_test_2[0])

# MSE
mse = mean_squared_error(y_test_2[0], y_pred_2[:,0])

# Log likelihood
log_likelihood = calculate_log_likelihood(mse, n)

# AIC
aic = 2 * num_params - 2 * log_likelihood

# BIC
bic = np.log(n) * num_params - 2 * log_likelihood

#RMSE
rmse = np.sqrt(mse)
print(f'Loglikelihood: {round(log_likelihood,2)}')
print(f"AIC: {round(aic,2)}")
print(f"BIC: {round(bic,2)}")
print(f"MSE: {round(mse,2)}")
print(f'RMSE: {round(rmse,3)}')

# # MAE (Error Absoluto Medio)
mae = np.mean(np.abs(y_test_2[0] - y_pred_2[:, 0]))
print(f'Mean Absolute Error (MAE): {round(mae, 2)}')


Loglikelihood: -189.06
AIC: 569380.12
BIC: 1416263.85
MSE: 0.79
RMSE: 0.891
Mean Absolute Error (MAE): 0.72


In [57]:
df_final=df_metrics[-1:]

df_final.loc[-1]=['LSTM Model',round(mae, 2),round(mse,2),round(rmse,3),round(aic,2),round(bic,2),'']

df_final.drop(["p-value"],axis=1,inplace=True)
df_final=df_final.reset_index(drop=True)
df_final["AIC"]=df_final["AIC"].astype(int)
df_final["rmse"]=df_final["rmse"].round(2)
df_final["BIC"]=df_final["BIC"].astype(int)
df_final

Unnamed: 0,Model Name,mae,mse,rmse,AIC,BIC
0,"SARIMAX (3, 1, 2)",2.93,14.6,3.82,4053,4117
1,LSTM Model,0.72,0.79,0.89,569380,1416263
