# Limpieza de datos

In [72]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

Carga de Datos

In [73]:
df = pd.read_csv('../data/training.csv')
print(df.shape)
df.head()

(11696, 15)


Unnamed: 0,Date,Store ID,Product ID,Category,Region,Inventory Level,Units Sold,Units Ordered,Demand Forecast,Price,Discount,Weather Condition,Holiday/Promotion,Competitor Pricing,Seasonality
0,2022-11-19,S005,P0020,Furniture,South,452,432,176,442.08,84.09,0,Snowy,1,85.46,Summer
1,2023-06-29,S005,P0006,Clothing,South,377,212,31,221.02,50.49,15,Sunny,1,46.74,Autumn
2,2022-03-10,S005,P0010,Furniture,South,420,38,185,46.02,27.34,5,Sunny,1,22.62,Spring
3,2022-02-07,S005,P0001,Electronics,South,92,9,32,14.67,88.84,10,Snowy,1,85.76,Summer
4,2022-06-23,S005,P0015,Furniture,West,272,250,178,268.0,98.73,20,Sunny,1,96.54,Summer


In [74]:
df.columns

Index(['Date', 'Store ID', 'Product ID', 'Category', 'Region',
       'Inventory Level', 'Units Sold', 'Units Ordered', 'Demand Forecast',
       'Price', 'Discount', 'Weather Condition', 'Holiday/Promotion',
       'Competitor Pricing', 'Seasonality'],
      dtype='object')

In [75]:
df.dtypes

Date                   object
Store ID               object
Product ID             object
Category               object
Region                 object
Inventory Level         int64
Units Sold              int64
Units Ordered           int64
Demand Forecast       float64
Price                 float64
Discount                int64
Weather Condition      object
Holiday/Promotion       int64
Competitor Pricing    float64
Seasonality            object
dtype: object

In [76]:
# Dumificación 

# 1. Verificar que la columna 'Date' exista
if 'Date' not in df.columns:
    raise ValueError("La columna 'Date' no existe en el DataFrame.")

# 2. Convertir 'Date' a datetime, y manejar errores
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')

# 3. Verificar cuántas fechas no se pudieron convertir
num_invalidas = df['Date'].isna().sum()
if num_invalidas > 0:
    print(f"⚠️ Se encontraron {num_invalidas} fechas no válidas que serán descartadas.")

# 4. Eliminar filas con fechas inválidas
df = df.dropna(subset=['Date'])

# 5. Extraer componentes útiles
df['Mes'] = df['Date'].dt.month
df['Dia'] = df['Date'].dt.day
df['Dia_Semana'] = df['Date'].dt.weekday

# 6. Eliminar columna original de fecha
df.drop(columns=['Date'], inplace=True)
# 2. Columnas categóricas a dummificar (incluye nuevas)
columnas_a_dummificar = ['Region', 'Seasonality', 'Category', 'Weather Condition', 'Store ID', 'Product ID']

# 3. Dummificar todas las columnas con nombres en forma de pregunta
df_dummies = pd.get_dummies(df[columnas_a_dummificar],
                             prefix=[f'¿{col} es' for col in columnas_a_dummificar],
                             prefix_sep=' ')

# 4. Reemplazar columnas originales con las nuevas dummies
df_limpio = pd.concat([df.drop(columns=columnas_a_dummificar), df_dummies], axis=1)

# 5. Convertir columnas booleanas (True/False) a enteros (1/0)
for col in df_limpio.columns:
    if df_limpio[col].dtype == 'bool':
        df_limpio[col] = df_limpio[col].astype(int)



In [77]:
df_limpio

Unnamed: 0,Inventory Level,Units Sold,Units Ordered,Demand Forecast,Price,Discount,Holiday/Promotion,Competitor Pricing,Mes,Dia,...,¿Product ID es P0011,¿Product ID es P0012,¿Product ID es P0013,¿Product ID es P0014,¿Product ID es P0015,¿Product ID es P0016,¿Product ID es P0017,¿Product ID es P0018,¿Product ID es P0019,¿Product ID es P0020
0,452,432,176,442.08,84.09,0,1,85.46,11,19,...,0,0,0,0,0,0,0,0,0,1
1,377,212,31,221.02,50.49,15,1,46.74,6,29,...,0,0,0,0,0,0,0,0,0,0
2,420,38,185,46.02,27.34,5,1,22.62,3,10,...,0,0,0,0,0,0,0,0,0,0
3,92,9,32,14.67,88.84,10,1,85.76,2,7,...,0,0,0,0,0,0,0,0,0,0
4,272,250,178,268.00,98.73,20,1,96.54,6,23,...,0,0,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11691,341,6,126,6.56,18.97,0,1,17.93,9,17,...,0,1,0,0,0,0,0,0,0,0
11692,228,217,188,219.03,81.45,0,1,82.25,11,2,...,0,0,0,0,0,0,0,0,1,0
11693,377,105,31,104.03,82.45,15,0,86.37,9,27,...,1,0,0,0,0,0,0,0,0,0
11694,99,5,26,24.36,10.42,10,1,9.36,2,13,...,0,0,0,0,0,0,0,0,0,0


In [78]:
df_limpio.columns

Index(['Inventory Level', 'Units Sold', 'Units Ordered', 'Demand Forecast',
       'Price', 'Discount', 'Holiday/Promotion', 'Competitor Pricing', 'Mes',
       'Dia', 'Dia_Semana', '¿Region es East', '¿Region es North',
       '¿Region es South', '¿Region es West', '¿Seasonality es Autumn',
       '¿Seasonality es Spring', '¿Seasonality es Summer',
       '¿Seasonality es Winter', '¿Category es Clothing',
       '¿Category es Electronics', '¿Category es Furniture',
       '¿Category es Groceries', '¿Category es Toys',
       '¿Weather Condition es Cloudy', '¿Weather Condition es Rainy',
       '¿Weather Condition es Snowy', '¿Weather Condition es Sunny',
       '¿Store ID es S005', '¿Product ID es P0001', '¿Product ID es P0002',
       '¿Product ID es P0003', '¿Product ID es P0004', '¿Product ID es P0005',
       '¿Product ID es P0006', '¿Product ID es P0007', '¿Product ID es P0008',
       '¿Product ID es P0009', '¿Product ID es P0010', '¿Product ID es P0011',
       '¿Product ID es 

In [79]:
# Normalizacion tras la dummificacion

from sklearn.preprocessing import MinMaxScaler
import pandas as pd
import numpy as np

# 5. Normalización tras la dummificación
df_normalizado = df_limpio.copy()

# Seleccionar columnas numéricas
columnas_numericas = df_normalizado.select_dtypes(include=['int64', 'float64']).columns

# Aplicar normalización Min-Max
scaler = MinMaxScaler()
df_normalizado[columnas_numericas] = scaler.fit_transform(df_normalizado[columnas_numericas])

# Verificar que todas las columnas sean numéricas (seguridad para LSTM)
assert all(df_normalizado.dtypes.apply(lambda dt: np.issubdtype(dt, np.number))), "Hay columnas no numéricas en df_normalizado"



In [80]:
df_normalizado

Unnamed: 0,Inventory Level,Units Sold,Units Ordered,Demand Forecast,Price,Discount,Holiday/Promotion,Competitor Pricing,Mes,Dia,...,¿Product ID es P0011,¿Product ID es P0012,¿Product ID es P0013,¿Product ID es P0014,¿Product ID es P0015,¿Product ID es P0016,¿Product ID es P0017,¿Product ID es P0018,¿Product ID es P0019,¿Product ID es P0020
0,0.893333,0.874494,0.866667,0.875417,0.823203,0.00,1.0,0.804221,11,19,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,0.726667,0.429150,0.061111,0.447039,0.449828,0.75,1.0,0.415075,6,29,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.822222,0.076923,0.916667,0.107918,0.192577,0.25,1.0,0.172663,3,10,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.093333,0.018219,0.066667,0.047167,0.875986,0.50,1.0,0.807236,2,7,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.493333,0.506073,0.877778,0.538078,0.985887,1.00,1.0,0.915578,6,23,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11691,0.646667,0.012146,0.588889,0.031451,0.099567,0.00,1.0,0.125528,9,17,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11692,0.395556,0.439271,0.933333,0.443183,0.793866,0.00,1.0,0.771960,11,2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
11693,0.726667,0.212551,0.061111,0.220332,0.804978,0.75,0.0,0.813367,9,27,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11694,0.108889,0.010121,0.033333,0.065945,0.004556,0.50,1.0,0.039397,2,13,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [81]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import pandas as pd

lookback = 10
target_col = 'Units Sold'

# 1. Filtrar solo columnas numéricas
df_numerico = df_normalizado.select_dtypes(include=[np.number])

# 2. Verificar que la columna objetivo esté incluida
if target_col not in df_numerico.columns:
    raise ValueError(f"La columna objetivo '{target_col}' no está en las columnas numéricas.")

# 3. Normalizar todas las columnas numéricas
scaler = MinMaxScaler()
df_normalizado = pd.DataFrame(scaler.fit_transform(df_numerico), columns=df_numerico.columns)

# 4. Convertir a array y obtener índice de la columna objetivo
data_values = df_normalizado.values
target_index = df_normalizado.columns.get_loc(target_col)

# 5. Crear ventanas (lookback) para X e y
X, y = [], []
for i in range(lookback, len(data_values)):
    X.append(data_values[i - lookback:i])
    y.append(data_values[i][target_index])

# 6. Transformar en arrays 2D para DataFrame final
X_array = np.array(X).reshape(len(X), -1)
y_array = np.array(y).reshape(-1, 1)

# 7. Combinar en un DataFrame
df_vector_transformado = pd.DataFrame(np.hstack((X_array, y_array)))
df_vector_transformado.columns = [f'feature_{i+1}' for i in range(df_vector_transformado.shape[1] - 1)] + ['Target_Units_Sold']



In [82]:
df_vector_transformado

Unnamed: 0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,feature_10,...,feature_482,feature_483,feature_484,feature_485,feature_486,feature_487,feature_488,feature_489,feature_490,Target_Units_Sold
0,0.893333,0.874494,0.866667,0.875417,0.823203,0.00,1.0,0.804221,0.909091,0.600000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.208502
1,0.726667,0.429150,0.061111,0.447039,0.449828,0.75,1.0,0.415075,0.454545,0.933333,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.429150
2,0.822222,0.076923,0.916667,0.107918,0.192577,0.25,1.0,0.172663,0.181818,0.300000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.556680
3,0.093333,0.018219,0.066667,0.047167,0.875986,0.50,1.0,0.807236,0.090909,0.200000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.135628
4,0.493333,0.506073,0.877778,0.538078,0.985887,1.00,1.0,0.915578,0.454545,0.733333,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11681,0.568889,0.593117,0.711111,0.592105,0.899433,1.00,1.0,0.854271,0.636364,0.133333,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012146
11682,0.473333,0.336032,0.694444,0.335788,0.910768,1.00,0.0,0.827638,1.000000,0.733333,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.439271
11683,0.157778,0.127530,0.133333,0.138361,0.156128,0.25,0.0,0.189347,0.818182,0.166667,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.212551
11684,0.004444,0.012146,0.111111,0.068192,0.466719,0.25,0.0,0.500905,0.636364,0.300000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010121


In [83]:
# Comprension de datos

from sklearn.decomposition import PCA

# 1. Separar features numéricas
X_features = df_vector_transformado.drop(columns=['Target_Units_Sold'], errors='ignore')

# 2. Asegurar que hay columnas numéricas
if X_features.empty:
    raise ValueError("No hay columnas numéricas para aplicar PCA.")

# 3. Convertir a float por seguridad (aunque ya deberían serlo)
X_features = X_features.astype(float)

# 4. Aplicar PCA
pca = PCA()
X_pca = pca.fit_transform(X_features)

# 5. Filtrar componentes con varianza explicada sobre el promedio
varianza_explicada = pca.explained_variance_ratio_
promedio_varianza = varianza_explicada.mean()
componentes_utiles = np.where(varianza_explicada > promedio_varianza)[0]
X_comprimido = X_pca[:, componentes_utiles]

# 6. Crear nuevo DataFrame comprimido
df_comprimido = pd.DataFrame(X_comprimido, columns=[f'PC_{i+1}' for i in componentes_utiles])
df_comprimido['Target_Units_Sold'] = df_vector_transformado['Target_Units_Sold'].values

# Resultado final
df_normalizado_final = df_comprimido.copy()


In [84]:
df_normalizado_final 

Unnamed: 0,PC_1,PC_2,PC_3,PC_4,PC_5,PC_6,PC_7,PC_8,PC_9,PC_10,...,PC_172,PC_173,PC_174,PC_175,PC_176,PC_177,PC_178,PC_179,PC_180,Target_Units_Sold
0,-0.338737,0.789277,-0.399547,0.747823,1.228503,1.224751,-0.819283,-0.376441,-0.407008,-0.147177,...,0.549662,0.445546,0.876085,-0.179696,0.023838,0.230187,0.181317,0.047291,-0.115112,0.208502
1,0.124037,0.971506,0.463261,0.541462,1.326632,-0.369979,0.648714,-0.283615,-0.944197,0.562789,...,-0.620025,-0.337519,-0.178956,-0.476061,0.111954,0.404079,0.225848,-0.309231,0.097057,0.429150
2,0.079727,1.185126,0.772364,1.024679,-0.610177,-0.592555,0.051091,0.281027,-0.623529,-0.863110,...,0.374446,0.529557,-0.346324,-0.074254,0.089982,-0.372629,-0.279486,-0.380417,0.146309,0.556680
3,0.703410,0.866059,-0.252469,-0.170366,-1.302108,0.756610,-0.222591,-0.172495,-0.193320,-1.027007,...,0.015350,-0.435281,0.183465,-0.260859,-0.243496,-0.522843,0.354353,0.162700,-0.640947,0.135628
4,-0.028350,0.235337,-0.303839,-1.469398,-0.013075,-0.280576,0.530169,-0.554790,-0.562334,0.523788,...,-0.062628,0.547965,-0.244991,-0.015629,-0.308705,0.340084,0.620916,0.363282,-0.074157,0.052632
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11681,0.247296,-1.161537,-0.157666,-0.264079,-0.169544,-0.205545,-0.777746,0.266359,-0.660672,0.250344,...,0.030856,-0.296662,-0.000192,0.403966,-0.009206,-0.112164,0.310297,-0.238879,0.567350,0.012146
11682,-1.043237,-0.939540,-0.603052,-0.390273,0.179279,0.199660,0.288503,1.285805,-0.188455,-0.997956,...,-0.081784,0.332441,0.367831,0.322862,-0.462111,-0.204681,0.044967,-0.181562,0.154944,0.439271
11683,0.105487,-0.570649,0.462980,-0.016698,0.741010,-0.161425,0.136766,1.312691,0.022370,-0.233338,...,-0.169668,-0.009593,0.276148,-0.270307,-0.371806,0.289318,0.075699,0.036308,0.042182,0.212551
11684,-0.766141,0.171736,0.156165,0.793627,-0.036591,-0.327143,0.627989,0.826874,0.670628,-0.526657,...,0.194360,-0.108812,-0.214438,-0.187703,-0.130256,0.338016,-0.281798,-0.313375,0.073054,0.010121


In [86]:
# Normalizacion 2

from sklearn.preprocessing import MinMaxScaler

# 1. Separar features comprimidas y target
X_comprimido = df_comprimido.drop(columns=['Target_Units_Sold'])
y_target = df_comprimido['Target_Units_Sold']

# 2. Normalizar solo las PCs
scaler = MinMaxScaler()
X_normalizado = scaler.fit_transform(X_comprimido)

# 3. Crear nuevo DataFrame con nombre final
df_f = pd.DataFrame(X_normalizado, columns=X_comprimido.columns)
df_f['Target_Units_Sold'] = y_target.values



In [87]:
df_f

Unnamed: 0,PC_1,PC_2,PC_3,PC_4,PC_5,PC_6,PC_7,PC_8,PC_9,PC_10,...,PC_172,PC_173,PC_174,PC_175,PC_176,PC_177,PC_178,PC_179,PC_180,Target_Units_Sold
0,0.421263,0.693199,0.389382,0.664745,0.784778,0.815346,0.269276,0.413092,0.391062,0.469253,...,0.764624,0.705513,0.862065,0.400447,0.545689,0.609051,0.552146,0.503317,0.461114,0.208502
1,0.524654,0.740068,0.601405,0.614975,0.809093,0.400575,0.598834,0.437524,0.257980,0.652899,...,0.230141,0.366050,0.428798,0.275228,0.584778,0.684709,0.571864,0.362778,0.555755,0.429150
2,0.514754,0.795011,0.677362,0.731517,0.329189,0.342685,0.464670,0.586140,0.337422,0.284063,...,0.684560,0.741932,0.360066,0.444999,0.575031,0.346774,0.348101,0.334717,0.577725,0.556680
3,0.654096,0.712947,0.425524,0.443298,0.157742,0.693588,0.403230,0.466772,0.444000,0.241668,...,0.520473,0.323670,0.577631,0.366155,0.427096,0.281418,0.628767,0.548811,0.226558,0.135628
4,0.490608,0.550727,0.412901,0.129999,0.477139,0.423827,0.572221,0.366150,0.352582,0.642811,...,0.484841,0.749912,0.401680,0.469769,0.398169,0.656866,0.746802,0.627879,0.479383,0.052632
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11681,0.552192,0.191453,0.448821,0.420696,0.438369,0.443342,0.278600,0.582280,0.328220,0.572079,...,0.527558,0.383762,0.502210,0.647055,0.531030,0.460099,0.609258,0.390510,0.765536,0.012146
11682,0.263866,0.248550,0.339374,0.390261,0.524801,0.548731,0.517968,0.850602,0.445205,0.249182,...,0.476088,0.656482,0.653344,0.612787,0.330116,0.419846,0.491770,0.413104,0.581577,0.439271
11683,0.520510,0.343428,0.601335,0.480359,0.663987,0.454817,0.483904,0.857679,0.497434,0.446965,...,0.435930,0.508208,0.615693,0.362163,0.370177,0.634778,0.505378,0.498988,0.531278,0.212551
11684,0.325774,0.534369,0.525940,0.675792,0.471312,0.411716,0.594181,0.729810,0.658031,0.371093,...,0.602271,0.465196,0.414227,0.397064,0.477331,0.655966,0.347077,0.361144,0.545048,0.010121


In [88]:
# Limpieza de la data previa la memoria

def limpiar_df(df, target_col='Target_Units_Sold'):
    df = df.copy()

    # 1. Eliminar filas con valores nulos (si los hay)
    df.dropna(inplace=True)

    # 2. Detectar y eliminar outliers con IQR en todas las columnas numéricas excepto el target
    def remove_outliers_iqr(df, columnas):
        for col in columnas:
            Q1 = df[col].quantile(0.25)
            Q3 = df[col].quantile(0.75)
            IQR = Q3 - Q1
            lower = Q1 - 1.5 * IQR
            upper = Q3 + 1.5 * IQR
            df = df[(df[col] >= lower) & (df[col] <= upper)]
        return df

    columnas_numericas = df.select_dtypes(include=[np.number]).columns.tolist()
    if target_col in columnas_numericas:
        columnas_numericas.remove(target_col)

    df = remove_outliers_iqr(df, columnas_numericas)

    # 3. (Opcional) Eliminar duplicados
    df.drop_duplicates(inplace=True)

    return df


In [90]:
df_f_limpio = limpiar_df(df_f)


In [91]:
df_f_limpio

Unnamed: 0,PC_1,PC_2,PC_3,PC_4,PC_5,PC_6,PC_7,PC_8,PC_9,PC_10,...,PC_172,PC_173,PC_174,PC_175,PC_176,PC_177,PC_178,PC_179,PC_180,Target_Units_Sold
0,0.421263,0.693199,0.389382,0.664745,0.784778,0.815346,0.269276,0.413092,0.391062,0.469253,...,0.764624,0.705513,0.862065,0.400447,0.545689,0.609051,0.552146,0.503317,0.461114,0.208502
1,0.524654,0.740068,0.601405,0.614975,0.809093,0.400575,0.598834,0.437524,0.257980,0.652899,...,0.230141,0.366050,0.428798,0.275228,0.584778,0.684709,0.571864,0.362778,0.555755,0.429150
3,0.654096,0.712947,0.425524,0.443298,0.157742,0.693588,0.403230,0.466772,0.444000,0.241668,...,0.520473,0.323670,0.577631,0.366155,0.427096,0.281418,0.628767,0.548811,0.226558,0.135628
4,0.490608,0.550727,0.412901,0.129999,0.477139,0.423827,0.572221,0.366150,0.352582,0.642811,...,0.484841,0.749912,0.401680,0.469769,0.398169,0.656866,0.746802,0.627879,0.479383,0.052632
7,0.462741,0.402568,0.297169,0.452115,0.384961,0.688176,0.598756,0.504689,0.446956,0.457164,...,0.596988,0.376633,0.570901,0.544211,0.306857,0.446563,0.322292,0.484181,0.345432,0.475709
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11679,0.797023,0.473588,0.483190,0.518081,0.402109,0.429650,0.346038,0.430308,0.399165,0.694719,...,0.381662,0.519587,0.370139,0.511573,0.739393,0.423537,0.581945,0.540310,0.217251,0.273279
11680,0.459287,0.348252,0.261294,0.465269,0.469853,0.350916,0.444293,0.139937,0.416796,0.307854,...,0.646194,0.489174,0.502638,0.544970,0.726637,0.450051,0.694946,0.537988,0.585853,0.054656
11681,0.552192,0.191453,0.448821,0.420696,0.438369,0.443342,0.278600,0.582280,0.328220,0.572079,...,0.527558,0.383762,0.502210,0.647055,0.531030,0.460099,0.609258,0.390510,0.765536,0.012146
11683,0.520510,0.343428,0.601335,0.480359,0.663987,0.454817,0.483904,0.857679,0.497434,0.446965,...,0.435930,0.508208,0.615693,0.362163,0.370177,0.634778,0.505378,0.498988,0.531278,0.212551


In [93]:
import os

# Ruta relativa desde src/doc hacia src/data
output_path = '../data/lstm_training.csv'

# Guardar el DataFrame limpio
df_f_limpio.to_csv(output_path, index=False)

