In [1]:
# Paquetes Necesarios
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from typing import List, Tuple, Optional, Dict
import pandas as pd
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import argparse
import json
import time
import os
from datetime import datetime

In [2]:
class EVL(nn.Module):
    """Extreme Value Loss over three-way labels (+1 right extreme, 0 normal, -1 left extreme).

    The loss is a weighted binary cross-entropy evaluated once per tail and
    summed. ``gamma`` shapes the weights ``(1 - x / gamma) ** gamma``.
    """

    # Lower bound for the base of the weight power. For gamma < 1 the quantity
    # ``1 - x / gamma`` can become negative, and a negative base raised to a
    # fractional exponent yields NaN. A strictly positive floor also keeps the
    # gradient of ``pow`` finite at the clamp boundary.
    _BASE_MIN = 1e-6

    def __init__(self, gamma = 1):
        """Store the weight-shape hyper-parameter ``gamma``."""
        super(EVL, self).__init__()

        self.gamma = gamma

    def _peso(self, x: torch.Tensor) -> torch.Tensor:
        """Return ``(1 - x / gamma) ** gamma`` with the base floored at ``_BASE_MIN``."""
        base = torch.clamp(1 - x / self.gamma, min=self._BASE_MIN)
        return torch.pow(base, self.gamma)

    def evl_perdida(self, u:torch.Tensor, v: torch.Tensor, p_evento : float, is_right : bool) -> torch.Tensor:
        """Compute the one-sided extreme value loss, averaged over all elements.

        Args:
            u: Predicted indicator for this tail; clamped to [0.001, 0.999]
                before any logarithm is taken.
            v: Binary target for this tail (1 where the event occurred).
            p_evento: Proportion of events of this tail; the event term is
                weighted by ``1 - p_evento`` and the non-event term by ``p_evento``.
            is_right: Selects the weighting used for the right (True) or
                left (False) tail.

        Returns:
            A scalar tensor (mean over elements), so it can be added to other
            scalar losses and back-propagated directly.
        """
        beta_0 = 1 - p_evento
        beta_1 = p_evento

        u_c = torch.clamp(u, 0.001, 0.999)

        if is_right:
            pesos_extremos = beta_0 * self._peso(u_c)
            pesos_normal = beta_1 * self._peso(1 - u_c)
        else:
            # u_c / gamma is always positive, so no floor is needed here.
            pesos_extremos = beta_0 * torch.pow(u_c / self.gamma, self.gamma)
            pesos_normal = beta_1 * self._peso(u_c)

        termino_extremo = -pesos_extremos * v * torch.log(u_c)
        termino_normal = -pesos_normal * (1 - v) * torch.log(1 - u_c)

        # The original fell off the end here and returned None.
        return (termino_extremo + termino_normal).mean()

    def forward(self, ut: torch.Tensor, vt: torch.Tensor, p_der: float, p_izq: float) -> torch.Tensor:
        """Return the sum of the right-tail and left-tail losses.

        Args:
            ut: Signed extreme indicator; ``ut`` is scored against the right
                tail and ``-ut`` against the left tail.
            vt: Labels in {-1, 0, 1}, same shape as ``ut``.
            p_der: Proportion of right-extreme labels.
            p_izq: Proportion of left-extreme labels.
        """
        vt_derecha = (vt == 1).float()
        vt_izq = (vt == -1).float()

        perdida_derecha = self.evl_perdida(ut, vt_derecha, p_der, is_right = True)
        perdida_izq = self.evl_perdida(-ut, vt_izq, p_izq, is_right = False)

        return perdida_derecha + perdida_izq

In [3]:
# Clase para la inicialización del Modelo

class GRU_EVT(nn.Module):
    """GRU forecaster with a memory of past windows and an Extreme Value Loss.

    The network produces a regular prediction from the last GRU hidden state
    and, once the memory is populated, a signed indicator ``ut`` obtained by
    attending over stored window embeddings and their extreme labels. The
    final prediction is ``pred_normal + b * ut``.
    """

    def __init__(self, size_input, size_oculto, size_memoria, size_ventana, size_salida=1, gamma = 1):
        """Build the encoder, the output heads and the (still empty) memory.

        Args:
            size_input: Number of input features per time step.
            size_oculto: GRU hidden size (H).
            size_memoria: Number of memory slots (M).
            size_ventana: Length of each window stored in memory.
            size_salida: Output size of the regular prediction head.
            gamma: Shape hyper-parameter forwarded to ``EVL``.
        """
        super(GRU_EVT, self).__init__()

        self.size_oculto = size_oculto
        self.size_memoria = size_memoria
        self.size_ventana = size_ventana
        self.gamma = gamma

        # Recurrent encoder.
        self.gru = nn.GRU(size_input, size_oculto, batch_first=True)
        # Output heads.
        self.capa_salida = nn.Linear(size_oculto, size_salida)
        self.extreme_predictor = nn.Sequential(
            nn.Linear(size_oculto, size_oculto//2),
            nn.ReLU(),
            nn.Linear(size_oculto//2,1),
            nn.Tanh()
        )
        # Learnable scale `b` applied to the memory contribution.
        self.b = nn.Parameter(torch.tensor(1.0, requires_grad=True))
        # Memory modules, allocated lazily by `inicializar_memoria`.
        self.embedding_modulo = None  # S in R^{M x H}
        self.historia_modulo = None   # Q in {-1, 0, 1}^M
        # Extreme Value Loss.
        self.evl_perdida = EVL(gamma = gamma)
        # Label proportions consumed by the loss (see `actualizar_proporciones`).
        self.p_derecha = 0
        self.p_izq = 0

    def inicializar_memoria(self, device = None):
        """Allocate a zero-filled memory of ``size_memoria`` slots on ``device``.

        The original class defined this method twice; the first definition
        defaulted ``device`` to the ``torch.device`` class itself. A single
        definition is kept, and ``None`` now means "the device of the module's
        parameters".
        """
        if device is None:
            device = next(self.parameters()).device

        self.embedding_modulo = torch.zeros(self.size_memoria, self.size_oculto, device=device)
        self.historia_modulo = torch.zeros(self.size_memoria, dtype=torch.long, device=device)

    def actualizar_proporciones(self, vt_batch : torch.Tensor):
        """Refresh the label proportions that weight the Extreme Value Loss.

        The proportions are computed over every element of ``vt_batch`` and
        stored as plain Python floats in ``p_derecha`` (label 1) and ``p_izq``
        (label -1), which are the attributes ``computar_perdidas`` reads. The
        original only wrote ``p_si``/``p_no`` and divided by the number of rows.
        ``p_si`` and ``p_no`` are still set for any external reader.
        """
        total = vt_batch.numel()
        if total == 0:
            self.p_derecha = 0.0
            self.p_izq = 0.0
            self.p_si = 0.0
            self.p_no = 0.0
            return

        self.p_derecha = (vt_batch == 1).sum().item() / total
        self.p_izq = (vt_batch == -1).sum().item() / total
        self.p_si = self.p_derecha
        self.p_no = (vt_batch == 0).sum().item() / total

    def predecir_extremo_ventana(self, sj : torch.Tensor) -> torch.Tensor:
        """Score window embeddings with the extreme head; output lies in [-1, 1] (Tanh)."""
        # Drop only the trailing unit dimension so a single-slot memory keeps its axis.
        return self.extreme_predictor(sj).squeeze(-1)

    def computar_perdidas(self, prediccion: torch.Tensor, targets: torch.Tensor, ut_valores : torch.Tensor, vt_valores : torch.Tensor) -> Dict[str, torch.Tensor]:
        """Compute the training losses.

        ``L1`` is the MSE between ``prediccion`` and ``targets`` plus the EVL
        between ``ut_valores`` and ``vt_valores``. ``L2`` is the EVL between
        the extreme head's score of every memory embedding and the stored
        memory labels, or zero while the memory is unallocated.

        Returns:
            Dict with scalar tensors ``perdida_total`` (``L1 + L2``), ``L1``,
            ``L2``, ``mse_perdida`` and ``evl_perdida_ut``.
        """
        # Loss L1
        mse_loss = nn.MSELoss()(prediccion, targets)
        evl_loss_ut = self.evl_perdida(ut_valores, vt_valores, self.p_derecha, self.p_izq)
        L1 = mse_loss + evl_loss_ut

        # Loss L2
        if self.embedding_modulo is not None:
            pj_predicciones = self.predecir_extremo_ventana(self.embedding_modulo)
            qj_target = self.historia_modulo.float()

            L2 = self.evl_perdida(pj_predicciones, qj_target, self.p_derecha, self.p_izq)
        else:
            L2 = torch.tensor(0.0, device = prediccion.device)

        return {
            'perdida_total': L1+L2,
            'L1': L1,
            'L2': L2,
            'mse_perdida': mse_loss,
            'evl_perdida_ut': evl_loss_ut
        }

    def forward(self, x : torch.Tensor, tiempo_actual, return_componentes : bool = False) -> torch.Tensor:
        """Predict the next value from a batch of sequences.

        Args:
            x: Input of shape (batch, time, features) (the GRU is batch_first).
            tiempo_actual: Current time index; accepted for interface
                compatibility and not used in the computation.
            return_componentes: When True, also return the regular prediction
                and the memory indicator ``ut``.

        Returns:
            ``pred_final``, or ``(pred_final, pred_normal, ut)`` when
            ``return_componentes`` is True.
        """
        batch_size = x.shape[0]

        # GRU encoding; keep the last layer's final hidden state.
        _, oculto = self.gru(x)
        ultimo_oculto = oculto[-1]

        # Regular prediction.
        pred_normal = self.capa_salida(ultimo_oculto)

        # Memory attention; stays at zero until the memory is allocated.
        ut = torch.zeros(batch_size, device=x.device)

        if self.embedding_modulo is not None and self.historia_modulo is not None:

            # (B, H) @ (H, M) -> (B, M) attention scores.
            score_atencion = torch.matmul(ultimo_oculto, self.embedding_modulo.T)
            pesos_atencion = torch.softmax(score_atencion, dim=1)

            # One-hot encode labels {-1, 0, 1} into columns {0, 1, 2}.
            filas = self.historia_modulo.shape[0]
            dispositivo = self.historia_modulo.device
            historia_one_hot = torch.zeros(filas, 3, device=dispositivo)
            historia_one_hot[torch.arange(filas, device=dispositivo), self.historia_modulo + 1] = 1

            contribucion_memoria = torch.matmul(pesos_atencion, historia_one_hot)

            # Attention mass on right extremes minus mass on left extremes, in [-1, 1].
            ut = contribucion_memoria[:, 2] - contribucion_memoria[:, 0]

        # NOTE(review): the original added `b` as a plain bias (`+ self.b + ut`).
        # Ding et al. (2019) define the output as o_t = o~_t + b * u_t, i.e. `b`
        # scales the memory contribution; the linear head already has a bias.
        pred_final = pred_normal + self.b * ut.unsqueeze(1)

        if return_componentes:
            return pred_final, pred_normal, ut

        return pred_final

    def get_label(self, sucesion, time_step, columna: int = 0):
        """Label the step after ``time_step`` as right extreme, left extreme or normal.

        The next value is extreme when it leaves the band
        ``value[time_step] +/- 2 * std(value[:time_step])``.

        Args:
            sucesion: 1-D series, or a 2-D (time, features) tensor.
            time_step: Index of the current step.
            columna: Feature column used when ``sucesion`` is multi-dimensional.
                NOTE(review): callers in this file pass the feature matrix;
                confirm that this column is the series whose jumps should
                define the extremes.

        Returns:
            1, -1 or 0. Returns 0 when there is no next step or fewer than two
            history points (the standard deviation is undefined there).
        """
        if time_step + 1 >= len(sucesion) or time_step < 2:
            return 0

        serie = sucesion
        if serie.dim() > 1:
            # Comparing whole feature vectors is ambiguous; reduce to one column.
            serie = serie.reshape(len(serie), -1)[:, columna]

        valor_actual = serie[time_step]
        valor_sig = serie[time_step + 1]
        margen = 2 * torch.std(serie[:time_step])

        if valor_sig > valor_actual + margen:
            return 1
        if valor_sig < valor_actual - margen:
            return -1
        return 0

    def sample_historia(self, tiempo_actual, sucesion) -> Tuple[List[torch.Tensor], List[int]]:
        """Sample up to ``size_memoria`` past windows and their extreme labels.

        Window end indices are drawn without replacement from
        ``[size_ventana, tiempo_actual - 1)``.

        Returns:
            ``(ventanas, labels)``: each window has a leading batch dimension
            of 1; both lists are empty when the history is too short.
        """
        ventanas = []
        labels = []

        # The original read the non-existent attribute `size_ventanas` here.
        if tiempo_actual <= self.size_ventana + 1:
            return ventanas, labels

        indices_disponibles = list(range(self.size_ventana, tiempo_actual-1))
        if len(indices_disponibles) == 0 :
            return ventanas, labels

        sample_id = np.random.choice(
            indices_disponibles,
            size = min(self.size_memoria, len(indices_disponibles)),
            replace = False
        )

        for idx in sample_id:
            inicio = idx - self.size_ventana
            final = idx
            ventana = sucesion[inicio:final]

            label = self.get_label(sucesion, final)
            ventanas.append(ventana.unsqueeze(0))
            labels.append(label)

        return ventanas, labels

    def actualizar_memoria(self, ventanas, labels):
        """Encode ``ventanas`` with the GRU and push them into the memory (FIFO).

        Embeddings are computed without gradient tracking. The oldest slots
        are dropped so the memory keeps exactly ``size_memoria`` entries.
        """
        if len(ventanas) == 0:
            return

        with torch.no_grad():
            embeddings = []
            for ventana in ventanas:
                _, oculto = self.gru(ventana)
                # Flatten the last layer's state to (H,). The original kept a
                # (1, H) tensor per window, so the stacked result was 3-D and
                # could not be concatenated with the 2-D memory.
                embeddings.append(oculto[-1].reshape(-1))

            embeddings = torch.stack(embeddings)

        if self.embedding_modulo is None or self.historia_modulo is None:
            self.inicializar_memoria(embeddings.device)

        # Append and keep the newest `size_memoria` rows; this also bounds the
        # memory if more windows than slots are supplied.
        self.embedding_modulo = torch.cat([
            self.embedding_modulo,
            embeddings
        ])[-self.size_memoria:]

        labels_tensor = torch.tensor(labels, dtype=torch.long,
                                     device=self.historia_modulo.device)
        self.historia_modulo = torch.cat([
            self.historia_modulo,
            labels_tensor
        ])[-self.size_memoria:]

In [4]:
class EntrenamientoEVT(nn.Module):
    """Training and forecasting wrapper around ``GRU_EVT``.

    Owns the network (``self.model``), its Adam optimizer and the weight
    ``lambda1`` applied to the memory loss ``L2``.
    """

    def __init__(self, size_input, size_oculto, size_memoria, size_ventana, tasa_aprendizaje = 0.001, gamma = 1, labda1 = 1):
        """Create the network on the available device and its optimizer.

        Args:
            size_input, size_oculto, size_memoria, size_ventana, gamma:
                forwarded to ``GRU_EVT``.
            tasa_aprendizaje: Adam learning rate.
            labda1: Weight of ``L2`` in the total loss (parameter name kept
                as-is for backward compatibility).
        """
        super(EntrenamientoEVT, self).__init__()

        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model = GRU_EVT(
            size_input=size_input,
            size_oculto=size_oculto,
            size_memoria=size_memoria,
            size_ventana=size_ventana,
            gamma = gamma
        ).to(self.device)

        self.optimizador = optim.Adam(self.model.parameters(), lr=tasa_aprendizaje)
        self.lambda1=labda1

    def forward(self, *args, **kwargs):
        """Delegate to the wrapped network so the wrapper itself is callable."""
        return self.model(*args, **kwargs)

    def entrenamiento(self, data:torch.Tensor, targets: torch.Tensor,
                      vt_labels: torch.Tensor) -> Dict[str, float]:
        """Run one optimization step on a batch.

        Args:
            data: Inputs of shape (batch, time, features).
            targets: Targets of shape (batch, time) or (batch, time, 1).
            vt_labels: Extreme labels in {-1, 0, 1}, shape (batch, time)
                or (batch, time, 1).

        Returns:
            The loss components as Python floats. ``perdida_total`` is
            ``L1 + lambda1 * L2`` and is the value that was back-propagated.
        """
        self.model.train()
        self.optimizador.zero_grad()

        data = data.to(self.device)
        targets = targets.to(self.device)
        vt_labels = vt_labels.to(self.device)

        # Accept a trailing unit axis so (batch, time, 1) arrays can be fed directly.
        if targets.dim() == 3 and targets.size(-1) == 1:
            targets = targets.squeeze(-1)
        if vt_labels.dim() == 3 and vt_labels.size(-1) == 1:
            vt_labels = vt_labels.squeeze(-1)

        self.model.actualizar_proporciones(vt_labels)
        batch_size, sec_len = targets.shape
        pred_todas = []
        ut_val_todos = []

        for t in range(sec_len):
            input_actual = data[:, :t+1, :]

            # Refresh the memory from the history of the first sequence in the batch.
            if t > self.model.size_ventana:
                ventanas, labels = self.model.sample_historia(t,data[0,:t])
                self.model.actualizar_memoria(ventanas, labels)

            pred, _, ut = self.model(input_actual, t, return_componentes = True)
            pred_todas.append(pred)
            ut_val_todos.append(ut)

        # Put time on axis 1 so both tensors line up with targets / labels.
        # (Concatenating on dim 0, as before, gave (time * batch, 1).)
        predicciones = torch.cat(pred_todas, dim=1)
        ut_valores = torch.stack(ut_val_todos, dim=1)

        perdidas = dict(self.model.computar_perdidas(predicciones, targets, ut_valores, vt_labels))

        # Apply the configured weight to the memory loss; it was stored but never used.
        perdida_total = perdidas['L1'] + self.lambda1 * perdidas['L2']
        perdidas['perdida_total'] = perdida_total

        perdida_total.backward()
        self.optimizador.step()

        return {k: v.item() for k, v in perdidas.items()}

    def predecir(self, data:torch.Tensor, step_pred: int) -> torch.Tensor:
        """Forecast ``step_pred`` steps ahead autoregressively.

        Each prediction is appended to the input window (dropping the oldest
        step) to produce the next one. Feeding predictions back requires the
        prediction width to equal the number of input features; a single-step
        forecast has no such restriction.

        Args:
            data: Inputs of shape (batch, time, features).
            step_pred: Number of future steps, at least 1.

        Returns:
            The per-step predictions concatenated along dim 0.

        Raises:
            ValueError: if ``step_pred < 1``, or if more than one step is
                requested and predictions cannot be fed back as inputs.
        """
        if step_pred < 1:
            raise ValueError("step_pred must be >= 1")

        self.model.eval()
        with torch.no_grad():
            data = data.to(self.device)
            batch_size, suc_long, input_size = data.shape
            predicciones = []

            # The former warm-up loop over the observed steps is gone: its
            # outputs were discarded and the forward pass keeps no state.
            ultima_suc = data.clone()
            for paso in range(step_pred):
                pred = self.model(ultima_suc, suc_long+paso, return_componentes = False)
                predicciones.append(pred)

                if paso == step_pred - 1:
                    break  # nothing left to feed back

                if pred.shape[-1] != input_size:
                    raise ValueError(
                        "multi-step forecasting needs predictions with as many "
                        "values as input features"
                    )
                input_sig = pred.unsqueeze(1)
                ultima_suc = torch.cat([ultima_suc[:, 1:, :], input_sig], dim=1)

            return torch.cat(predicciones, dim=0)

In [5]:
class Data:
    """Load a CSV, standardize it and cut it into sliding windows with extreme labels."""

    def __init__(self, ruta, target, suc_long):
        """Remember the CSV path, the target column and the window length."""
        self.ruta, self.target, self.suc_long = ruta, target, suc_long
        self.scaler_x = StandardScaler()
        self.scaler_y = StandardScaler()
        self.feature = None

    def cargar_datos(self):
        """Read the CSV and return ``(X_scaled, y_scaled, dataframe)``.

        Every column other than the target is used as a feature; features and
        target are standardized with their own scaler, fitted on the full file.
        """
        tabla = pd.read_csv(self.ruta)
        self.feature = [nombre for nombre in tabla.columns if nombre != self.target]

        matriz_x = tabla[self.feature].values.astype(np.float32)
        columna_y = tabla[self.target].values.astype(np.float32).reshape(-1,1)

        return (
            self.scaler_x.fit_transform(matriz_x),
            self.scaler_y.fit_transform(columna_y),
            tabla,
        )

    def detectar_extremos(self, y_sucesion) -> np.ndarray:
        """Flag values outside the 1.5 * IQR fences: 1 above, -1 below, 0 otherwise."""
        serie = y_sucesion.flatten()

        Q1 = np.percentile(serie, 25)
        Q3 = np.percentile(serie, 75)
        IQR = Q3 - Q1
        limite_superior = Q3 + 1.5 * IQR
        limite_inferior = Q1 - 1.5 * IQR

        # Low outliers are decided last, mirroring the order of the two
        # masked assignments this expression replaces.
        return np.where(
            serie < limite_inferior,
            -1.0,
            np.where(serie > limite_superior, 1.0, 0.0),
        )

    def crear_sucesiones(self, X, y):
        """Build sliding windows of length ``suc_long``.

        Window ``i`` pairs ``X[i : i + suc_long]`` with the targets shifted one
        step ahead, ``y[i + 1 : i + suc_long + 1]``, and the extreme labels of
        those targets.

        Returns:
            ``(X_windows, y_windows, vt_windows)`` as numpy arrays.
        """
        largo = self.suc_long
        inicios = range(len(X) - largo)

        ventanas_x = [X[i:i + largo] for i in inicios]
        ventanas_y = [y[i + 1:i + largo + 1] for i in inicios]
        ventanas_vt = [self.detectar_extremos(tramo) for tramo in ventanas_y]

        return np.array(ventanas_x), np.array(ventanas_y), np.array(ventanas_vt)

    def procesar_datos(self, test_size = 0.2, val_size = 0.1):
        """Load, window and split the data chronologically (no shuffling).

        The test split is taken first; the validation split is then taken from
        what remains, so ``val_size`` is a fraction of the non-test data.

        Returns:
            ``(train, val, test)``, each a tuple ``(X, y, vt)``.
        """
        X, y, _ = self.cargar_datos()

        X_resto, X_test, y_resto, y_test, vt_resto, vt_test = train_test_split(
            *self.crear_sucesiones(X, y),
            test_size=test_size, random_state=42, shuffle=False
        )

        X_train, X_val, y_train, y_val, vt_train, vt_val = train_test_split(
            X_resto, y_resto, vt_resto,
            test_size=val_size, random_state=42, shuffle=False
        )

        entrenamiento = (X_train, y_train, vt_train)
        validacion = (X_val, y_val, vt_val)
        prueba = (X_test, y_test, vt_test)
        return entrenamiento, validacion, prueba

In [6]:
class Evaluar:
    """Evaluate an ``EntrenamientoEVT`` model on the test split and persist the results."""

    def __init__(self, configuracion, modelo=None):
        """Set up the data pipeline, the model and the output folders.

        Args:
            configuracion: Dict with ``target_column``, ``seq_length``,
                ``batch_size`` and the CSV path under ``ruta`` (or
                ``excel_path``, the key ``load_config`` provides). The model
                hyper-parameters (``input_size``, ``hidden_size``,
                ``memory_size``, ``window_size``, ``learning_rate``, ``gamma``,
                ``lambda1``) are only read when ``modelo`` is None.
            modelo: Optional already-built (e.g. trained) ``EntrenamientoEVT``.
                When omitted a fresh, untrained one is created from the config.
        """
        self.config = configuracion
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        print(f'Dispositivo:  {self.device}')

        # `load_config` stores the path under 'excel_path'; 'ruta' is still honoured.
        ruta = configuracion['ruta'] if 'ruta' in configuracion else configuracion['excel_path']

        self.cargar_datos = Data(
            ruta = ruta,
            target = configuracion['target_column'],
            suc_long=configuracion['seq_length']
        )

        if modelo is None:
            # Keyword names must match EntrenamientoEVT.__init__ (including its
            # `labda1` spelling); the previous names raised TypeError.
            modelo = EntrenamientoEVT(
                size_input = configuracion['input_size'],
                size_oculto = configuracion['hidden_size'],
                size_memoria = configuracion['memory_size'],
                size_ventana = configuracion['window_size'],
                tasa_aprendizaje = configuracion['learning_rate'],
                gamma = configuracion['gamma'],
                labda1 = configuracion['lambda1']
            )
        self.model = modelo

        self.resultados()

    def resultados(self):
        """Create the output directories used by the save/plot helpers."""
        os.makedirs('models', exist_ok=True)
        os.makedirs('results', exist_ok=True)
        os.makedirs('plots', exist_ok=True)

    def _crear_loader(self, X, y, vt, shuffle: bool) -> DataLoader:
        """Wrap one split's numpy arrays in a DataLoader."""
        return DataLoader(
            TensorDataset(
                torch.FloatTensor(X),
                torch.FloatTensor(y),
                torch.FloatTensor(vt)
            ),
            batch_size = self.config['batch_size'],
            shuffle=shuffle
        )

    def cargar_data(self):
        """Return ``(train_loader, val_loader, test_loader)``.

        Only the training loader is shuffled: validation and test results are
        plotted as a series, so their order must be preserved.
        """
        (X_train, y_train, vt_train), (X_val, y_val, vt_val), (X_test, y_test, vt_test) = self.cargar_datos.procesar_datos()

        train_loader = self._crear_loader(X_train, y_train, vt_train, shuffle=True)
        val_loader = self._crear_loader(X_val, y_val, vt_val, shuffle=False)
        test_loader = self._crear_loader(X_test, y_test, vt_test, shuffle=False)

        return train_loader, val_loader, test_loader

    def evaluate_model(self):
        """Evaluate on the test split, save metrics/samples and plots, return the results."""
        # Load data
        train_loader, val_loader, test_loader = self.cargar_data()

        # Evaluate
        results = self._evaluate_on_loader(test_loader, "Test")

        # Save results
        self.save_results(results)

        # Plot
        self.plot_results(results)

        return results

    def _evaluate_on_loader(self, loader: DataLoader, dataset_name: str) -> dict:
        """Run the network over ``loader`` and collect flattened outputs.

        Returns:
            Dict with 1-D numpy arrays ``predictions``, ``targets``, ``ut``,
            ``vt`` (all aligned element by element) and the ``metrics`` dict.
        """
        self.model.eval()
        # `self.model` is normally the training wrapper, whose network lives in
        # `.model`; fall back to the object itself if a bare network was given.
        red = getattr(self.model, 'model', self.model)

        all_predictions = []
        all_targets = []
        all_ut = []
        all_vt = []

        with torch.no_grad():
            for data, targets, vt_labels in loader:
                data, targets, vt_labels = data.to(self.device), targets.to(self.device), vt_labels.to(self.device)

                predictions = []
                ut_values = []

                # Process each time step
                for t in range(data.size(1)):
                    current_input = data[:, :t+1, :]
                    pred, _, ut = red(current_input, t, return_componentes=True)
                    predictions.append(pred)
                    ut_values.append(ut)

                # Time goes on axis 1 so every tensor is (batch, time).
                predictions = torch.cat(predictions, dim=1)
                ut_values = torch.stack(ut_values, dim=1)
                targets = targets.reshape(targets.size(0), -1)
                vt_labels = vt_labels.reshape(vt_labels.size(0), -1)

                all_predictions.append(predictions.cpu())
                all_targets.append(targets.cpu())
                all_ut.append(ut_values.cpu())
                all_vt.append(vt_labels.cpu())

        # Flatten to 1-D: the binary classification metrics and the plots
        # expect one value per evaluated step.
        all_predictions = torch.cat(all_predictions).reshape(-1).numpy()
        all_targets = torch.cat(all_targets).reshape(-1).numpy()
        all_ut = torch.cat(all_ut).reshape(-1).numpy()
        all_vt = torch.cat(all_vt).reshape(-1).numpy()

        # Compute metrics
        metrics = self._calculate_metrics(all_predictions, all_targets, all_ut, all_vt, dataset_name)

        return {
            'predictions': all_predictions,
            'targets': all_targets,
            'ut': all_ut,
            'vt': all_vt,
            'metrics': metrics
        }

    def _calculate_metrics(self, predictions: np.ndarray, targets: np.ndarray,
                          ut: np.ndarray, vt: np.ndarray, dataset_name: str) -> dict:
        """Compute regression and extreme-detection metrics and print them.

        Regression metrics are MSE, MAE, RMSE and R2 overall, plus MSE/MAE on
        steps labelled extreme. A step is predicted extreme when ``|ut| > 0.5``.
        All keys are prefixed with ``dataset_name``.
        """
        from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
        from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix

        predictions = np.ravel(predictions)
        targets = np.ravel(targets)
        ut = np.ravel(ut)
        vt = np.ravel(vt)

        mse = mean_squared_error(targets, predictions)
        mae = mean_absolute_error(targets, predictions)
        rmse = np.sqrt(mse)
        r2 = r2_score(targets, predictions)

        extreme_mask = (vt != 0)

        if extreme_mask.any():
            extreme_mse = mean_squared_error(targets[extreme_mask], predictions[extreme_mask])
            extreme_mae = mean_absolute_error(targets[extreme_mask], predictions[extreme_mask])
        else:
            extreme_mse = extreme_mae = 0.0

        predicted_extremes = (np.abs(ut) > 0.5).astype(int)  # Threshold
        true_extremes = extreme_mask.astype(int)

        if np.any(true_extremes):
            precision = precision_score(true_extremes, predicted_extremes, zero_division=0)
            recall = recall_score(true_extremes, predicted_extremes, zero_division=0)
            f1 = f1_score(true_extremes, predicted_extremes, zero_division=0)
            # Fix the label set so the matrix is always 2 x 2.
            cm = confusion_matrix(true_extremes, predicted_extremes, labels=[0, 1]).tolist()
        else:
            precision = recall = f1 = 0.0
            cm = [[0, 0], [0, 0]]

        metrics = {
            f'{dataset_name}_mse': float(mse),
            f'{dataset_name}_mae': float(mae),
            f'{dataset_name}_rmse': float(rmse),
            f'{dataset_name}_r2': float(r2),
            f'{dataset_name}_extreme_mse': float(extreme_mse),
            f'{dataset_name}_extreme_mae': float(extreme_mae),
            f'{dataset_name}_extreme_ratio': float(np.mean(extreme_mask)),
            f'{dataset_name}_precision': float(precision),
            f'{dataset_name}_recall': float(recall),
            f'{dataset_name}_f1': float(f1),
            f'{dataset_name}_confusion_matrix': cm
        }

        for k, v in metrics.items():
            if 'confusion_matrix' not in k:
                print(f"{k}: {v:.6f}")

        return metrics

    def save_results(self, results: dict):
        """Write the metrics and the first 100 predictions to timestamped JSON files in ``results/``."""
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

        # Save metrics
        with open(f'results/metrics_{timestamp}.json', 'w', encoding='utf-8') as f:
            json.dump(results['metrics'], f, indent=4, ensure_ascii=False)

        # Save a sample of the predictions
        prediction_sample = {
            'targets': results['targets'][:100].tolist(),
            'predictions': results['predictions'][:100].tolist(),
            'ut': results['ut'][:100].tolist(),
            'vt': results['vt'][:100].tolist()
        }

        with open(f'results/predictions_sample_{timestamp}.json', 'w', encoding='utf-8') as f:
            json.dump(prediction_sample, f, indent=4, ensure_ascii=False)

    def plot_results(self, results: dict):
        """Save a 2 x 2 summary figure of the evaluation to ``plots/``."""
        predictions = results['predictions']
        targets = results['targets']
        ut = results['ut']
        vt = results['vt']

        # Time-series figure
        plt.figure(figsize=(15, 10))

        plt.subplot(2, 2, 1)
        plt.plot(targets, label='Real', alpha=0.7)
        plt.plot(predictions, label='Predicho', alpha=0.7)
        plt.title('Serie Temporal: Real vs Predicho')
        plt.legend()
        plt.grid(True, alpha=0.3)

        plt.subplot(2, 2, 2)
        extreme_indices = np.where(vt != 0)[0]
        plt.scatter(extreme_indices, targets[extreme_indices], c='red', s=30, label='Eventos Extremos')
        plt.plot(targets, alpha=0.5, label='Real')
        plt.plot(predictions, alpha=0.5, label='Predicho')
        plt.title('Detección de Eventos Extremos')
        plt.legend()
        plt.grid(True, alpha=0.3)

        plt.subplot(2, 2, 3)
        plt.scatter(targets, predictions, alpha=0.6)
        min_val = min(targets.min(), predictions.min())
        max_val = max(targets.max(), predictions.max())
        plt.plot([min_val, max_val], [min_val, max_val], 'r--', alpha=0.8)
        plt.xlabel('Valores Reales')
        plt.ylabel('Predicciones')
        plt.title('Predicciones vs Reales')
        plt.grid(True, alpha=0.3)

        plt.subplot(2, 2, 4)
        plt.plot(ut, label='UT (Predicción eventos)', alpha=0.7)
        plt.plot(vt, label='VT (Eventos reales)', alpha=0.7)
        plt.title('Indicadores de Eventos Extremos')
        plt.legend()
        plt.grid(True, alpha=0.3)

        plt.tight_layout()
        plt.savefig(f'plots/results_{datetime.now().strftime("%Y%m%d_%H%M%S")}.png', dpi=300, bbox_inches='tight')
        plt.close()

        print("Gráficos guardados en la carpeta 'plots'")

def load_config():
    """Return the run configuration.

    Starts from the built-in defaults and, if a ``config.json`` file exists in
    the current working directory, overrides them with its top-level keys.

    Returns:
        dict with the data path (``excel_path``), the target column and the
        model / training hyper-parameters.
    """
    default_config = {
        # Forward slashes resolve on Windows as well as POSIX; the previous
        # backslash-separated path only worked on Windows.
        "excel_path": '../../data/Brujo/BaseCompletaFecha.csv',
        "target_column": "caudal_minimo",
        "input_size": 67,
        "hidden_size": 32,
        "memory_size": 20,
        "window_size": 10,
        "seq_length": 30,
        "batch_size": 8,
        "learning_rate": 0.005,
        "gamma": 0.8,
        "lambda1": 0.5
    }

    if os.path.exists('config.json'):
        with open('config.json', 'r', encoding='utf-8') as f:
            default_config.update(json.load(f))

    return default_config


In [8]:
# Resolve the run configuration and report the compute device in use.
config = load_config()
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

# Data pipeline bound to the configured CSV, target column and window length.
cargar_datos = Data(config['excel_path'], config['target_column'], config['seq_length'])

# Training wrapper built from the configured hyper-parameters, moved to the device.
model = EntrenamientoEVT(
    config['input_size'],
    config['hidden_size'],
    config['memory_size'],
    config['window_size'],
    tasa_aprendizaje=config['learning_rate'],
    gamma=config['gamma'],
    labda1=config['lambda1'],
)
model = model.to(device)

# Chronological train / validation / test splits, each as (X, y, vt).
(X_train, y_train, vt_train), (X_val, y_val, vt_val), (X_test, y_test, vt_test) = cargar_datos.procesar_datos()

# Shuffled mini-batches over the training split.
train_loader = DataLoader(
    TensorDataset(*(torch.FloatTensor(arreglo) for arreglo in (X_train, y_train, vt_train))),
    batch_size=config['batch_size'],
    shuffle=True,
)

cuda


In [9]:
# Load the CSV into `df` for inspection.
df = pd.read_csv('../../data/Brujo/BaseCompletaFecha.csv')

In [10]:
# Display the frame (the last expression of a cell is rendered as its output).
df

Unnamed: 0.1,Unnamed: 0,caudal_minimo,fecha,humedad_lag_1,temp_lag_1,temp_max_lag_1,temp_min_lag_1,prep_lag_1,soil_perfil_lag_1,soil_superf_lag_1,...,temp_max_lag_6,temp_min_lag_6,prep_lag_6,soil_perfil_lag_6,soil_superf_lag_6,dir_viento_lag_6,vel_viento_lag_6,nino_lag_6,caudal_minimo_lag_6,caudal_minimo_lag_12
0,4,43.600000,0,13.66,21.43,27.44,16.27,81.17,0.73,0.74,...,26.75,16.21,165.21,0.95,0.93,244.9,1.00,24.80,211.0,50.0
1,3,32.600000,26,14.69,21.57,27.08,16.34,88.92,0.81,0.83,...,26.22,16.61,289.76,0.96,0.94,247.5,0.73,24.70,153.0,25.9
2,7,29.600000,59,15.79,22.09,28.03,17.71,69.41,0.80,0.83,...,26.51,16.25,160.63,0.86,0.87,233.2,0.43,26.06,70.5,24.2
3,0,26.800000,63,15.37,21.04,27.33,17.20,279.39,0.95,0.94,...,27.48,17.74,262.33,0.87,0.89,236.9,0.98,26.19,224.0,21.5
4,8,30.100000,93,12.84,22.23,30.51,16.71,50.42,0.65,0.63,...,27.08,16.34,88.92,0.81,0.83,297.4,0.42,28.08,79.5,139.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
130,129,107.000000,3947,15.29,23.23,30.84,17.86,115.94,0.59,0.59,...,31.26,16.99,26.92,0.57,0.49,249.6,0.70,27.67,17.9,2.0
131,122,58.000000,3988,15.28,21.35,26.99,17.45,120.80,0.81,0.84,...,29.54,18.34,96.38,0.59,0.60,247.6,0.81,27.49,80.4,45.0
132,132,27.200000,4082,15.33,21.11,26.38,17.11,106.70,0.78,0.81,...,28.80,18.78,86.10,0.63,0.65,231.7,0.84,27.17,80.9,6.9
133,133,58.301462,4201,14.17,23.64,32.25,18.11,61.73,0.56,0.50,...,32.82,17.22,10.32,0.54,0.41,285.5,0.83,28.26,11.2,63.7


In [38]:
# Express `fecha` as integer days since the earliest record and sort by it.
# The file this frame is read from is later overwritten with the converted
# column, so on a re-run `fecha` is already numeric. Passing those integers to
# `pd.to_datetime` would read them as nanoseconds since the epoch and collapse
# every row to day 0, so the conversion is applied only to non-numeric columns.
if pd.api.types.is_numeric_dtype(df['fecha']):
    df = df.sort_values('fecha')
else:
    df['fecha'] = pd.to_datetime(df['fecha'])
    df = df.sort_values('fecha')
    df['fecha'] = (df['fecha']-df['fecha'].min()).dt.days

In [40]:
# Write the frame back to the CSV. `index=False` stops the row index from being
# saved as an extra "Unnamed: 0" column on every round trip. Forward slashes
# avoid the invalid escape sequences of the previous non-raw backslash path
# and resolve on every platform.
df.to_csv('../../data/Brujo/BaseCompletaFecha.csv', index=False)