# EDA: Análisis de Series Temporales - Detective de Datos

Este notebook documenta la generación, limpieza y validación de los datasets reales utilizados en el juego.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Visual configuration: seaborn "whitegrid" styling and a default 12x6 figure size
# applied to every figure created afterwards.
sns.set(style='whitegrid')
plt.rcParams.update({'figure.figsize': (12, 6)})

## 1. Carga de Datos

In [None]:
# Read the three cleaned CSV files (in this order) into their own DataFrames.
retail, saas, ecom = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/retail_2022_cleaned.csv',
        '../data/saas_2023_cleaned.csv',
        '../data/ecommerce_2023_cleaned.csv',
    )
)

# Print the (rows, columns) tuple of each frame as a quick check on the load.
print(f"Retail shape: {retail.shape}")
print(f"SaaS shape: {saas.shape}")
print(f"E-commerce shape: {ecom.shape}")

In [None]:
def calculate_metrics(df, col_name, name):
    """Print the first-to-last growth and the relative volatility of a column.

    Growth is the percentage change between the first and the last row of
    ``df[col_name]``. Rows are used in their existing order (no sorting is
    performed here), so the frame should already be in chronological order.
    Volatility is the standard deviation divided by the mean (coefficient of
    variation), expressed as a percentage.

    Args:
        df: DataFrame that contains the series.
        col_name: Name of the numeric column to summarise.
        name: Label printed in front of the metrics.

    Returns:
        None. One line is printed in the form
        ``"<name>: Growth=<g>%, Volatility=<v>%"``. A metric that cannot be
        computed (empty column, missing/zero first value, missing/zero mean,
        or an undefined standard deviation) is printed as ``n/a`` instead of
        raising or showing ``inf``/``nan``.
    """
    values = df[col_name]
    growth = None
    volatility = None

    if len(values) > 0:
        start = values.iloc[0]
        end = values.iloc[-1]
        # A zero or missing baseline makes the percentage change undefined.
        if pd.notna(start) and start != 0:
            growth = (end - start) / start * 100

        mean = values.mean()
        # Same guard for the coefficient of variation's denominator.
        if pd.notna(mean) and mean != 0:
            volatility = (values.std() / mean) * 100

    def _as_percent(value):
        # NaN can still appear here (e.g. std of a single row, NaN last value).
        if value is None or pd.isna(value):
            return "n/a"
        return f"{value:.1f}%"

    print(f"{name}: Growth={_as_percent(growth)}, Volatility={_as_percent(volatility)}")


## 2. Verificación de Métricas

In [None]:
# Print growth and volatility for the headline column of each dataset.
calculate_metrics(df=retail, col_name='sales', name='Retail')
calculate_metrics(df=saas, col_name='users', name='SaaS')
calculate_metrics(df=ecom, col_name='traffic', name='E-commerce')

## 3. Visualización y Anomalías - Retail

In [None]:
# Plot the retail sales series and highlight the rows flagged as anomalies.
fig, ax = plt.subplots()

# Parse the date strings once so the line and the markers share one datetime axis.
retail_dates = pd.to_datetime(retail['date'])
ax.plot(retail_dates, retail['sales'], label='Ventas')

# Rows whose `is_anomaly` flag equals 1 are overlaid as red markers.
is_anomaly = retail['is_anomaly'] == 1
anomalies = retail[is_anomaly]
ax.scatter(
    retail_dates[is_anomaly],
    anomalies['sales'],
    color='red',
    label='Anomalías',
    zorder=3,  # scatter collections default to a lower zorder than lines; lift the markers above the series
)

ax.set_title('Retail Sales 2022')
ax.set_xlabel('Fecha')
ax.set_ylabel('Ventas')
ax.legend()
plt.show()