In [1]:
from statsmodels.tsa.holtwinters import SimpleExpSmoothing
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt
import pickle
import requests
import gradio as gr
import yfinance as yf

# Preprocesamiento

In [2]:
# Load the raw daily export and expose the period start as a leading 'Date' column.
data = pd.read_csv('/content/near-protocol_2020-05-11_2025-05-10.csv')
data['Date'] = pd.to_datetime(data['Start'])
data.insert(0, 'Date', data.pop('Date'))
data.drop(['Start', 'End'], axis=1, inplace=True)

# Exponential smoothing and pct_change both depend on row order, so the frame
# must be chronological BEFORE they run. (The training cell sorts by date only
# after these columns exist, which would be too late if the export is newest-first.)
data.sort_values('Date', inplace=True)
data.reset_index(drop=True, inplace=True)

# Fixed-alpha simple exponential smoothing of the market cap.
ses_model = SimpleExpSmoothing(data['Market Cap']).fit(smoothing_level=0.2, optimized=False)
data['Market Cap Smooth'] = ses_model.fittedvalues

# Optional visual check of the smoothing:
# plt.figure(figsize=(15, 6))
# plt.plot(data['Market Cap'], label='Original', alpha=0.5)
# plt.plot(data['Market Cap Smooth'], label='Exponential Smoothing')
# plt.legend()
# plt.title('Suavizado exponencial')
# plt.show()

# Day-over-day relative change for every price/volume/market-cap column.
base_cols = ['Open', 'High', 'Low', 'Close', 'Volume', 'Market Cap', 'Market Cap Smooth']
pct_cols = [f'Pct Diff {col}' for col in base_cols]
for col, pct_col in zip(base_cols, pct_cols):
    data[pct_col] = data[col].pct_change()

# A zero in the previous row makes pct_change return +/-inf, which dropna() keeps
# and StandardScaler later rejects; convert those to NaN so they are dropped too.
data[pct_cols] = data[pct_cols].replace([np.inf, -np.inf], np.nan)

# The first row has no previous value, so its percentage changes are NaN.
data.dropna(inplace=True)
data.reset_index(drop=True, inplace=True)

data.to_csv('data_preprocesed.csv', index=False)

# Entrenamiento

In [5]:
# Windowing / validation settings
INPUT_WINDOW = 56
PREDICTION_HORIZON = 7
N_SPLITS = 5

# Read the preprocessed table, discard the raw market-cap columns, and make
# sure the rows are ordered by date with a clean 0..n-1 index.
data = (
    pd.read_csv('/content/data_preprocesed.csv')
    .drop(columns=['Market Cap', 'Pct Diff Market Cap'])
    .sort_values("Date")
    .reset_index(drop=True)
)

# Model inputs: every percentage-change column that is left
features_pct = list(filter(lambda name: name.startswith('Pct Diff '), data.columns))

def create_features_and_labels(df, feature_columns, input_window, prediction_horizon):
    """Build sliding-window samples and binary direction labels.

    Each sample is the row-major flattening of ``input_window`` consecutive rows
    of ``feature_columns``. Its label is 1 when the 'High' value
    ``prediction_horizon`` rows after the window's last row is strictly greater
    than the 'High' value of that last row, else 0.

    Args:
        df: DataFrame containing ``feature_columns`` and a 'High' column; rows
            are used in their existing order.
        feature_columns: list of column names used as model inputs.
        input_window: number of consecutive rows per sample.
        prediction_horizon: how many rows ahead of the window's last row the
            comparison 'High' is taken from.

    Returns:
        Tuple ``(X, y)``: ``X`` has shape
        ``(n_samples, input_window * len(feature_columns))`` and ``y`` has shape
        ``(n_samples,)``. When ``df`` is too short for a single sample both are
        empty but keep those shapes.
    """
    # Pull the data out once instead of slicing the DataFrame on every iteration.
    feature_values = df[feature_columns].to_numpy()
    highs = df['High'].to_numpy()

    # The last usable start index is len(df) - input_window - prediction_horizon
    # (its future row is the final row), hence the +1 on the count.
    n_samples = len(df) - input_window - prediction_horizon + 1

    X, y = [], []
    for start in range(max(n_samples, 0)):
        end = start + input_window
        X.append(feature_values[start:end].flatten())
        y.append(int(highs[end + prediction_horizon - 1] > highs[end - 1]))

    if not X:
        # Keep a 2-D feature matrix even when there are no samples.
        return np.empty((0, input_window * len(feature_columns))), np.array([], dtype=int)
    return np.array(X), np.array(y)

# Turn the time series into flattened windows plus up/down labels
X_pct, y_pct = create_features_and_labels(
    df=data,
    feature_columns=features_pct,
    input_window=INPUT_WINDOW,
    prediction_horizon=PREDICTION_HORIZON,
)

# Standardise every feature; the fitted scaler is reused at inference time
scaler = StandardScaler().fit(X_pct)
X_scaled = scaler.transform(X_pct)

# Final model: fitted on the complete dataset (no hold-out split here)
rf_model = RandomForestClassifier(
    n_estimators=100,
    max_depth=10,
    random_state=42,
    class_weight='balanced_subsample',
).fit(X_scaled, y_pct)

# Persist both artifacts so the serving cell can load them
for artifact_path, artifact in (('rf_model.pkl', rf_model), ('scaler.pkl', scaler)):
    with open(artifact_path, 'wb') as out_file:
        pickle.dump(artifact, out_file)

# Gradio

In [6]:
# Number of most recent rows fed to the model; must match the training window
INPUT_WINDOW = 56

# Restore the trained classifier ...
with open("rf_model.pkl", "rb") as model_file:
    rf_model = pickle.load(model_file)

# ... and the scaler that was fitted alongside it
with open("scaler.pkl", "rb") as scaler_file:
    scaler = pickle.load(scaler_file)

def fetch_and_process_data(ticker="BTC-USD", period="90d", interval="1d"):
    """Download price history from Yahoo Finance and derive the model features.

    The percentage-change columns are produced for Open, High, Low, Close,
    Volume and the exponentially smoothed market cap, in that order, which is
    the feature set and order the training cell builds from its 'Pct Diff '
    columns after dropping the raw market-cap ones.

    Args:
        ticker: Yahoo Finance symbol to download.
        period: look-back period passed to ``yf.download``.
        interval: bar size passed to ``yf.download``.

    Returns:
        DataFrame indexed by date with lower-case, underscore-separated column
        names: the raw columns, ``market_cap``, ``market_cap_smooth`` and one
        ``pct_diff_*`` column per feature. The first row (no previous value) is
        dropped.

    Raises:
        ValueError: if Yahoo Finance returns no rows for ``ticker``.
    """
    df = yf.download(ticker, period=period, interval=interval, progress=False)

    if df is None or df.empty:
        raise ValueError(f"No se pudieron obtener datos para '{ticker}' desde Yahoo Finance.")

    # Newer yfinance versions return (field, ticker) MultiIndex columns.
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.droplevel(1)

    df = df.reset_index()
    df["Date"] = pd.to_datetime(df["Date"]).dt.date

    # NOTE(review): Close * Volume is traded value, used here as a stand-in for
    # market cap because the download does not include one — confirm this proxy
    # is acceptable given the model was trained on a real 'Market Cap' column.
    df["Market Cap"] = df["Close"] * df["Volume"]

    # .copy() so the column assignments below act on an independent frame
    # rather than on a slice of the downloaded one.
    base_cols = ["Date", "Open", "High", "Low", "Close", "Volume", "Market Cap"]
    df = df[base_cols].dropna().reset_index(drop=True).copy()
    if df.empty:
        raise ValueError(f"No se pudieron obtener datos para '{ticker}' desde Yahoo Finance.")

    # Same smoothing as in preprocessing (alpha=0.2, not optimised): the model
    # was trained on the pct change of the SMOOTHED market cap, not the raw one.
    smoother = SimpleExpSmoothing(df["Market Cap"]).fit(smoothing_level=0.2, optimized=False)
    df["Market Cap Smooth"] = np.asarray(smoother.fittedvalues)

    for col in ["Open", "High", "Low", "Close", "Volume", "Market Cap Smooth"]:
        df[f"Pct Diff {col}"] = df[col].pct_change()

    # Drops the first row, whose percentage changes are undefined.
    df.dropna(inplace=True)
    df.set_index("Date", inplace=True)
    df.columns = df.columns.str.lower().str.replace(" ", "_")

    return df

def get_latest_window(symbol="BTC-USD"):
    """Return the most recent INPUT_WINDOW processed rows for ``symbol``.

    Raises:
        ValueError: when fewer than INPUT_WINDOW rows are available.
    """
    df = fetch_and_process_data(symbol)
    enough_history = len(df) >= INPUT_WINDOW
    if enough_history:
        return df.tail(INPUT_WINDOW)
    raise ValueError(f"Se requieren al menos {INPUT_WINDOW} días de datos. Solo se obtuvieron {len(df)}.")

def create_input_window(df):
    """Flatten the last INPUT_WINDOW rows of the pct-change columns into one sample.

    The column labels of ``df`` are converted to strings in place.

    Returns:
        Tuple ``(X, df)`` where ``X`` is a ``(1, n_features)`` array and ``df``
        is the frame that was passed in.

    Raises:
        ValueError: if the flattened width differs from
            INPUT_WINDOW * number of ``pct_diff`` columns.
    """
    df.columns = df.columns.astype(str)

    feature_cols = []
    for name in df.columns:
        if name.startswith("pct_diff"):
            feature_cols.append(name)

    recent_rows = df[feature_cols].iloc[-INPUT_WINDOW:]
    # Row-major flattening into a single-row matrix
    X = recent_rows.to_numpy().reshape(1, -1)

    expected_features = len(feature_cols) * INPUT_WINDOW
    if not X.shape[1] == expected_features:
        raise ValueError(f"Se esperaban {expected_features} características, pero se recibieron {X.shape[1]}.")
    return X, df

def predict_crypto(ticker_input):
    """Gradio callback: predict the price direction for a Yahoo Finance ticker.

    Args:
        ticker_input: ticker text typed by the user (e.g. "BTC-USD").

    Returns:
        Tuple ``(label, probabilities_text, image_path)``. On any failure the
        first element is an ``"Error: ..."`` message and the other two are
        ``None``, so the UI shows the problem instead of crashing.
    """
    import tempfile  # local import: only needed for the per-request plot file

    fig = None
    try:
        symbol = (ticker_input or "").strip().upper()
        if not symbol:
            # Avoid a pointless download for an empty textbox.
            raise ValueError("Debe introducir un ticker.")

        df = get_latest_window(symbol)
        X_input, df_processed = create_input_window(df)
        X_scaled = scaler.transform(X_input)

        pred = rf_model.predict(X_scaled)[0]
        proba = rf_model.predict_proba(X_scaled)[0]

        label = "Subirá" if pred == 1 else "Bajará"
        prob_str = f"Probabilidad de subida: {proba[1]:.2%}\nProbabilidad de bajada: {proba[0]:.2%}"

        # Closing-price chart, drawn on an explicit figure so it can always be closed.
        fig, ax = plt.subplots(figsize=(8, 4))
        df_processed["close"].tail(INPUT_WINDOW).plot(
            ax=ax, title=f"{symbol} - Últimos {INPUT_WINDOW} días", grid=True
        )
        ax.set_xlabel("Fecha")
        ax.set_ylabel("Precio de cierre (USD)")
        fig.tight_layout()

        # Unique file per request: a single shared "price_plot.png" would be
        # overwritten when two users hit the public app at the same time.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            plot_path = tmp.name
        fig.savefig(plot_path)

        return label, prob_str, plot_path

    except Exception as e:
        # Deliberate best-effort: surface the message in the UI.
        return f"Error: {str(e)}", None, None

    finally:
        # Release the figure even if plotting or saving failed.
        if fig is not None:
            plt.close(fig)

# Gradio user interface: one text input, three outputs (label, text, image)
ticker_box = gr.Textbox(label="Ticker (Yahoo Finance)", placeholder="Ej. BTC-USD, ETH-USD, NEAR-USD")

result_widgets = [
    gr.Label(label="Predicción de dirección"),
    gr.Textbox(label="Probabilidades"),
    gr.Image(label="Gráfico de precios"),
]

app_description = (
    "Introduce un ticker válido de Yahoo Finance (BTC-USD, ETH-USD, NEAR-USD) para predecir si el precio subirá o bajará en los próximos días usando porcentajes de cambio como entrada del modelo."
)

iface = gr.Interface(
    fn=predict_crypto,
    inputs=ticker_box,
    outputs=result_widgets,
    title="Predicción de criptomonedas con Random Forest",
    description=app_description,
    examples=[["BTC-USD"], ["ETH-USD"], ["NEAR-USD"]],
)

# Start the web app
iface.launch()

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://aaca3222a46d30353b.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


