In [2]:
from sklearn.preprocessing import StandardScaler
import xgboost as xgb
from sklearn.metrics import mean_squared_error
import numpy as np
import pandas as pd

In [18]:
# Columns fed to the scaler and the model. The order matters: scaler columns
# are addressed by their position in this list further down the notebook.
features = [
    # columns expected to already exist in the input frames
    'open',
    'high',
    'low',
    'close',
    'volume',
    # columns created by add_ons()
    'close_percentage',
    'volume_percentage',
    'daily_change',
    'up_down',
    'mv_7',
    'mv_14',
    'mv_21',
    'volat_7',
    'volat_14',
    'volat_21',
]

In [19]:
def add_ons(df):
    """Return a copy of ``df`` enriched with the derived feature columns.

    The input frame must contain ``symbol``, ``timestamp`` (epoch seconds),
    ``open``, ``high``, ``low``, ``close`` and ``volume``. The caller's frame
    is not modified.

    Added / replaced columns:
      * ``timestamp``         - converted with ``pd.to_datetime(..., unit='s')``
      * ``close_percentage``  - per-symbol ``pct_change`` of ``close``
      * ``volume_percentage`` - per-symbol ``pct_change`` of ``volume``
      * ``mv_7/14/21``        - per-symbol rolling mean of ``close``
      * ``volat_7/14/21``     - per-symbol rolling std of ``close``
      * ``daily_change``      - ``high - low``
      * ``up_down``           - ``close / open``

    Rolling and pct_change values follow the existing row order inside each
    symbol; rows are NOT re-sorted here.
    # NOTE(review): presumably rows arrive in chronological order per symbol - confirm.

    Returns:
        pandas.DataFrame: the enriched copy.
    """
    # Work on a copy so re-running a cell never double-processes the caller's frame.
    df = df.copy()
    df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')

    closes = df.groupby('symbol')['close']
    volumes = df.groupby('symbol')['volume']

    df['close_percentage'] = closes.pct_change()
    df['volume_percentage'] = volumes.pct_change()

    windows = (7, 14, 21)
    for window in windows:
        # `w=window` binds the current value; a bare closure would see the last one.
        df[f'mv_{window}'] = closes.transform(lambda s, w=window: s.rolling(window=w).mean())
    for window in windows:
        df[f'volat_{window}'] = closes.transform(lambda s, w=window: s.rolling(window=w).std())

    df['daily_change'] = df['high'] - df['low']
    df['up_down'] = df['close'] / df['open']

    # A zero denominator (previous close/volume, or open) yields +/-inf, which
    # the scaler and the model reject; treat those entries as missing instead.
    ratio_cols = ['close_percentage', 'volume_percentage', 'up_down']
    df[ratio_cols] = df[ratio_cols].replace([np.inf, -np.inf], np.nan)
    return df

In [20]:
def data_scaler(df, features):
    """Standardise the ``features`` columns of ``df`` with a fresh StandardScaler.

    The scaler is fitted on every row of ``df`` and the same rows are then
    transformed. The caller's frame is left untouched; a scaled copy is returned.

    NOTE(review): because the fit uses all rows, any later train/test split of
    the returned frame has seen test-set statistics. Fit on the training slice
    only if a leakage-free evaluation is needed.

    Args:
        df: frame containing at least the ``features`` columns.
        features: list of column names to scale.

    Returns:
        tuple: ``(scaled_df, scaler)`` - the scaled copy and the fitted scaler.
    """
    scaled = df.copy()  # do not scale the caller's frame in place
    scaler = StandardScaler()
    scaled[features] = scaler.fit_transform(scaled[features])
    return scaled, scaler

In [21]:
def before_training(df, features, train_ratio=0.8):
    """Build next-row targets and split the frame chronologically.

    The target of each row is the ``close`` value of the following row
    (``shift(-1)``); rows without a target (the last row, or rows followed by
    a missing ``close``) are dropped. The first ``int(train_ratio * n)`` rows
    form the training set and the remainder the test set - no shuffling.

    The caller's frame is not modified (no ``target`` column is added to it).

    Args:
        df: frame with ``close``, ``timestamp`` and the ``features`` columns.
            The target is taken from the next row regardless of symbol, so the
            frame should hold a single series.
        features: list of column names used as model inputs.
        train_ratio: fraction of rows assigned to the training set.

    Returns:
        tuple: ``(X_train, X_test, y_train, y_test, test_timestamps)``.
    """
    labelled = (
        df.assign(target=df['close'].shift(-1))
        .dropna(subset=['target'])
        .reset_index(drop=True)
    )

    X = labelled[features]
    y = labelled['target']

    train_size = int(train_ratio * len(labelled))
    # Explicit positional slicing: unambiguous for both frames and series.
    X_train, X_test = X.iloc[:train_size], X.iloc[train_size:]
    y_train, y_test = y.iloc[:train_size], y.iloc[train_size:]
    test_timestamps = labelled['timestamp'].iloc[train_size:]

    return X_train, X_test, y_train, y_test, test_timestamps

In [22]:
def model_training(X_train, y_train):
    """Fit and return an ``xgb.XGBRegressor`` on the given training data.

    Hyper-parameters are fixed: 100 trees, learning rate 0.1, depth 5,
    squared-error objective.
    """
    hyperparams = {
        'objective': 'reg:squarederror',
        'max_depth': 5,
        'learning_rate': 0.1,
        'n_estimators': 100,
    }
    regressor = xgb.XGBRegressor(**hyperparams)
    regressor.fit(X_train, y_train)
    return regressor

In [23]:
def predicts(model, X_test, y_test, scaler, features, target_col='close'):
    """Predict on ``X_test`` and map predictions and targets back to raw units.

    The scaler was fitted on all ``features`` columns at once, so a single
    value can only be inverse-transformed by placing it in the slot of the
    column it belongs to. The values here are scaled ``target_col`` values,
    so that column's slot is used. (Using the last slot instead would apply
    the mean/scale of whatever feature happens to be listed last.)

    Args:
        model: fitted estimator exposing ``predict``.
        X_test: scaled feature rows to predict on.
        y_test: scaled true targets aligned with ``X_test``.
        scaler: fitted scaler exposing ``inverse_transform`` over ``features``.
        features: column names, in the order the scaler was fitted with.
        target_col: name of the feature whose scale the targets share.

    Returns:
        tuple: ``(y_pred_descaled, y_test_descaled)`` as 1-D numpy arrays.

    Raises:
        ValueError: if ``target_col`` is not one of ``features``.
    """
    if target_col not in features:
        raise ValueError(
            f"target column {target_col!r} is not in features; cannot descale predictions"
        )
    target_idx = list(features).index(target_col)
    n_features = len(features)

    def _descale(values):
        # Inverse-transform a 1-D vector of scaled target values.
        values = np.asarray(values, dtype=float).ravel()
        if values.size == 0:
            return values
        padded = np.zeros((values.size, n_features))
        padded[:, target_idx] = values
        return scaler.inverse_transform(padded)[:, target_idx]

    y_pred = model.predict(X_test)
    return _descale(y_pred), _descale(y_test)

In [24]:
def predecir_precios(df, features):
    """Train one model per symbol and collect its test-set predictions.

    For every distinct value of ``df['symbol']`` the rows of that symbol are
    scaled (``data_scaler``), split (``before_training``), used to fit a model
    (``model_training``) and evaluated on the test slice (``predicts``).

    NOTE(review): the scaler is fitted on the symbol's full history before the
    train/test split, so the reported test predictions are not leakage-free.

    Args:
        df: frame with a ``symbol`` column plus everything the helpers need.
        features: list of model input column names.

    Returns:
        tuple: ``(df_predictions, models, scalers)`` where ``df_predictions``
        has columns ``symbol``, ``timestamp``, ``y_test``, ``y_pred`` (an empty
        frame when ``df`` has no symbols) and ``models`` / ``scalers`` are
        dicts keyed by symbol.
    """
    scalers = {}
    models = {}
    prediction_frames = []

    for token in df['symbol'].unique():
        df_token = df[df['symbol'] == token].copy()

        df_token, scaler = data_scaler(df_token, features)
        scalers[token] = scaler

        X_train, X_test, y_train, y_test, timestamps = before_training(df_token, features)

        model = model_training(X_train, y_train)
        models[token] = model
        y_pred_descaled, y_test_descaled = predicts(model, X_test, y_test, scaler, features)

        prediction_frames.append(pd.DataFrame({
            'symbol': token,
            'timestamp': timestamps,
            'y_test': y_test_descaled,
            'y_pred': y_pred_descaled
        }))

    # Concatenate once instead of growing a frame inside the loop.
    # pd.concat rejects an empty list, so keep the empty-frame result explicitly.
    if prediction_frames:
        df_predictions = pd.concat(prediction_frames, ignore_index=True)
    else:
        df_predictions = pd.DataFrame()

    return df_predictions, models, scalers

In [25]:
def predict_X_future_prices(days_to_predict, models, df, scalers, features, target_col='close'):
    """Roll each symbol's model forward ``days_to_predict`` steps.

    Starting from the last row of each symbol, the model predicts the next
    scaled ``target_col`` value; that value is descaled for reporting and
    written back into the ``target_col`` slot of the feature vector before the
    next step. All other features stay at their last observed values (rolling
    statistics are not recomputed).

    Descaling places the value in the ``target_col`` slot of a zero row so the
    scaler applies that column's mean/scale, not those of the last feature.

    NOTE(review): the feature vector is passed to the model as found in ``df``.
    If the models were trained on scaled features, ``df`` must already be
    scaled with the matching scaler - confirm against the caller.

    Args:
        days_to_predict: number of future steps per symbol.
        models: dict symbol -> fitted model exposing ``predict``.
        df: frame with ``symbol``, ``timestamp`` (datetime) and ``features``.
        scalers: dict symbol -> fitted scaler exposing ``inverse_transform``.
        features: column names, in the order the scalers were fitted with.
        target_col: feature that the model's output corresponds to.

    Returns:
        dict: symbol -> DataFrame with columns ``fecha`` and ``prediccion``.

    Raises:
        ValueError: if ``target_col`` is not one of ``features``.
    """
    if target_col not in features:
        raise ValueError(
            f"target column {target_col!r} is not in features; cannot descale predictions"
        )
    target_idx = list(features).index(target_col)
    n_features = len(features)

    future_prices_per_token = {}

    for token in df['symbol'].unique():
        model = models[token]
        scaler = scalers[token]
        df_token = df[df['symbol'] == token]

        # Writable (1, n_features) copy of the most recent feature row.
        X_last = df_token[features].iloc[[-1]].to_numpy(dtype=float, copy=True)
        ultima_fecha = df_token['timestamp'].max()

        predicciones_futuras = []

        for i in range(days_to_predict):
            y_next = model.predict(X_last)

            padded = np.zeros((1, n_features))
            padded[0, target_idx] = y_next[0]
            y_next_descaled = scaler.inverse_transform(padded)[0, target_idx]

            fecha_prediccion = ultima_fecha + pd.Timedelta(days=i + 1)
            predicciones_futuras.append({'fecha': fecha_prediccion, 'prediccion': y_next_descaled})

            # Feed the prediction back in the column it belongs to; shifting
            # the whole vector would misalign every feature.
            X_last[0, target_idx] = y_next[0]

        future_prices_per_token[token] = pd.DataFrame(predicciones_futuras)

    for token, predicciones in future_prices_per_token.items():
        print(f"Predicciones para los próximos {days_to_predict} días para {token}:")
        print(predicciones)

    return future_prices_per_token


In [17]:
# Read the four per-category CSV files (relative paths) into one frame each.
gaming, meme, ai, rwa = map(
    pd.read_csv,
    [
        '../data/processed/gaming.csv',
        '../data/processed/meme.csv',
        '../data/processed/ai.csv',
        '../data/processed/rwa.csv',
    ],
)

In [26]:
# Replace each category frame with its feature-enriched version from add_ons().
gaming, meme, ai, rwa = (
    add_ons(category_frame)
    for category_frame in (gaming, meme, ai, rwa)
)

In [27]:
# Stack the four category frames into one frame with a fresh 0..n-1 index.
df = pd.concat(
    objs=[gaming, meme, ai, rwa],
    ignore_index=True,
)

In [None]:
# Per-symbol training and test predictions for the gaming frame.
df_predictions_gaming, model_gaming, scalers_gaming = predecir_precios(
    df=gaming,
    features=features,
)

In [None]:
# Per-symbol training and test predictions for the ai frame.
df_predictions_ai, model_ai, scalers_ai = predecir_precios(
    df=ai,
    features=features,
)

In [None]:
# Per-symbol training and test predictions for the meme frame.
df_predictions_meme, model_meme, scalers_meme = predecir_precios(
    df=meme,
    features=features,
)

In [None]:
# Per-symbol training and test predictions for the rwa frame.
df_predictions_rwa, model_rwa, scalers_rwa = predecir_precios(
    df=rwa,
    features=features,
)

In [35]:
import os

import joblib

model_folder = '../data/models/'
scaler_folder = '../data/scalers/'


def _dump_artifacts(artifacts_by_token, folder, suffix):
    """Write every value of ``artifacts_by_token`` to ``<folder><token>_<suffix>.joblib``.

    The folder is created first if it does not exist, because ``joblib.dump``
    does not create missing directories.
    """
    os.makedirs(folder, exist_ok=True)
    for token, artifact in artifacts_by_token.items():
        joblib.dump(artifact, f"{folder}{token}_{suffix}.joblib")


# File names are keyed by token only, so a token present in more than one
# category is written more than once and the last write wins.
for models_by_token in (model_gaming, model_ai, model_meme, model_rwa):
    _dump_artifacts(models_by_token, model_folder, 'model')

for scalers_by_token in (scalers_gaming, scalers_ai, scalers_meme, scalers_rwa):
    _dump_artifacts(scalers_by_token, scaler_folder, 'scaler')