# Model Upload

In [1]:
# Install dependencies
!pip install -q numerapi pandas matplotlib lightgbm cloudpickle pyarrow

In [1]:
# Download data
from numerapi import NumerAPI
import pandas as pd
import json
# Client is created without credentials; it is only used below to download
# dataset files.
napi = NumerAPI()
napi.download_dataset("v4.1/train.parquet")
napi.download_dataset("v4.1/features.json")

# Read the feature metadata through a context manager so the file handle is
# closed as soon as the JSON has been parsed.
with open("v4.1/features.json", encoding="utf-8") as features_file:
    feature_metadata = json.load(features_file)

# Feature names belonging to the "small" feature set.
feature_cols = feature_metadata["feature_sets"]["small"]

# Load only the columns used below: era, the selected features, and the target.
training_data = pd.read_parquet(
    "v4.1/train.parquet",
    columns=["era"] + feature_cols + ["target"]
)

# Train model
import lightgbm as lgb
# Hyperparameters are collected in one mapping and unpacked into the regressor.
lgbm_params = dict(
    n_estimators=2000,
    learning_rate=0.01,
    max_depth=5,
    num_leaves=2 ** 5,
    colsample_bytree=0.1,
)
model = lgb.LGBMRegressor(**lgbm_params)

# Fit on the selected feature columns against the "target" column.
# The trailing semicolon keeps the fitted estimator's repr out of the cell output.
model.fit(X=training_data[feature_cols], y=training_data["target"]);

In [3]:
# Wrap your model with a function that takes live features and returns live predictions
def predict(live_features: pd.DataFrame) -> pd.DataFrame:
    """Score live features with the trained model.

    Args:
        live_features: Frame that contains the columns listed in
            ``feature_cols``; only those columns are passed to the model.

    Returns:
        A one-column frame named "prediction" that carries the same index
        as ``live_features``.
    """
    # `model` and `feature_cols` are notebook-level globals looked up at call time.
    scores = model.predict(live_features[feature_cols])
    named_scores = pd.Series(scores, index=live_features.index, name="prediction")
    return named_scores.to_frame()

# Use the cloudpickle library to serialize your function
import cloudpickle
# Serialize `predict` to bytes first, then persist those bytes to predict.pkl.
p = cloudpickle.dumps(predict)
with open("predict.pkl", mode="wb") as pickle_file:
    pickle_file.write(p)

In [5]:
# Download the model file if running in Google Colab
try:
    from google.colab import files
except ImportError:
    # google.colab is not importable here, so there is no browser download
    # to trigger; skip this step.
    pass
else:
    try:
        files.download('predict.pkl')
    except Exception as exc:
        # Stay best-effort, but report the failure instead of hiding it.
        print(f"Could not download predict.pkl from Colab: {exc}")