In [None]:
## Load secrets from file
import os
FP_Secrets = 'Numerai.secrets'

if not os.path.exists(FP_Secrets):
    raise FileNotFoundError(f"'{FP_Secrets}' not found. Make sure the file exists.")

# Read API keys: the file is parsed as one KEY=VALUE pair per line.
api_keys = {}
with open(FP_Secrets, 'r') as secrets_file:
    for line_no, raw_line in enumerate(secrets_file, start=1):
        entry = raw_line.strip()
        # Blank lines (e.g. a trailing newline at end of file) and '#'
        # comment lines carry no key, so skip them instead of crashing.
        if not entry or entry.startswith('#'):
            continue
        # Split on the FIRST '=' only, so values that themselves contain '='
        # (base64-style padding, for instance) are kept intact.
        key, separator, value = entry.partition('=')
        if not separator:
            # Deliberately do not echo the line: it may contain secret material.
            raise ValueError(
                f"Line {line_no} of '{FP_Secrets}' is not in KEY=VALUE form."
            )
        # Tolerate whitespace around the '=' (e.g. "PUBLIC_KEY = abc").
        api_keys[key.strip()] = value.strip()

# Set your Numerai API credentials
PUBLIC_KEY = api_keys.get('PUBLIC_KEY')
SECRET_KEY = api_keys.get('SECRET_KEY')

# Empty or missing values are both treated as "not configured".
if not PUBLIC_KEY or not SECRET_KEY:
    raise ValueError(f"API keys not found in the '{FP_Secrets}' file.")


In [None]:
import pandas as pd
import numerapi

# Authenticate against Numerai with the credentials loaded earlier
napi = numerapi.NumerAPI(public_id=PUBLIC_KEY, secret_key=SECRET_KEY)

# Download the latest Numerai datasets: (remote name, local destination) pairs,
# fetched in the order listed.
for remote_name, local_name in (
    ("v4.1/train.parquet", "train.parquet"),
    ("v4.1/validation.parquet", "validation.parquet"),
    ("v4.1/live.parquet", "live.parquet"),
    ("v4.1/live_example_preds.parquet", "live_example_preds.parquet"),
    ("v4.1/validation_example_preds.parquet", "validation_example_preds.parquet"),
    ("v4.1/features.json", "features.json"),
    ("v4.1/meta_model.parquet", "meta_model.parquet"),
):
    napi.download_dataset(remote_name, local_name)

# Read the parquet files into pandas DataFrames.
# features.json and meta_model.parquet are downloaded above but not loaded here.
train_data = pd.read_parquet("train.parquet")
validation_data = pd.read_parquet("validation.parquet")
live_data = pd.read_parquet("live.parquet")
live_example_preds = pd.read_parquet("live_example_preds.parquet")
validation_example_preds = pd.read_parquet("validation_example_preds.parquet")

# Report the (rows, columns) shape of the three main frames
for shape_label, frame in (
    ("Training data shape:", train_data),
    ("Validation data shape:", validation_data),
    ("Live data shape:", live_data),
):
    print(shape_label, frame.shape)

# Challenge: How might you use the additional files like 'features.json' and 'meta_model.parquet' in your ML models?


In [None]:
## Initializes Numerai Data and NumerAPI
import os  # imported here too so this cell does not depend on an earlier cell's import
import re

import numpy as np
import pandas as pd
import numerapi

# Set your Numerai API credentials
napi = numerapi.NumerAPI(public_id=PUBLIC_KEY, secret_key=SECRET_KEY)

# Download the latest Numerai dataset
napi.download_current_dataset(unzip=True)

# Locate the downloaded dataset in the current working directory.
# Presumably the download leaves `numerai_dataset_<round>.zip` and/or an
# unzipped `numerai_dataset_<round>` folder here -- TODO confirm against the
# installed numerapi version.
f_pattern = r"numerai_dataset_\d+"
f_name = None

# os.listdir() returns entries in arbitrary order, so taking the first match
# could select a stale download from an earlier round. Collect every match and
# keep the one with the highest number instead.
candidates = []
for file in os.listdir():
    matched = re.match(f_pattern, file)
    if matched:
        round_number = int(matched.group(0).rsplit('_', 1)[1])
        # A .zip archive and its unzipped folder map to the same name.
        candidates.append((round_number, file.replace('.zip', '')))

if not candidates:
    # Explicit raise rather than `assert`: asserts are stripped under `python -O`.
    raise FileNotFoundError(
        f"No entry matching '{f_pattern}' found in '{os.getcwd()}'. "
        "The dataset download may have failed."
    )

# Tuples compare by number first, then by name, so the choice is deterministic.
f_name = max(candidates)[1]
print("Using dataset directory:", f_name)


In [None]:
# Build the paths of the two CSV files inside the dataset directory (`f_name`)
t_data, tor_data = (
    os.path.join(f_name, csv_name)
    for csv_name in ("numerai_training_data.csv", "numerai_tournament_data.csv")
)

# Read both CSVs into pandas DataFrames.
# NOTE(review): if the v4.1 parquet cell also ran, this rebinds `train_data`.
train_data = pd.read_csv(t_data)
tournament_data = pd.read_csv(tor_data)

# Report the (rows, columns) shape of each frame
for shape_label, frame in (
    ("Training data shape:", train_data),
    ("Tournament data shape:", tournament_data),
):
    print(shape_label, frame.shape)