In [9]:
import numpy as np
import pandas as pd

def create_dataset(N, w, days=2920, seed=42):
    """Build a synthetic lagged-return dataset for predicting crypto 1.

    The returns are placeholder values drawn from a standard normal
    distribution (to be replaced with real log returns later).

    Every row describes one day ``t``. For each of the ``N`` cryptocurrencies
    it holds the ``w`` most recent returns ordered ``t-1, t-2, ..., t-w``,
    followed by a running day counter for crypto 1 and the target, which is
    crypto 1's return on day ``t`` itself (i.e. the value that shows up as
    ``return_crypto1_t-1`` in the following row).

    Parameters
    ----------
    N : int
        Number of cryptocurrencies; must be at least 1 because the target is
        read from the first one.
    w : int
        Number of lagged returns kept per cryptocurrency; must be >= 0.
    days : int, default 2920
        Number of rows (days) in the resulting dataset; must be >= 0.
    seed : int or None, default 42
        Seed handed to ``np.random.seed`` before sampling. ``None`` skips the
        seeding and draws from NumPy's global random stream as it currently is.

    Returns
    -------
    pandas.DataFrame
        ``days`` rows and ``N * w + 2`` columns:
        ``return_crypto{i}_t-{j}`` for ``i`` in ``1..N`` and ``j`` in ``1..w``,
        then ``crypto1_age`` and ``log_return_target``. ``crypto1_age`` is
        stored as float because it is stacked together with the float returns.

    Raises
    ------
    ValueError
        If ``N < 1``, ``w < 0`` or ``days < 0``.

    Notes
    -----
    With a non-``None`` seed this function resets NumPy's *global* random
    state, so ``np.random`` calls made afterwards continue from that stream.
    """
    if N < 1:
        raise ValueError(f"N must be at least 1, got {N}")
    if w < 0 or days < 0:
        raise ValueError(f"w and days must be non-negative, got w={w}, days={days}")

    if seed is not None:
        np.random.seed(seed)  # For reproducibility
    # Sample w extra days so that the first row already has a full lag window
    data = np.random.randn(days + w, N)

    # lag_rows[i, k] is the row of `data` holding lag t-(k+1) for dataset row i:
    # i + w - 1 (most recent) down to i (oldest).
    lag_rows = np.arange(days)[:, None] + np.arange(w - 1, -1, -1)[None, :]
    # data[lag_rows] has shape (days, w, N); move the crypto axis ahead of the
    # lag axis so that flattening groups the w lags of each crypto together.
    attributes = data[lag_rows].transpose(0, 2, 1).reshape(days, N * w)

    # Progressive age of crypto 1: 1, 2, ..., days (one column)
    crypto1_age = np.arange(1, days + 1).reshape(-1, 1)

    # Append the age column to the lagged returns
    attributes = np.hstack([attributes, crypto1_age])

    # Target for row i is crypto 1's return on the day right after its lag
    # window (day t), so no feature in the row overlaps with the target.
    target = data[w:days + w, 0]

    # Assemble the DataFrame with one named column per (crypto, lag) pair
    columns = [f'return_crypto{i+1}_t-{j+1}' for i in range(N) for j in range(w)]
    columns += ['crypto1_age']  # Only crypto 1 has an age column
    df = pd.DataFrame(attributes, columns=columns)

    # Target column: crypto 1's log return for the day being predicted
    df['log_return_target'] = target

    return df

# Dataset configuration
N = 3  # How many cryptocurrencies to simulate
w = 5  # Lagged log returns kept per cryptocurrency

# Build the dataset; `days` is left at the function's default
df = create_dataset(N=N, w=w)

# Show the first rows as the cell's output
df.head()


Unnamed: 0,return_crypto1_t-1,return_crypto1_t-2,return_crypto1_t-3,return_crypto1_t-4,return_crypto1_t-5,return_crypto2_t-1,return_crypto2_t-2,return_crypto2_t-3,return_crypto2_t-4,return_crypto2_t-5,return_crypto3_t-1,return_crypto3_t-2,return_crypto3_t-3,return_crypto3_t-4,return_crypto3_t-5,crypto1_age,log_return_target
0,0.241962,0.54256,1.579213,1.52303,0.496714,-1.91328,-0.463418,0.767435,-0.234153,-0.138264,-1.724918,-0.46573,-0.469474,-0.234137,0.647689,1.0,-0.562288
1,-0.562288,0.241962,0.54256,1.579213,1.52303,-1.012831,-1.91328,-0.463418,0.767435,-0.234153,0.314247,-1.724918,-0.46573,-0.469474,-0.234137,2.0,-0.908024
2,-0.908024,-0.562288,0.241962,0.54256,1.579213,-1.412304,-1.012831,-1.91328,-0.463418,0.767435,1.465649,0.314247,-1.724918,-0.46573,-0.469474,3.0,-0.225776
3,-0.225776,-0.908024,-0.562288,0.241962,0.54256,0.067528,-1.412304,-1.012831,-1.91328,-0.463418,-1.424748,1.465649,0.314247,-1.724918,-0.46573,4.0,-0.544383
4,-0.544383,-0.225776,-0.908024,-0.562288,0.241962,0.110923,0.067528,-1.412304,-1.012831,-1.91328,-1.150994,-1.424748,1.465649,0.314247,-1.724918,5.0,0.375698
