In [1]:
# [1] Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from prophet import Prophet
from sklearn.metrics import mean_absolute_error

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler

import warnings
warnings.filterwarnings('ignore')


In [2]:
# [2] Carregar CSV
def load_csv(path='data/btc_limpo.csv'):
    df = pd.read_csv(path)
    df['Date'] = pd.to_datetime(df['Date'])
    df = df.sort_values('Date').reset_index(drop=True)
    df['Close'] = pd.to_numeric(df['Close'], errors='coerce')
    df = df.dropna(subset=['Close']).reset_index(drop=True)
    return df

df = load_csv()
print('Dataset:', df['Date'].min(), '->', df['Date'].max(), '| pontos =', len(df))
df.tail()


Dataset: 2020-06-01 00:00:00 -> 2025-05-31 00:00:00 | pontos = 1826


Unnamed: 0,Date,Close,Hight,Low,Open,Volume
1821,2025-05-27,108994.640625,110744.210938,107609.554688,109440.40625,57450176272
1822,2025-05-28,107802.328125,109298.289062,106812.929688,108992.171875,49155377493
1823,2025-05-29,105641.757812,108910.046875,105374.398438,107795.570312,56022752042
1824,2025-05-30,103998.570312,106308.945312,103685.789062,105646.210938,57655287183
1825,2025-05-31,104638.09375,104927.101562,103136.117188,103994.71875,38997843858


In [3]:
# [3] Split
def train_test_split(df, test_size=60):
    train = df.iloc[:-test_size].copy()
    test = df.iloc[-test_size:].copy()
    return train, test

train_df, test_df = train_test_split(df)
print('Treino:', train_df.shape, '| Teste:', test_df.shape)


Treino: (1766, 6) | Teste: (60, 6)


In [4]:
# [4] Avaliação
def mape(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

def evaluate(true, pred, name):
    score = mape(true, pred)
    print(f'{name} MAPE: {score:.2f}%')
    return score


In [5]:
# [5] Prophet
def run_prophet(train_df, test_df):
    prophet_df = train_df[['Date','Close']].rename(columns={'Date':'ds','Close':'y'})
    prophet_df['ds'] = pd.to_datetime(prophet_df['ds'])
    prophet_df['y'] = prophet_df['y'].astype(float)

    model = Prophet()
    model.fit(prophet_df)

    future = model.make_future_dataframe(periods=len(test_df), freq='D')
    forecast = model.predict(future)

    preds = forecast[['ds','yhat']].tail(len(test_df)).reset_index(drop=True)
    preds['yhat'] = preds['yhat'].astype(float)
    return preds['yhat'].values


In [6]:
# [6] LSTM
def run_lstm(train_df, test_df, lookback=30, epochs=10):
    scaler = MinMaxScaler(feature_range=(0,1))
    scaled = scaler.fit_transform(train_df[['Close']])

    X, y = [], []
    for i in range(lookback, len(scaled)):
        X.append(scaled[i-lookback:i, 0])
        y.append(scaled[i, 0])
    X, y = np.array(X), np.array(y)
    X = np.reshape(X, (X.shape[0], X.shape[1], 1))

    model = Sequential()
    model.add(LSTM(50, return_sequences=True, input_shape=(X.shape[1],1)))
    model.add(LSTM(50))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(X, y, epochs=epochs, batch_size=32, verbose=0)

    # Previsão no teste
    inputs = np.concatenate((scaled[-lookback:], MinMaxScaler().fit_transform(test_df[['Close']])))
    X_test, y_test = [], []
    for i in range(lookback, len(inputs)):
        X_test.append(inputs[i-lookback:i, 0])
        y_test.append(inputs[i, 0])
    X_test, y_test = np.array(X_test), np.array(y_test)
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

    preds_scaled = model.predict(X_test)
    preds = scaler.inverse_transform(preds_scaled)
    return preds.ravel()


In [7]:
# [7] Rodar experimentos
results = []

# Prophet (baseline)
prophet_preds = run_prophet(train_df, test_df)
prophet_score = evaluate(test_df['Close'].values, prophet_preds, "Prophet")
results.append(['Prophet', None, None, prophet_score])

# LSTM com várias configs
lookbacks = [15, 30, 60]
epochs_list = [10, 30, 50]

for lb in lookbacks:
    for ep in epochs_list:
        lstm_preds = run_lstm(train_df, test_df, lookback=lb, epochs=ep)
        score = evaluate(test_df['Close'].values[:len(lstm_preds)], lstm_preds, f"LSTM lb={lb}, ep={ep}")
        results.append(['LSTM', lb, ep, score])

        # gráfico
        plt.figure(figsize=(10,5))
        plt.plot(test_df['Date'][:len(lstm_preds)], test_df['Close'][:len(lstm_preds)], label='Real')
        plt.plot(test_df['Date'][:len(lstm_preds)], lstm_preds, label=f'LSTM lb={lb}, ep={ep}')
        plt.title(f'LSTM Previsão | Lookback={lb}, Epochs={ep}')
        plt.legend()
        plt.savefig(f'lstm_lb{lb}_ep{ep}.png')
        plt.close()

# Tabela de resultados
results_df = pd.DataFrame(results, columns=['Modelo','Lookback','Epochs','MAPE'])
print(results_df.sort_values('MAPE'))
results_df.to_csv('results_23-09.csv', index=False)


00:44:40 - cmdstanpy - INFO - Chain [1] start processing
00:44:44 - cmdstanpy - INFO - Chain [1] done processing


Prophet MAPE: 13.82%
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1s/step   
LSTM lb=15, ep=10 MAPE: 38.95%
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2s/step
LSTM lb=15, ep=30 MAPE: 37.79%
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 361ms/step
LSTM lb=15, ep=50 MAPE: 37.16%
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 308ms/step
LSTM lb=30, ep=10 MAPE: 34.90%
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 404ms/step
LSTM lb=30, ep=30 MAPE: 39.14%
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 432ms/step
LSTM lb=30, ep=50 MAPE: 38.82%
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 373ms/step
LSTM lb=60, ep=10 MAPE: 36.52%
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 316ms/step
LSTM lb=60, ep=30 MAPE: 37.92%
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 361ms/step
LSTM lb=60, ep=50 MAPE: 37.65%
    Modelo  Lookback  Epochs       MAPE
0  Pro