In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
import tensorflow as tf
from tensorflow.keras import Sequential, Input
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
# --- Load the CSV and normalise its columns ---
# Header names are stripped of surrounding whitespace, then the Portuguese
# headers 'Data' / 'Último' are renamed to 'Date' / 'Close'.
df = pd.read_csv('/content/Dados Históricos - Bitcoin.csv', low_memory=False)
df.columns = [c.strip() for c in df.columns]
df = df.rename(columns={'Data': 'Date', 'Último': 'Close'})

# Dates are expected as DD.MM.YYYY; anything else becomes NaT and is dropped below.
df['Date'] = pd.to_datetime(df['Date'], format='%d.%m.%Y', errors='coerce')

# Prices use '.' as thousands separator and ',' as decimal separator
# ('12.345,6' -> '12345.6'). pd.to_numeric(errors='coerce') turns malformed
# cells (e.g. '-' or empty text) into NaN instead of raising, so bad prices are
# dropped by the dropna below exactly like bad dates are.
close_text = (df['Close'].astype(str)
              .str.replace('.', '', regex=False)
              .str.replace(',', '.', regex=False))
df['Close'] = pd.to_numeric(close_text, errors='coerce').astype(float)
df = df.dropna(subset=['Date', 'Close']).sort_values('Date').reset_index(drop=True)

# Restrict to the study window (both bounds inclusive)
window_start = pd.Timestamp('2019-01-08')
window_end = pd.Timestamp('2025-10-08')
df = df[(df['Date'] >= window_start) & (df['Date'] <= window_end)].reset_index(drop=True)

# --- Transformation ---
# Natural log of the close, smoothed with a 5-row rolling mean. min_periods=1
# means the first rows are averaged over fewer than 5 observations instead of
# being NaN.
# NOTE(review): the scaled inputs AND the targets are both derived from this
# smoothed column, so the "real" values recovered later via inverse_transform +
# exp are smoothed prices, not raw closes — confirm this is the intended
# evaluation target.
log_close = np.log(df['Close'])
df['Close_log'] = log_close.rolling(window=5, min_periods=1).mean()

# --- Train/test boundaries ---
# Declared before scaling because the scaler may only see the training period.
train_end = pd.Timestamp('2024-12-31')
test_end  = pd.Timestamp('2025-10-08')

# --- Scaling ---
# Fit min/max on rows dated up to train_end only. Fitting on the whole series
# would leak the test period's price range into the training features.
# The fitted scaler is then applied to every row; test-period values may fall
# outside [0, 1], which is expected.
scaler = MinMaxScaler()
scaler.fit(df.loc[df['Date'] <= train_end, ['Close_log']].values)
scaled = scaler.transform(df[['Close_log']].values)

# --- Sequences ---
# Each sample is the SEQ_LEN scaled values preceding row i; its target is the
# scaled value at row i, and target_dates records the date of that row.
SEQ_LEN = 60
series = scaled[:, 0]
X = np.array(
    [series[i - SEQ_LEN:i] for i in range(SEQ_LEN, len(series))]
).reshape(-1, SEQ_LEN, 1)
y = series[SEQ_LEN:].copy()
target_dates = pd.to_datetime(df['Date'].iloc[SEQ_LEN:].to_numpy())

# --- Train/test split ---
# Samples are assigned by the date of their target, so a test sample's input
# window may reach back into the training period but never the other way round.
train_mask = target_dates <= train_end
test_mask  = (target_dates > train_end) & (target_dates <= test_end)

X_train, y_train = X[train_mask], y[train_mask]
X_test,  y_test  = X[test_mask],  y[test_mask]
dates_test = target_dates[test_mask]

print(f"Conjuntos → treino: {X_train.shape[0]} | teste: {X_test.shape[0]}")

In [None]:
# --- LSTM model ---
# Seed the Python, NumPy and TensorFlow RNGs so weight initialisation, dropout
# masks and batch shuffling are repeatable on Restart & Run All (some GPU ops
# may still be nondeterministic).
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Two stacked LSTM layers (the first returns the full sequence so the second
# can consume it), each followed by dropout, then a small dense head that
# outputs a single scaled value.
model = Sequential([
    Input(shape=(SEQ_LEN, 1)),
    LSTM(128, return_sequences=True),
    Dropout(0.2),
    LSTM(64),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dense(1)
])
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss='mse')

# Stop once val_loss has not improved for 10 epochs and roll back to the best weights.
early = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# validation_split=0.1 holds out 10% of the training samples for val_loss
# (per the Keras docs, the last samples of the arrays, taken before shuffling).
history = model.fit(
    X_train, y_train,
    validation_split=0.1,
    epochs=120, batch_size=32, callbacks=[early], verbose=1
)

In [None]:
# --- Predictions ---
# Model output is in scaled log space, one value per test window.
y_pred_test = model.predict(X_test, verbose=0).flatten()

# Undo the min-max scaling first (back to log space), then undo the log.
log_true = scaler.inverse_transform(y_test.reshape(-1, 1)).flatten()
log_pred = scaler.inverse_transform(y_pred_test.reshape(-1, 1)).flatten()
y_test_real = np.exp(log_true)
y_pred_real = np.exp(log_pred)

In [None]:
# === Metrics ===
# All metrics compare the back-transformed reference and predicted values.
erro_absoluto = np.abs(y_test_real - y_pred_real)

mse = mean_squared_error(y_test_real, y_pred_real)
rmse = np.sqrt(mse)
mape = 100 * mean_absolute_percentage_error(y_test_real, y_pred_real)  # as a percentage
r2 = r2_score(y_test_real, y_pred_real)

print(
    f"\nRMSE: {rmse:.4f}",
    f"MAPE: {mape:.2f}%",
    f"R²: {r2:.4f}",
    sep="\n",
)

In [None]:
# === Training curves ===
# Per-epoch loss recorded by model.fit for the training and validation subsets.
fig, ax = plt.subplots(figsize=(9, 4))
ax.plot(history.history['loss'], label='Treino')
# Falls back to an empty series if no validation loss was recorded.
ax.plot(history.history.get('val_loss', []), label='Validação')

ax.set_title('Loss por época', fontsize=28, weight='bold')
ax.set_xlabel('Época', fontsize=22)
ax.set_ylabel('MSE', fontsize=22)
ax.tick_params(axis='both', labelsize=18)

ax.legend(fontsize=18)
ax.grid(True)
fig.tight_layout()
plt.show()


# === Real vs predicted, with training-period history for context ===
fig, ax = plt.subplots(figsize=(14, 6))

# Raw closing prices up to the end of the training period.
hist_mask = df['Date'] <= train_end
history_rows = df.loc[hist_mask]
ax.plot(history_rows['Date'], history_rows['Close'], label='Histórico', alpha=0.5)

# Test-period reference and prediction on the same axis.
ax.plot(dates_test, y_test_real, label='Real (2025)', linewidth=2)
ax.plot(dates_test, y_pred_real, label='Previsto (2025)', linewidth=2, linestyle='--')

# Vertical marker at the train/test boundary.
ax.axvline(train_end, color='gray', linestyle='--')

ax.set_title('BTC — Treino até 2024 | Teste em 2025', fontsize=28, weight='bold')
ax.set_xlabel('Data', fontsize=22)
ax.set_ylabel('Preço de Fechamento (USD)', fontsize=22)
ax.tick_params(axis='both', labelsize=18)

ax.legend(fontsize=18)
ax.grid(True)
fig.tight_layout()
plt.show()


# === Real vs predicted, test period only ===
fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(dates_test, y_test_real, label='Real 2025', linewidth=2)
ax.plot(dates_test, y_pred_real, label='Previsto 2025 (LSTM)', linewidth=2, linestyle='--')

ax.set_title('BTC — Real vs Previsto (2025, LSTM)', fontsize=28, weight='bold')
ax.set_xlabel('Data', fontsize=22)
ax.set_ylabel('Preço de Fechamento (USD)', fontsize=22)
ax.tick_params(axis='both', labelsize=18)

ax.legend(fontsize=18)
ax.grid(True)
fig.tight_layout()
plt.show()


# === Day-by-day comparison ===
# Signed percentage difference of the prediction relative to the reference value.
diff_pct = (y_pred_real - y_test_real) / y_test_real * 100

comparison_df = pd.DataFrame({
    'Data': dates_test.values,
    'Real': y_test_real,
    'Previsto': y_pred_real,
    'Erro Absoluto': erro_absoluto,
    'Diferença %': diff_pct,
})

# Written to the current working directory, without the index column.
comparison_df.to_csv("comparacao_real_vs_previsto_2025.csv", index=False)


# === Absolute error over the test period ===
fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(dates_test, erro_absoluto, linewidth=1.8)

ax.set_title('BTC — Erro Absoluto das Previsões (|Real − Previsto|)', fontsize=28, weight='bold')
ax.set_xlabel('Data', fontsize=22)
ax.set_ylabel('Erro Absoluto', fontsize=22)
ax.tick_params(axis='both', labelsize=18)

ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

# Confirmation message for the comparison CSV produced earlier in this cell.
print("\nArquivo 'comparacao_real_vs_previsto_2025.csv' salvo")
