In [19]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Load the dataset
dolar_file_path = 'C:/Users/milen/OneDrive/Documentos/TCC/Bases/base_completa.csv'
dolar_data = pd.read_csv(dolar_file_path)

# Convert 'Date' to datetime
dolar_data['Date'] = pd.to_datetime(dolar_data['Date'])

# Calculate day-to-day price difference
dolar_data['Price_Diff'] = dolar_data['Preco_Real'].diff()

# Drop the first row as it will have a NaN value for 'Price_Diff'
dolar_data = dolar_data.dropna()

# Configuration for test period
start_test_date = pd.to_datetime('2022-01-01')
end_test_date = pd.to_datetime('2032-08-01')

# Initialize the XGBoost Regressor
xgb_regressor = xgb.XGBRegressor(random_state=100)

# Prepare dataframes for storing predictions and metrics
predictions_df = pd.DataFrame(columns=['Date', 'Actual', 'Predicted'])
all_actuals = []
all_predictions = []

# Training the model up to the first test window
train_data = dolar_data[dolar_data['Date'] < start_test_date]
X_train = train_data.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
y_train = train_data['Price_Diff']
xgb_regressor.fit(X_train, y_train)

# Testing the model in each day window
for current_date in pd.date_range(start=start_test_date, end=end_test_date, freq='1D'):
    test_start = current_date
    test_end = current_date + pd.Timedelta(days=1)

    if test_end > dolar_data['Date'].max():
        test_end = dolar_data['Date'].max()

    test_window = dolar_data[(dolar_data['Date'] >= test_start) & (dolar_data['Date'] < test_end)]
    
    if test_window.empty:
        continue

    X_test = test_window.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_test = test_window['Price_Diff']
    
    # Re-train the model on each iteration (if this is intended)
    xgb_regressor = xgb.XGBRegressor(random_state=42)
    train_window = dolar_data[dolar_data['Date'] < test_start]
    X_train = train_window.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_train = train_window['Price_Diff']
    xgb_regressor.fit(X_train, y_train)

    y_pred = xgb_regressor.predict(X_test)

    # Append results to dataframe and accumulate for overall metrics
    predictions_df = predictions_df.append(pd.DataFrame({
        'Date': test_window['Date'],
        'Actual': y_test,
        'Predicted': y_pred
    }), ignore_index=True)
    all_actuals.extend(y_test)
    all_predictions.extend(y_pred)

# Calculate overall metrics for the entire period
overall_mse = mean_squared_error(all_actuals, all_predictions)
overall_rmse = np.sqrt(overall_mse)
overall_mae = mean_absolute_error(all_actuals, all_predictions)
overall_r2 = r2_score(all_actuals, all_predictions)

print("Overall Metrics for the Entire Validation Period:")
print(f"MSE: {overall_mse}, RMSE: {overall_rmse}, MAE: {overall_mae}, R2: {overall_r2}")

# If you want to see predictions for a specific period
desired_prediction_period = predictions_df[
    (predictions_df['Date'] >= pd.to_datetime('2023-07-01')) &
    (predictions_df['Date'] <= pd.to_datetime('2023-07-09'))
]

print(desired_prediction_period)


  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.Data

Overall Metrics for the Entire Validation Period:
MSE: 396.3335734345713, RMSE: 19.90812832575105, MAE: 15.13707896596086, R2: -0.3399491229296603
          Date  Actual  Predicted
374 2023-07-03   -1.11 -15.314046
375 2023-07-04    0.22 -15.184363
376 2023-07-05   -2.45 -15.745965
377 2023-07-06    7.16  -8.330201
378 2023-07-07   -0.22  -7.311473


In [1]:
import pandas as pd
import xgboost as xgb

# Carregar dados de treino e teste
train_data_path = "C:/Users/milen/OneDrive/Documentos/TCC/Bases/base_completa.csv"
test_data_path = "C:/Users/milen/OneDrive/Documentos/TCC/Bases/teste.csv"
train_df = pd.read_csv(train_data_path)
test_df = pd.read_csv(test_data_path)

# Processamento dos dados de treino e teste
# (assumindo que 'Preco_Real' é a variável que você está prevendo)
train_df['Date'] = pd.to_datetime(train_df['Date'])
train_df['Diferenca_1_Dia'] = train_df['Preco_Real'].diff(periods=1)
train_df.dropna(inplace=True)  # Remover linhas com NaN após cálculo da diferença

test_df['Date'] = pd.to_datetime(test_df['Date'])
test_df['Diferenca_1_Dia'] = test_df['Preco_Real'].diff(periods=1)
test_df.fillna(method='bfill', inplace=True)  # Preencher NaN

# Preparar conjuntos de treino e teste
X_train = train_df.drop(['Diferenca_1_Dia', 'Date', 'Preco_Real'], axis=1)  # Excluindo colunas não utilizadas
y_train = train_df['Diferenca_1_Dia']
X_test = test_df.drop(['Diferenca_1_Dia', 'Date', 'Preco_Real'], axis=1)
y_test = test_df['Diferenca_1_Dia']

# Treinar modelo XGBoost
model = xgb.XGBRegressor(objective ='reg:squarederror')
model.fit(X_train, y_train)

# Fazer previsões
predictions = model.predict(X_test)

# Avaliar o modelo
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

mse = mean_squared_error(y_test, predictions)
rmse = mse ** 0.5
mae = mean_absolute_error(y_test, predictions)
r2 = r2_score(y_test, predictions)

print("MSE:", mse)
print("RMSE:", rmse)
print("MAE:", mae)
print("R2:", r2)

# Adicionando previsões ao DataFrame de teste para visualização
test_df['Diferenca_Pontos_Predita'] = predictions
print(test_df[['Date', 'Diferenca_Pontos_Predita', 'Diferenca_1_Dia']])


MSE: 389.3280240506169
RMSE: 19.73139691077692
MAE: 15.00519107623913
R2: -0.30494147799875804
          Date  Diferenca_Pontos_Predita  Diferenca_1_Dia
0   2022-01-03                 13.576487            40.10
1   2022-01-04                  3.524353            40.10
2   2022-01-05                  7.479208            -1.32
3   2022-01-06                  3.227375            13.34
4   2022-01-07                 -1.041847            27.04
..         ...                       ...              ...
453 2023-10-24                 -2.430463             4.38
454 2023-10-25                 -0.698361           -25.06
455 2023-10-26                  2.110602            -3.07
456 2023-10-27                  3.158530            -6.19
457 2023-10-30                 -0.590904            -4.67

[458 rows x 3 columns]


In [2]:
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Carregar dados de treino e teste
train_data_path = "C:/Users/milen/OneDrive/Documentos/TCC/Bases/base_completa.csv"
test_data_path = "C:/Users/milen/OneDrive/Documentos/TCC/Bases/teste.csv"
train_df = pd.read_csv(train_data_path)
test_df = pd.read_csv(test_data_path)

# Processamento dos dados de treino e teste
train_df['Date'] = pd.to_datetime(train_df['Date'])
train_df['Diferenca_1_Dia'] = train_df['Preco_Real'].diff(periods=1)
train_df.dropna(inplace=True)  # Remover linhas com NaN após cálculo da diferença

test_df['Date'] = pd.to_datetime(test_df['Date'])
test_df['Diferenca_1_Dia'] = test_df['Preco_Real'].diff(periods=1)
test_df.fillna(method='bfill', inplace=True)  # Preencher NaN

# Preparar conjuntos de treino e teste
X_train = train_df.drop(['Diferenca_1_Dia', 'Date', 'Preco_Real'], axis=1)
y_train = train_df['Diferenca_1_Dia']
X_test = test_df.drop(['Diferenca_1_Dia', 'Date', 'Preco_Real'], axis=1)
y_test = test_df['Diferenca_1_Dia']

# Normalização dos dados
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Construir o modelo de Deep Learning
model = Sequential()
model.add(Dense(50, input_dim=X_train_scaled.shape[1], activation='relu'))
model.add(Dense(25, activation='relu'))
model.add(Dense(1))

model.compile(optimizer='adam', loss='mean_squared_error')

# Treinar o modelo
model.fit(X_train_scaled, y_train, epochs=50, batch_size=32, verbose=0)

# Fazer previsões
predictions = model.predict(X_test_scaled)

# Avaliar o modelo
mse = mean_squared_error(y_test, predictions)
rmse = mse ** 0.5
mae = mean_absolute_error(y_test, predictions)
r2 = r2_score(y_test, predictions)

print("MSE:", mse)
print("RMSE:", rmse)
print("MAE:", mae)
print("R2:", r2)

# Adicionando previsões ao DataFrame de teste para visualização
test_df['Diferenca_Pontos_Predita'] = predictions.flatten()
print(test_df[['Date', 'Diferenca_Pontos_Predita', 'Diferenca_1_Dia']])


MSE: 359.44020989352066
RMSE: 18.95890845733268
MAE: 14.66641922790639
R2: -0.20476413146579264
          Date  Diferenca_Pontos_Predita  Diferenca_1_Dia
0   2022-01-03                  7.367544            40.10
1   2022-01-04                  2.949821            40.10
2   2022-01-05                 -3.340913            -1.32
3   2022-01-06                 -1.043845            13.34
4   2022-01-07                 11.748455            27.04
..         ...                       ...              ...
453 2023-10-24                 -9.799545             4.38
454 2023-10-25                 -9.116490           -25.06
455 2023-10-26                -16.830322            -3.07
456 2023-10-27                -11.090653            -6.19
457 2023-10-30                 -8.434658            -4.67

[458 rows x 3 columns]


In [3]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Carregar dados de treino e teste
train_data_path = "C:/Users/milen/OneDrive/Documentos/TCC/Bases/base_completa.csv"
test_data_path = "C:/Users/milen/OneDrive/Documentos/TCC/Bases/teste.csv"
train_df = pd.read_csv(train_data_path)
test_df = pd.read_csv(test_data_path)

# Processamento dos dados
# (Por exemplo, aplicando transformações logarítmicas, raiz quadrada, etc., conforme necessário)
# ...

# Convertendo 'Date' para datetime e extração de características de data
for df in [train_df, test_df]:
    df['Date'] = pd.to_datetime(df['Date'])
    df['Year'] = df['Date'].dt.year
    df['Month'] = df['Date'].dt.month
    df['Day'] = df['Date'].dt.day
    # Incluir outras transformações ou extrações de features relevantes aqui

# Cálculo da Diferença (pode ser primeira diferença ou outra transformação apropriada)
train_df['Diferenca_1_Dia'] = train_df['Preco_Real'].diff().fillna(0)
test_df['Diferenca_1_Dia'] = test_df['Preco_Real'].diff().fillna(0)

# Preparar conjuntos de treino e teste
X_train = train_df.drop(['Date', 'Preco_Real', 'Diferenca_1_Dia'], axis=1)
y_train = train_df['Diferenca_1_Dia']
X_test = test_df.drop(['Date', 'Preco_Real', 'Diferenca_1_Dia'], axis=1)
y_test = test_df['Diferenca_1_Dia']

# Normalização dos dados (se necessário)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Treinar o modelo Random Forest
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train_scaled, y_train)

# Fazer previsões
predictions = model.predict(X_test_scaled)

# Avaliação do modelo
mse = mean_squared_error(y_test, predictions)
rmse = mse ** 0.5
mae = mean_absolute_error(y_test, predictions)
r2 = r2_score(y_test, predictions)

print("MSE:", mse)
print("RMSE:", rmse)
print("MAE:", mae)
print("R2:", r2)

# Adicionando previsões ao DataFrame de teste para visualização
test_df['Diferenca_Pontos_Predita'] = predictions
print(test_df[['Date', 'Diferenca_Pontos_Predita', 'Diferenca_1_Dia']])


MSE: 348.2784691580569
RMSE: 18.6622203705255
MAE: 14.073481877729265
R2: -0.1821079631282847
          Date  Diferenca_Pontos_Predita  Diferenca_1_Dia
0   2022-01-03                    9.1845             0.00
1   2022-01-04                   11.1441            40.10
2   2022-01-05                    7.2134            -1.32
3   2022-01-06                    9.7806            13.34
4   2022-01-07                   15.1379            27.04
..         ...                       ...              ...
453 2023-10-24                   10.1692             4.38
454 2023-10-25                    8.7744           -25.06
455 2023-10-26                    0.3803            -3.07
456 2023-10-27                    1.6855            -6.19
457 2023-10-30                   -1.0074            -4.67

[458 rows x 3 columns]


In [13]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Load the dataset
dolar_file_path = 'C:/Users/milen/OneDrive/Documentos/TCC/Bases/clima.csv'
dolar_data = pd.read_csv(dolar_file_path)

# Convert 'Date' to datetime
dolar_data['Date'] = pd.to_datetime(dolar_data['Date'])

# Calculate day-to-day price difference
dolar_data['Price_Diff'] = dolar_data['Preco_Real'].diff()

# Drop the first row as it will have a NaN value for 'Price_Diff'
dolar_data = dolar_data.dropna()

# Configuration for test period
start_test_date = pd.to_datetime('2022-01-01')
end_test_date = pd.to_datetime('2032-08-01')

# Initialize the XGBoost Regressor
xgb_regressor = xgb.XGBRegressor(random_state=42)

# Prepare dataframes for storing predictions and metrics
predictions_df = pd.DataFrame(columns=['Date', 'Actual', 'Predicted'])
all_actuals = []
all_predictions = []

# Training the model up to the first test window
train_data = dolar_data[dolar_data['Date'] < start_test_date]
X_train = train_data.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
y_train = train_data['Price_Diff']
xgb_regressor.fit(X_train, y_train)

# Testing the model in each day window
for current_date in pd.date_range(start=start_test_date, end=end_test_date, freq='1D'):
    test_start = current_date
    test_end = current_date + pd.Timedelta(days=1)

    if test_end > dolar_data['Date'].max():
        test_end = dolar_data['Date'].max()

    test_window = dolar_data[(dolar_data['Date'] >= test_start) & (dolar_data['Date'] < test_end)]
    
    if test_window.empty:
        continue

    X_test = test_window.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_test = test_window['Price_Diff']
    
    # Re-train the model on each iteration (if this is intended)
    xgb_regressor = xgb.XGBRegressor(random_state=42)
    train_window = dolar_data[dolar_data['Date'] < test_start]
    X_train = train_window.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_train = train_window['Price_Diff']
    xgb_regressor.fit(X_train, y_train)

    y_pred = xgb_regressor.predict(X_test)

    # Append results to dataframe and accumulate for overall metrics
    predictions_df = predictions_df.append(pd.DataFrame({
        'Date': test_window['Date'],
        'Actual': y_test,
        'Predicted': y_pred
    }), ignore_index=True)
    all_actuals.extend(y_test)
    all_predictions.extend(y_pred)

# Calculate overall metrics for the entire period
overall_mse = mean_squared_error(all_actuals, all_predictions)
overall_rmse = np.sqrt(overall_mse)
overall_mae = mean_absolute_error(all_actuals, all_predictions)
overall_r2 = r2_score(all_actuals, all_predictions)

print("Overall Metrics for the Entire Validation Period:")
print(f"MSE: {overall_mse}, RMSE: {overall_rmse}, MAE: {overall_mae}, R2: {overall_r2}")

# If you want to see predictions for a specific period
desired_prediction_period = predictions_df[
    (predictions_df['Date'] >= pd.to_datetime('2023-07-01')) &
    (predictions_df['Date'] <= pd.to_datetime('2023-07-09'))
]

print(desired_prediction_period)


  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.Data

Overall Metrics for the Entire Validation Period:
MSE: 315.74337814663494, RMSE: 17.769169315042134, MAE: 13.41529832454101, R2: -0.06748479305469601
          Date  Actual  Predicted
374 2023-07-03   -1.11  -6.370439
375 2023-07-04    0.22  -7.605386
376 2023-07-05   -2.45  -3.382757
377 2023-07-06    7.16  -3.253150
378 2023-07-07   -0.22  -9.897977


In [14]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Load the dataset
dolar_file_path = 'C:/Users/milen/OneDrive/Documentos/TCC/Bases/dolar.csv'
dolar_data = pd.read_csv(dolar_file_path)

# Convert 'Date' to datetime
dolar_data['Date'] = pd.to_datetime(dolar_data['Date'])

# Calculate day-to-day price difference
dolar_data['Price_Diff'] = dolar_data['Preco_Real'].diff()

# Drop the first row as it will have a NaN value for 'Price_Diff'
dolar_data = dolar_data.dropna()

# Configuration for test period
start_test_date = pd.to_datetime('2022-01-01')
end_test_date = pd.to_datetime('2032-08-01')

# Initialize the XGBoost Regressor
xgb_regressor = xgb.XGBRegressor(random_state=42)

# Prepare dataframes for storing predictions and metrics
predictions_df = pd.DataFrame(columns=['Date', 'Actual', 'Predicted'])
all_actuals = []
all_predictions = []

# Training the model up to the first test window
train_data = dolar_data[dolar_data['Date'] < start_test_date]
X_train = train_data.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
y_train = train_data['Price_Diff']
xgb_regressor.fit(X_train, y_train)

# Testing the model in each day window
for current_date in pd.date_range(start=start_test_date, end=end_test_date, freq='1D'):
    test_start = current_date
    test_end = current_date + pd.Timedelta(days=1)

    if test_end > dolar_data['Date'].max():
        test_end = dolar_data['Date'].max()

    test_window = dolar_data[(dolar_data['Date'] >= test_start) & (dolar_data['Date'] < test_end)]
    
    if test_window.empty:
        continue

    X_test = test_window.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_test = test_window['Price_Diff']
    
    # Re-train the model on each iteration (if this is intended)
    xgb_regressor = xgb.XGBRegressor(random_state=42)
    train_window = dolar_data[dolar_data['Date'] < test_start]
    X_train = train_window.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_train = train_window['Price_Diff']
    xgb_regressor.fit(X_train, y_train)

    y_pred = xgb_regressor.predict(X_test)

    # Append results to dataframe and accumulate for overall metrics
    predictions_df = predictions_df.append(pd.DataFrame({
        'Date': test_window['Date'],
        'Actual': y_test,
        'Predicted': y_pred
    }), ignore_index=True)
    all_actuals.extend(y_test)
    all_predictions.extend(y_pred)

# Calculate overall metrics for the entire period
overall_mse = mean_squared_error(all_actuals, all_predictions)
overall_rmse = np.sqrt(overall_mse)
overall_mae = mean_absolute_error(all_actuals, all_predictions)
overall_r2 = r2_score(all_actuals, all_predictions)

print("Overall Metrics for the Entire Validation Period:")
print(f"MSE: {overall_mse}, RMSE: {overall_rmse}, MAE: {overall_mae}, R2: {overall_r2}")

# If you want to see predictions for a specific period
desired_prediction_period = predictions_df[
    (predictions_df['Date'] >= pd.to_datetime('2023-07-01')) &
    (predictions_df['Date'] <= pd.to_datetime('2023-07-09'))
]

print(desired_prediction_period)


  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.Data

Overall Metrics for the Entire Validation Period:
MSE: 426.71428730335106, RMSE: 20.657063859690975, MAE: 15.116748338539963, R2: -0.44266212437859975
          Date  Actual  Predicted
374 2023-07-03   -1.11  -4.885148
375 2023-07-04    0.22  -6.069565
376 2023-07-05   -2.45  -1.513067
377 2023-07-06    7.16   5.000689
378 2023-07-07   -0.22  -1.259574


In [15]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Load the dataset
dolar_file_path = 'C:/Users/milen/OneDrive/Documentos/TCC/Bases/ibovespa.csv'
dolar_data = pd.read_csv(dolar_file_path)

# Convert 'Date' to datetime
dolar_data['Date'] = pd.to_datetime(dolar_data['Date'])

# Calculate day-to-day price difference
dolar_data['Price_Diff'] = dolar_data['Preco_Real'].diff()

# Drop the first row as it will have a NaN value for 'Price_Diff'
dolar_data = dolar_data.dropna()

# Configuration for test period
start_test_date = pd.to_datetime('2022-01-01')
end_test_date = pd.to_datetime('2032-08-01')

# Initialize the XGBoost Regressor
xgb_regressor = xgb.XGBRegressor(random_state=42)

# Prepare dataframes for storing predictions and metrics
predictions_df = pd.DataFrame(columns=['Date', 'Actual', 'Predicted'])
all_actuals = []
all_predictions = []

# Training the model up to the first test window
train_data = dolar_data[dolar_data['Date'] < start_test_date]
X_train = train_data.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
y_train = train_data['Price_Diff']
xgb_regressor.fit(X_train, y_train)

# Testing the model in each day window
for current_date in pd.date_range(start=start_test_date, end=end_test_date, freq='1D'):
    test_start = current_date
    test_end = current_date + pd.Timedelta(days=1)

    if test_end > dolar_data['Date'].max():
        test_end = dolar_data['Date'].max()

    test_window = dolar_data[(dolar_data['Date'] >= test_start) & (dolar_data['Date'] < test_end)]
    
    if test_window.empty:
        continue

    X_test = test_window.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_test = test_window['Price_Diff']
    
    # Re-train the model on each iteration (if this is intended)
    xgb_regressor = xgb.XGBRegressor(random_state=42)
    train_window = dolar_data[dolar_data['Date'] < test_start]
    X_train = train_window.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_train = train_window['Price_Diff']
    xgb_regressor.fit(X_train, y_train)

    y_pred = xgb_regressor.predict(X_test)

    # Append results to dataframe and accumulate for overall metrics
    predictions_df = predictions_df.append(pd.DataFrame({
        'Date': test_window['Date'],
        'Actual': y_test,
        'Predicted': y_pred
    }), ignore_index=True)
    all_actuals.extend(y_test)
    all_predictions.extend(y_pred)

# Calculate overall metrics for the entire period
overall_mse = mean_squared_error(all_actuals, all_predictions)
overall_rmse = np.sqrt(overall_mse)
overall_mae = mean_absolute_error(all_actuals, all_predictions)
overall_r2 = r2_score(all_actuals, all_predictions)

print("Overall Metrics for the Entire Validation Period:")
print(f"MSE: {overall_mse}, RMSE: {overall_rmse}, MAE: {overall_mae}, R2: {overall_r2}")

# If you want to see predictions for a specific period
desired_prediction_period = predictions_df[
    (predictions_df['Date'] >= pd.to_datetime('2023-07-01')) &
    (predictions_df['Date'] <= pd.to_datetime('2023-07-09'))
]

print(desired_prediction_period)


  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.Data

Overall Metrics for the Entire Validation Period:
MSE: 386.9768127233689, RMSE: 19.671726226322104, MAE: 14.871742986441316, R2: -0.30831520607576013
          Date  Actual  Predicted
374 2023-07-03   -1.11   7.204421
375 2023-07-04    0.22   3.600235
376 2023-07-05   -2.45  -1.638054
377 2023-07-06    7.16 -13.234259
378 2023-07-07   -0.22  -3.461109


In [16]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Load the dataset
dolar_file_path = 'C:/Users/milen/OneDrive/Documentos/TCC/Bases/selic.csv'
dolar_data = pd.read_csv(dolar_file_path)

# Convert 'Date' to datetime
dolar_data['Date'] = pd.to_datetime(dolar_data['Date'])

# Calculate day-to-day price difference
dolar_data['Price_Diff'] = dolar_data['Preco_Real'].diff()

# Drop the first row as it will have a NaN value for 'Price_Diff'
dolar_data = dolar_data.dropna()

# Configuration for test period
start_test_date = pd.to_datetime('2022-01-01')
end_test_date = pd.to_datetime('2032-08-01')

# Initialize the XGBoost Regressor
xgb_regressor = xgb.XGBRegressor(random_state=42)

# Prepare dataframes for storing predictions and metrics
predictions_df = pd.DataFrame(columns=['Date', 'Actual', 'Predicted'])
all_actuals = []
all_predictions = []

# Training the model up to the first test window
train_data = dolar_data[dolar_data['Date'] < start_test_date]
X_train = train_data.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
y_train = train_data['Price_Diff']
xgb_regressor.fit(X_train, y_train)

# Testing the model in each day window
for current_date in pd.date_range(start=start_test_date, end=end_test_date, freq='1D'):
    test_start = current_date
    test_end = current_date + pd.Timedelta(days=1)

    if test_end > dolar_data['Date'].max():
        test_end = dolar_data['Date'].max()

    test_window = dolar_data[(dolar_data['Date'] >= test_start) & (dolar_data['Date'] < test_end)]
    
    if test_window.empty:
        continue

    X_test = test_window.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_test = test_window['Price_Diff']
    
    # Re-train the model on each iteration (if this is intended)
    xgb_regressor = xgb.XGBRegressor(random_state=42)
    train_window = dolar_data[dolar_data['Date'] < test_start]
    X_train = train_window.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_train = train_window['Price_Diff']
    xgb_regressor.fit(X_train, y_train)

    y_pred = xgb_regressor.predict(X_test)

    # Append results to dataframe and accumulate for overall metrics
    predictions_df = predictions_df.append(pd.DataFrame({
        'Date': test_window['Date'],
        'Actual': y_test,
        'Predicted': y_pred
    }), ignore_index=True)
    all_actuals.extend(y_test)
    all_predictions.extend(y_pred)

# Calculate overall metrics for the entire period
overall_mse = mean_squared_error(all_actuals, all_predictions)
overall_rmse = np.sqrt(overall_mse)
overall_mae = mean_absolute_error(all_actuals, all_predictions)
overall_r2 = r2_score(all_actuals, all_predictions)

print("Overall Metrics for the Entire Validation Period:")
print(f"MSE: {overall_mse}, RMSE: {overall_rmse}, MAE: {overall_mae}, R2: {overall_r2}")

# If you want to see predictions for a specific period
desired_prediction_period = predictions_df[
    (predictions_df['Date'] >= pd.to_datetime('2023-07-01')) &
    (predictions_df['Date'] <= pd.to_datetime('2023-07-09'))
]

print(desired_prediction_period)


  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.Data

Overall Metrics for the Entire Validation Period:
MSE: 460.0685414016277, RMSE: 21.44920840967395, MAE: 15.929329337605058, R2: -0.5554282550337837
          Date  Actual  Predicted
374 2023-07-03   -1.11  -6.776595
375 2023-07-04    0.22  -0.526764
376 2023-07-05   -2.45  -4.549476
377 2023-07-06    7.16  -1.535267
378 2023-07-07   -0.22  -3.553224


30 DIAS

In [18]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Load the dataset
dolar_file_path = 'C:/Users/milen/OneDrive/Documentos/TCC/Bases/selic.csv'
dolar_data = pd.read_csv(dolar_file_path)

# Convert 'Date' to datetime
dolar_data['Date'] = pd.to_datetime(dolar_data['Date'])

# Calculate day-to-day price difference
dolar_data['Price_Diff'] = dolar_data['Preco_Real'].diff()

# Drop the first row as it will have a NaN value for 'Price_Diff'
dolar_data = dolar_data.dropna()

# Configuration for test period
start_test_date = pd.to_datetime('2022-01-01')
end_test_date = pd.to_datetime('2032-08-01')

# Initialize the XGBoost Regressor
xgb_regressor = xgb.XGBRegressor(random_state=42)

# Prepare dataframes for storing predictions and metrics
predictions_df = pd.DataFrame(columns=['Date', 'Actual', 'Predicted'])
all_actuals = []
all_predictions = []

# Training the model up to the first test window
train_data = dolar_data[dolar_data['Date'] < start_test_date]
X_train = train_data.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
y_train = train_data['Price_Diff']
xgb_regressor.fit(X_train, y_train)

# Testing the model in each day window
for current_date in pd.date_range(start=start_test_date, end=end_test_date, freq='30D'):
    test_start = current_date
    test_end = current_date + pd.Timedelta(days=30)

    if test_end > dolar_data['Date'].max():
        test_end = dolar_data['Date'].max()

    test_window = dolar_data[(dolar_data['Date'] >= test_start) & (dolar_data['Date'] < test_end)]
    
    if test_window.empty:
        continue

    X_test = test_window.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_test = test_window['Price_Diff']
    
    # Re-train the model on each iteration (if this is intended)
    xgb_regressor = xgb.XGBRegressor(random_state=42)
    train_window = dolar_data[dolar_data['Date'] < test_start]
    X_train = train_window.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_train = train_window['Price_Diff']
    xgb_regressor.fit(X_train, y_train)

    y_pred = xgb_regressor.predict(X_test)

    # Append results to dataframe and accumulate for overall metrics
    predictions_df = predictions_df.append(pd.DataFrame({
        'Date': test_window['Date'],
        'Actual': y_test,
        'Predicted': y_pred
    }), ignore_index=True)
    all_actuals.extend(y_test)
    all_predictions.extend(y_pred)

# Calculate overall metrics for the entire period
overall_mse = mean_squared_error(all_actuals, all_predictions)
overall_rmse = np.sqrt(overall_mse)
overall_mae = mean_absolute_error(all_actuals, all_predictions)
overall_r2 = r2_score(all_actuals, all_predictions)

print("Overall Metrics for the Entire Validation Period:")
print(f"MSE: {overall_mse}, RMSE: {overall_rmse}, MAE: {overall_mae}, R2: {overall_r2}")

# If you want to see predictions for a specific period
desired_prediction_period = predictions_df[
    (predictions_df['Date'] >= pd.to_datetime('2023-07-01')) &
    (predictions_df['Date'] <= pd.to_datetime('2023-07-09'))
]

print(desired_prediction_period)


  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.Data

Overall Metrics for the Entire Validation Period:
MSE: 442.6820671156061, RMSE: 21.04001110065311, MAE: 15.79041525471247, R2: -0.4966469845789361
          Date  Actual  Predicted
374 2023-07-03   -1.11  -8.505659
375 2023-07-04    0.22  -4.532252
376 2023-07-05   -2.45  -6.252424
377 2023-07-06    7.16   0.150635
378 2023-07-07   -0.22  -2.835280


  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({


In [20]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Load the dataset
dolar_file_path = 'C:/Users/milen/OneDrive/Documentos/TCC/Bases/clima.csv'
dolar_data = pd.read_csv(dolar_file_path)

# Convert 'Date' to datetime
dolar_data['Date'] = pd.to_datetime(dolar_data['Date'])

# Calculate day-to-day price difference
dolar_data['Price_Diff'] = dolar_data['Preco_Real'].diff()

# Drop the first row as it will have a NaN value for 'Price_Diff'
dolar_data = dolar_data.dropna()

# Configuration for test period
start_test_date = pd.to_datetime('2022-01-01')
end_test_date = pd.to_datetime('2032-08-01')

# Initialize the XGBoost Regressor
xgb_regressor = xgb.XGBRegressor(random_state=42)

# Prepare dataframes for storing predictions and metrics
predictions_df = pd.DataFrame(columns=['Date', 'Actual', 'Predicted'])
all_actuals = []
all_predictions = []

# Training the model up to the first test window
train_data = dolar_data[dolar_data['Date'] < start_test_date]
X_train = train_data.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
y_train = train_data['Price_Diff']
xgb_regressor.fit(X_train, y_train)

# Testing the model in each day window
for current_date in pd.date_range(start=start_test_date, end=end_test_date, freq='30D'):
    test_start = current_date
    test_end = current_date + pd.Timedelta(days=30)

    if test_end > dolar_data['Date'].max():
        test_end = dolar_data['Date'].max()

    test_window = dolar_data[(dolar_data['Date'] >= test_start) & (dolar_data['Date'] < test_end)]
    
    if test_window.empty:
        continue

    X_test = test_window.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_test = test_window['Price_Diff']
    
    # Re-train the model on each iteration (if this is intended)
    xgb_regressor = xgb.XGBRegressor(random_state=42)
    train_window = dolar_data[dolar_data['Date'] < test_start]
    X_train = train_window.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_train = train_window['Price_Diff']
    xgb_regressor.fit(X_train, y_train)

    y_pred = xgb_regressor.predict(X_test)

    # Append results to dataframe and accumulate for overall metrics
    predictions_df = predictions_df.append(pd.DataFrame({
        'Date': test_window['Date'],
        'Actual': y_test,
        'Predicted': y_pred
    }), ignore_index=True)
    all_actuals.extend(y_test)
    all_predictions.extend(y_pred)

# Calculate overall metrics for the entire period
overall_mse = mean_squared_error(all_actuals, all_predictions)
overall_rmse = np.sqrt(overall_mse)
overall_mae = mean_absolute_error(all_actuals, all_predictions)
overall_r2 = r2_score(all_actuals, all_predictions)

print("Overall Metrics for the Entire Validation Period:")
print(f"MSE: {overall_mse}, RMSE: {overall_rmse}, MAE: {overall_mae}, R2: {overall_r2}")

# If you want to see predictions for a specific period
desired_prediction_period = predictions_df[
    (predictions_df['Date'] >= pd.to_datetime('2023-07-01')) &
    (predictions_df['Date'] <= pd.to_datetime('2023-07-09'))
]

print(desired_prediction_period)


  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.Data

Overall Metrics for the Entire Validation Period:
MSE: 323.98360949534475, RMSE: 17.999544702445803, MAE: 13.528114205646684, R2: -0.09534387820046675
          Date  Actual  Predicted
374 2023-07-03   -1.11 -16.242306
375 2023-07-04    0.22  -6.513949
376 2023-07-05   -2.45 -17.532143
377 2023-07-06    7.16  -7.005100
378 2023-07-07   -0.22  -3.776659


  predictions_df = predictions_df.append(pd.DataFrame({


In [21]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Load the dataset
dolar_file_path = 'C:/Users/milen/OneDrive/Documentos/TCC/Bases/dolar.csv'
dolar_data = pd.read_csv(dolar_file_path)

# Convert 'Date' to datetime
dolar_data['Date'] = pd.to_datetime(dolar_data['Date'])

# Calculate day-to-day price difference
dolar_data['Price_Diff'] = dolar_data['Preco_Real'].diff()

# Drop the first row as it will have a NaN value for 'Price_Diff'
dolar_data = dolar_data.dropna()

# Configuration for test period
start_test_date = pd.to_datetime('2022-01-01')
end_test_date = pd.to_datetime('2032-08-01')

# Initialize the XGBoost Regressor
xgb_regressor = xgb.XGBRegressor(random_state=42)

# Prepare dataframes for storing predictions and metrics
predictions_df = pd.DataFrame(columns=['Date', 'Actual', 'Predicted'])
all_actuals = []
all_predictions = []

# Training the model up to the first test window
train_data = dolar_data[dolar_data['Date'] < start_test_date]
X_train = train_data.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
y_train = train_data['Price_Diff']
xgb_regressor.fit(X_train, y_train)

# Testing the model in each day window
for current_date in pd.date_range(start=start_test_date, end=end_test_date, freq='30D'):
    test_start = current_date
    test_end = current_date + pd.Timedelta(days=30)

    if test_end > dolar_data['Date'].max():
        test_end = dolar_data['Date'].max()

    test_window = dolar_data[(dolar_data['Date'] >= test_start) & (dolar_data['Date'] < test_end)]
    
    if test_window.empty:
        continue

    X_test = test_window.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_test = test_window['Price_Diff']
    
    # Re-train the model on each iteration (if this is intended)
    xgb_regressor = xgb.XGBRegressor(random_state=42)
    train_window = dolar_data[dolar_data['Date'] < test_start]
    X_train = train_window.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_train = train_window['Price_Diff']
    xgb_regressor.fit(X_train, y_train)

    y_pred = xgb_regressor.predict(X_test)

    # Append results to dataframe and accumulate for overall metrics
    predictions_df = predictions_df.append(pd.DataFrame({
        'Date': test_window['Date'],
        'Actual': y_test,
        'Predicted': y_pred
    }), ignore_index=True)
    all_actuals.extend(y_test)
    all_predictions.extend(y_pred)

# Calculate overall metrics for the entire period
overall_mse = mean_squared_error(all_actuals, all_predictions)
overall_rmse = np.sqrt(overall_mse)
overall_mae = mean_absolute_error(all_actuals, all_predictions)
overall_r2 = r2_score(all_actuals, all_predictions)

print("Overall Metrics for the Entire Validation Period:")
print(f"MSE: {overall_mse}, RMSE: {overall_rmse}, MAE: {overall_mae}, R2: {overall_r2}")

# If you want to see predictions for a specific period
desired_prediction_period = predictions_df[
    (predictions_df['Date'] >= pd.to_datetime('2023-07-01')) &
    (predictions_df['Date'] <= pd.to_datetime('2023-07-09'))
]

print(desired_prediction_period)


  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.Data

Overall Metrics for the Entire Validation Period:
MSE: 393.36659874263745, RMSE: 19.833471676502768, MAE: 14.926131824062614, R2: -0.329918190899958
          Date  Actual  Predicted
374 2023-07-03   -1.11 -11.563433
375 2023-07-04    0.22  -4.815076
376 2023-07-05   -2.45 -11.632878
377 2023-07-06    7.16   5.089793
378 2023-07-07   -0.22   6.360083


  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({


In [22]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Load the dataset
dolar_file_path = 'C:/Users/milen/OneDrive/Documentos/TCC/Bases/ibovespa.csv'
dolar_data = pd.read_csv(dolar_file_path)

# Convert 'Date' to datetime
dolar_data['Date'] = pd.to_datetime(dolar_data['Date'])

# Calculate day-to-day price difference
dolar_data['Price_Diff'] = dolar_data['Preco_Real'].diff()

# Drop the first row as it will have a NaN value for 'Price_Diff'
dolar_data = dolar_data.dropna()

# Configuration for test period
start_test_date = pd.to_datetime('2022-01-01')
end_test_date = pd.to_datetime('2032-08-01')

# Initialize the XGBoost Regressor
xgb_regressor = xgb.XGBRegressor(random_state=42)

# Prepare dataframes for storing predictions and metrics
predictions_df = pd.DataFrame(columns=['Date', 'Actual', 'Predicted'])
all_actuals = []
all_predictions = []

# Training the model up to the first test window
train_data = dolar_data[dolar_data['Date'] < start_test_date]
X_train = train_data.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
y_train = train_data['Price_Diff']
xgb_regressor.fit(X_train, y_train)

# Testing the model in each day window
for current_date in pd.date_range(start=start_test_date, end=end_test_date, freq='30D'):
    test_start = current_date
    test_end = current_date + pd.Timedelta(days=30)

    if test_end > dolar_data['Date'].max():
        test_end = dolar_data['Date'].max()

    test_window = dolar_data[(dolar_data['Date'] >= test_start) & (dolar_data['Date'] < test_end)]
    
    if test_window.empty:
        continue

    X_test = test_window.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_test = test_window['Price_Diff']
    
    # Re-train the model on each iteration (if this is intended)
    xgb_regressor = xgb.XGBRegressor(random_state=42)
    train_window = dolar_data[dolar_data['Date'] < test_start]
    X_train = train_window.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_train = train_window['Price_Diff']
    xgb_regressor.fit(X_train, y_train)

    y_pred = xgb_regressor.predict(X_test)

    # Append results to dataframe and accumulate for overall metrics
    predictions_df = predictions_df.append(pd.DataFrame({
        'Date': test_window['Date'],
        'Actual': y_test,
        'Predicted': y_pred
    }), ignore_index=True)
    all_actuals.extend(y_test)
    all_predictions.extend(y_pred)

# Calculate overall metrics for the entire period
overall_mse = mean_squared_error(all_actuals, all_predictions)
overall_rmse = np.sqrt(overall_mse)
overall_mae = mean_absolute_error(all_actuals, all_predictions)
overall_r2 = r2_score(all_actuals, all_predictions)

print("Overall Metrics for the Entire Validation Period:")
print(f"MSE: {overall_mse}, RMSE: {overall_rmse}, MAE: {overall_mae}, R2: {overall_r2}")

# If you want to see predictions for a specific period
desired_prediction_period = predictions_df[
    (predictions_df['Date'] >= pd.to_datetime('2023-07-01')) &
    (predictions_df['Date'] <= pd.to_datetime('2023-07-09'))
]

print(desired_prediction_period)


  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.Data

Overall Metrics for the Entire Validation Period:
MSE: 395.5338921482305, RMSE: 19.888033893480536, MAE: 15.111437561270549, R2: -0.337245510846107
          Date  Actual  Predicted
374 2023-07-03   -1.11   6.815723
375 2023-07-04    0.22   2.645179
376 2023-07-05   -2.45 -17.505465
377 2023-07-06    7.16 -11.555002
378 2023-07-07   -0.22   0.805214


  predictions_df = predictions_df.append(pd.DataFrame({


In [23]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Load the dataset
dolar_file_path = 'C:/Users/milen/OneDrive/Documentos/TCC/Bases/selic.csv'
dolar_data = pd.read_csv(dolar_file_path)

# Convert 'Date' to datetime
dolar_data['Date'] = pd.to_datetime(dolar_data['Date'])

# Calculate day-to-day price difference
dolar_data['Price_Diff'] = dolar_data['Preco_Real'].diff()

# Drop the first row as it will have a NaN value for 'Price_Diff'
dolar_data = dolar_data.dropna()

# Configuration for test period
start_test_date = pd.to_datetime('2022-01-01')
end_test_date = pd.to_datetime('2032-08-01')

# Initialize the XGBoost Regressor
xgb_regressor = xgb.XGBRegressor(random_state=42)

# Prepare dataframes for storing predictions and metrics
predictions_df = pd.DataFrame(columns=['Date', 'Actual', 'Predicted'])
all_actuals = []
all_predictions = []

# Training the model up to the first test window
train_data = dolar_data[dolar_data['Date'] < start_test_date]
X_train = train_data.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
y_train = train_data['Price_Diff']
xgb_regressor.fit(X_train, y_train)

# Testing the model in each day window
for current_date in pd.date_range(start=start_test_date, end=end_test_date, freq='30D'):
    test_start = current_date
    test_end = current_date + pd.Timedelta(days=30)

    if test_end > dolar_data['Date'].max():
        test_end = dolar_data['Date'].max()

    test_window = dolar_data[(dolar_data['Date'] >= test_start) & (dolar_data['Date'] < test_end)]
    
    if test_window.empty:
        continue

    X_test = test_window.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_test = test_window['Price_Diff']
    
    # Re-train the model on each iteration (if this is intended)
    xgb_regressor = xgb.XGBRegressor(random_state=42)
    train_window = dolar_data[dolar_data['Date'] < test_start]
    X_train = train_window.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_train = train_window['Price_Diff']
    xgb_regressor.fit(X_train, y_train)

    y_pred = xgb_regressor.predict(X_test)

    # Append results to dataframe and accumulate for overall metrics
    predictions_df = predictions_df.append(pd.DataFrame({
        'Date': test_window['Date'],
        'Actual': y_test,
        'Predicted': y_pred
    }), ignore_index=True)
    all_actuals.extend(y_test)
    all_predictions.extend(y_pred)

# Calculate overall metrics for the entire period
overall_mse = mean_squared_error(all_actuals, all_predictions)
overall_rmse = np.sqrt(overall_mse)
overall_mae = mean_absolute_error(all_actuals, all_predictions)
overall_r2 = r2_score(all_actuals, all_predictions)

print("Overall Metrics for the Entire Validation Period:")
print(f"MSE: {overall_mse}, RMSE: {overall_rmse}, MAE: {overall_mae}, R2: {overall_r2}")

# If you want to see predictions for a specific period
desired_prediction_period = predictions_df[
    (predictions_df['Date'] >= pd.to_datetime('2023-07-01')) &
    (predictions_df['Date'] <= pd.to_datetime('2023-07-09'))
]

print(desired_prediction_period)


  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.Data

Overall Metrics for the Entire Validation Period:
MSE: 442.6820671156061, RMSE: 21.04001110065311, MAE: 15.79041525471247, R2: -0.4966469845789361
          Date  Actual  Predicted
374 2023-07-03   -1.11  -8.505659
375 2023-07-04    0.22  -4.532252
376 2023-07-05   -2.45  -6.252424
377 2023-07-06    7.16   0.150635
378 2023-07-07   -0.22  -2.835280


In [24]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Load the dataset
dolar_file_path = 'C:/Users/milen/OneDrive/Documentos/TCC/Bases/base_completa.csv'
dolar_data = pd.read_csv(dolar_file_path)

# Convert 'Date' to datetime
dolar_data['Date'] = pd.to_datetime(dolar_data['Date'])

# Calculate day-to-day price difference
dolar_data['Price_Diff'] = dolar_data['Preco_Real'].diff()

# Drop the first row as it will have a NaN value for 'Price_Diff'
dolar_data = dolar_data.dropna()

# Configuration for test period
start_test_date = pd.to_datetime('2022-01-01')
end_test_date = pd.to_datetime('2032-08-01')

# Initialize the XGBoost Regressor
xgb_regressor = xgb.XGBRegressor(random_state=42)

# Prepare dataframes for storing predictions and metrics
predictions_df = pd.DataFrame(columns=['Date', 'Actual', 'Predicted'])
all_actuals = []
all_predictions = []

# Training the model up to the first test window
train_data = dolar_data[dolar_data['Date'] < start_test_date]
X_train = train_data.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
y_train = train_data['Price_Diff']
xgb_regressor.fit(X_train, y_train)

# Testing the model in each day window
for current_date in pd.date_range(start=start_test_date, end=end_test_date, freq='30D'):
    test_start = current_date
    test_end = current_date + pd.Timedelta(days=30)

    if test_end > dolar_data['Date'].max():
        test_end = dolar_data['Date'].max()

    test_window = dolar_data[(dolar_data['Date'] >= test_start) & (dolar_data['Date'] < test_end)]
    
    if test_window.empty:
        continue

    X_test = test_window.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_test = test_window['Price_Diff']
    
    # Re-train the model on each iteration (if this is intended)
    xgb_regressor = xgb.XGBRegressor(random_state=42)
    train_window = dolar_data[dolar_data['Date'] < test_start]
    X_train = train_window.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_train = train_window['Price_Diff']
    xgb_regressor.fit(X_train, y_train)

    y_pred = xgb_regressor.predict(X_test)

    # Append results to dataframe and accumulate for overall metrics
    predictions_df = predictions_df.append(pd.DataFrame({
        'Date': test_window['Date'],
        'Actual': y_test,
        'Predicted': y_pred
    }), ignore_index=True)
    all_actuals.extend(y_test)
    all_predictions.extend(y_pred)

# Calculate overall metrics for the entire period
overall_mse = mean_squared_error(all_actuals, all_predictions)
overall_rmse = np.sqrt(overall_mse)
overall_mae = mean_absolute_error(all_actuals, all_predictions)
overall_r2 = r2_score(all_actuals, all_predictions)

print("Overall Metrics for the Entire Validation Period:")
print(f"MSE: {overall_mse}, RMSE: {overall_rmse}, MAE: {overall_mae}, R2: {overall_r2}")

# If you want to see predictions for a specific period
desired_prediction_period = predictions_df[
    (predictions_df['Date'] >= pd.to_datetime('2023-07-01')) &
    (predictions_df['Date'] <= pd.to_datetime('2023-07-09'))
]

print(desired_prediction_period)


  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.Data

Overall Metrics for the Entire Validation Period:
MSE: 386.49697900042554, RMSE: 19.659526418518467, MAE: 14.974858687231789, R2: -0.3066929544692705
          Date  Actual  Predicted
374 2023-07-03   -1.11 -15.688412
375 2023-07-04    0.22 -20.903116
376 2023-07-05   -2.45 -14.772523
377 2023-07-06    7.16 -17.650747
378 2023-07-07   -0.22 -18.677464


  predictions_df = predictions_df.append(pd.DataFrame({


walkforward

In [27]:
import pandas as pd
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Carregar o dataset
dolar_file_path = 'C:/Users/milen/OneDrive/Documentos/TCC/Bases/base_completa.csv'
dolar_data = pd.read_csv(dolar_file_path)
dolar_data['Date'] = pd.to_datetime(dolar_data['Date'])
dolar_data['Price_Diff'] = dolar_data['Preco_Real'].diff()
dolar_data = dolar_data.dropna()

# Configurações para o período de teste
start_test_date = pd.to_datetime('2022-01-01')
end_test_date = pd.to_datetime('2032-08-01')

# Preparar dataframe para armazenar previsões e métricas
predictions_df = pd.DataFrame(columns=['Date', 'Actual', 'Predicted'])

# Inicializar o Regressor XGBoost
xgb_regressor = xgb.XGBRegressor(random_state=42)

# Loop de validação walk-forward
for current_date in pd.date_range(start=start_test_date, end=end_test_date, freq='1D'):
    train_data = dolar_data[dolar_data['Date'] < current_date]
    test_data = dolar_data[(dolar_data['Date'] >= current_date) & (dolar_data['Date'] < current_date + pd.Timedelta(days=30))]
    
    if test_data.empty:
        continue
    
    X_train = train_data.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_train = train_data['Price_Diff']
    X_test = test_data.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_test = test_data['Price_Diff']
    
    xgb_regressor.fit(X_train, y_train)
    y_pred = xgb_regressor.predict(X_test)
    
    predictions_df = predictions_df.append(pd.DataFrame({
        'Date': test_data['Date'],
        'Actual': y_test,
        'Predicted': y_pred
    }), ignore_index=True)

grouped_predictions = predictions_df.groupby('Date').mean().reset_index()

# Calcular as métricas para o período completo
overall_mse = mean_squared_error(grouped_predictions['Actual'], grouped_predictions['Predicted'])
overall_rmse = np.sqrt(overall_mse)
overall_mae = mean_absolute_error(grouped_predictions['Actual'], grouped_predictions['Predicted'])
overall_r2 = r2_score(grouped_predictions['Actual'], grouped_predictions['Predicted'])

print("Overall Metrics for the Entire Validation Period:")
print(f"MSE: {overall_mse}, RMSE: {overall_rmse}, MAE: {overall_mae}, R2: {overall_r2}")

# Exibir previsões para um período específico
desired_prediction_period = grouped_predictions[
    (grouped_predictions['Date'] >= pd.to_datetime('2023-07-01')) &
    (grouped_predictions['Date'] <= pd.to_datetime('2023-07-09'))
]

print(desired_prediction_period)

  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.Data

Overall Metrics for the Entire Validation Period:
MSE: 340.6571529510576, RMSE: 18.456899873788597, MAE: 13.836105495954433, R2: -0.15413771062643167
          Date  Actual  Predicted
374 2023-07-03   -1.11  -9.260904
375 2023-07-04    0.22 -11.684812
376 2023-07-05   -2.45 -11.782151
377 2023-07-06    7.16 -10.939341
378 2023-07-07   -0.22 -11.500296


In [28]:
import pandas as pd
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Carregar o dataset
dolar_file_path = 'C:/Users/milen/OneDrive/Documentos/TCC/Bases/clima.csv'
dolar_data = pd.read_csv(dolar_file_path)
dolar_data['Date'] = pd.to_datetime(dolar_data['Date'])
dolar_data['Price_Diff'] = dolar_data['Preco_Real'].diff()
dolar_data = dolar_data.dropna()

# Configurações para o período de teste
start_test_date = pd.to_datetime('2022-01-01')
end_test_date = pd.to_datetime('2032-08-01')

# Preparar dataframe para armazenar previsões e métricas
predictions_df = pd.DataFrame(columns=['Date', 'Actual', 'Predicted'])

# Inicializar o Regressor XGBoost
xgb_regressor = xgb.XGBRegressor(random_state=42)

# Loop de validação walk-forward
for current_date in pd.date_range(start=start_test_date, end=end_test_date, freq='1D'):
    train_data = dolar_data[dolar_data['Date'] < current_date]
    test_data = dolar_data[(dolar_data['Date'] >= current_date) & (dolar_data['Date'] < current_date + pd.Timedelta(days=30))]
    
    if test_data.empty:
        continue
    
    X_train = train_data.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_train = train_data['Price_Diff']
    X_test = test_data.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_test = test_data['Price_Diff']
    
    xgb_regressor.fit(X_train, y_train)
    y_pred = xgb_regressor.predict(X_test)
    
    predictions_df = predictions_df.append(pd.DataFrame({
        'Date': test_data['Date'],
        'Actual': y_test,
        'Predicted': y_pred
    }), ignore_index=True)

grouped_predictions = predictions_df.groupby('Date').mean().reset_index()

# Calcular as métricas para o período completo
overall_mse = mean_squared_error(grouped_predictions['Actual'], grouped_predictions['Predicted'])
overall_rmse = np.sqrt(overall_mse)
overall_mae = mean_absolute_error(grouped_predictions['Actual'], grouped_predictions['Predicted'])
overall_r2 = r2_score(grouped_predictions['Actual'], grouped_predictions['Predicted'])

print("Overall Metrics for the Entire Validation Period:")
print(f"MSE: {overall_mse}, RMSE: {overall_rmse}, MAE: {overall_mae}, R2: {overall_r2}")

# Exibir previsões para um período específico
desired_prediction_period = grouped_predictions[
    (grouped_predictions['Date'] >= pd.to_datetime('2023-07-01')) &
    (grouped_predictions['Date'] <= pd.to_datetime('2023-07-09'))
]

print(desired_prediction_period)

  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.Data

Overall Metrics for the Entire Validation Period:
MSE: 310.56346251586575, RMSE: 17.622810857404836, MAE: 13.190160139674042, R2: -0.052181058073291764
          Date  Actual  Predicted
374 2023-07-03   -1.11  -4.819553
375 2023-07-04    0.22  -4.583781
376 2023-07-05   -2.45  -5.975314
377 2023-07-06    7.16  -4.373817
378 2023-07-07   -0.22  -6.046193


In [29]:
import pandas as pd
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Carregar o dataset
dolar_file_path = 'C:/Users/milen/OneDrive/Documentos/TCC/Bases/dolar.csv'
dolar_data = pd.read_csv(dolar_file_path)
dolar_data['Date'] = pd.to_datetime(dolar_data['Date'])
dolar_data['Price_Diff'] = dolar_data['Preco_Real'].diff()
dolar_data = dolar_data.dropna()

# Configurações para o período de teste
start_test_date = pd.to_datetime('2022-01-01')
end_test_date = pd.to_datetime('2032-08-01')

# Preparar dataframe para armazenar previsões e métricas
predictions_df = pd.DataFrame(columns=['Date', 'Actual', 'Predicted'])

# Inicializar o Regressor XGBoost
xgb_regressor = xgb.XGBRegressor(random_state=42)

# Loop de validação walk-forward
for current_date in pd.date_range(start=start_test_date, end=end_test_date, freq='1D'):
    train_data = dolar_data[dolar_data['Date'] < current_date]
    test_data = dolar_data[(dolar_data['Date'] >= current_date) & (dolar_data['Date'] < current_date + pd.Timedelta(days=30))]
    
    if test_data.empty:
        continue
    
    X_train = train_data.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_train = train_data['Price_Diff']
    X_test = test_data.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_test = test_data['Price_Diff']
    
    xgb_regressor.fit(X_train, y_train)
    y_pred = xgb_regressor.predict(X_test)
    
    predictions_df = predictions_df.append(pd.DataFrame({
        'Date': test_data['Date'],
        'Actual': y_test,
        'Predicted': y_pred
    }), ignore_index=True)

grouped_predictions = predictions_df.groupby('Date').mean().reset_index()

# Calcular as métricas para o período completo
overall_mse = mean_squared_error(grouped_predictions['Actual'], grouped_predictions['Predicted'])
overall_rmse = np.sqrt(overall_mse)
overall_mae = mean_absolute_error(grouped_predictions['Actual'], grouped_predictions['Predicted'])
overall_r2 = r2_score(grouped_predictions['Actual'], grouped_predictions['Predicted'])

print("Overall Metrics for the Entire Validation Period:")
print(f"MSE: {overall_mse}, RMSE: {overall_rmse}, MAE: {overall_mae}, R2: {overall_r2}")

# Exibir previsões para um período específico
desired_prediction_period = grouped_predictions[
    (grouped_predictions['Date'] >= pd.to_datetime('2023-07-01')) &
    (grouped_predictions['Date'] <= pd.to_datetime('2023-07-09'))
]

print(desired_prediction_period)

  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.Data

Overall Metrics for the Entire Validation Period:
MSE: 387.20921072210024, RMSE: 19.677632243796513, MAE: 14.549227950826122, R2: -0.31185489024643376
          Date  Actual  Predicted
374 2023-07-03   -1.11   0.843873
375 2023-07-04    0.22  -3.371294
376 2023-07-05   -2.45  -4.736497
377 2023-07-06    7.16   3.487967
378 2023-07-07   -0.22   2.090846


In [30]:
import pandas as pd
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Carregar o dataset
dolar_file_path = 'C:/Users/milen/OneDrive/Documentos/TCC/Bases/ibovespa.csv'
dolar_data = pd.read_csv(dolar_file_path)
dolar_data['Date'] = pd.to_datetime(dolar_data['Date'])
dolar_data['Price_Diff'] = dolar_data['Preco_Real'].diff()
dolar_data = dolar_data.dropna()

# Configurações para o período de teste
start_test_date = pd.to_datetime('2022-01-01')
end_test_date = pd.to_datetime('2032-08-01')

# Preparar dataframe para armazenar previsões e métricas
predictions_df = pd.DataFrame(columns=['Date', 'Actual', 'Predicted'])

# Inicializar o Regressor XGBoost
xgb_regressor = xgb.XGBRegressor(random_state=42)

# Loop de validação walk-forward
for current_date in pd.date_range(start=start_test_date, end=end_test_date, freq='1D'):
    train_data = dolar_data[dolar_data['Date'] < current_date]
    test_data = dolar_data[(dolar_data['Date'] >= current_date) & (dolar_data['Date'] < current_date + pd.Timedelta(days=30))]
    
    if test_data.empty:
        continue
    
    X_train = train_data.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_train = train_data['Price_Diff']
    X_test = test_data.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_test = test_data['Price_Diff']
    
    xgb_regressor.fit(X_train, y_train)
    y_pred = xgb_regressor.predict(X_test)
    
    predictions_df = predictions_df.append(pd.DataFrame({
        'Date': test_data['Date'],
        'Actual': y_test,
        'Predicted': y_pred
    }), ignore_index=True)

grouped_predictions = predictions_df.groupby('Date').mean().reset_index()

# Calcular as métricas para o período completo
overall_mse = mean_squared_error(grouped_predictions['Actual'], grouped_predictions['Predicted'])
overall_rmse = np.sqrt(overall_mse)
overall_mae = mean_absolute_error(grouped_predictions['Actual'], grouped_predictions['Predicted'])
overall_r2 = r2_score(grouped_predictions['Actual'], grouped_predictions['Predicted'])

print("Overall Metrics for the Entire Validation Period:")
print(f"MSE: {overall_mse}, RMSE: {overall_rmse}, MAE: {overall_mae}, R2: {overall_r2}")

# Exibir previsões para um período específico
desired_prediction_period = grouped_predictions[
    (grouped_predictions['Date'] >= pd.to_datetime('2023-07-01')) &
    (grouped_predictions['Date'] <= pd.to_datetime('2023-07-09'))
]

print(desired_prediction_period)

  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.Data

Overall Metrics for the Entire Validation Period:
MSE: 364.6798974281071, RMSE: 19.096593869800632, MAE: 14.323665900715566, R2: -0.2355261537385862
          Date  Actual  Predicted
374 2023-07-03   -1.11  10.922250
375 2023-07-04    0.22   0.484366
376 2023-07-05   -2.45   2.550849
377 2023-07-06    7.16  -8.878823
378 2023-07-07   -0.22  -0.760077


In [31]:
import pandas as pd
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Carregar o dataset
dolar_file_path = 'C:/Users/milen/OneDrive/Documentos/TCC/Bases/selic.csv'
dolar_data = pd.read_csv(dolar_file_path)
dolar_data['Date'] = pd.to_datetime(dolar_data['Date'])
dolar_data['Price_Diff'] = dolar_data['Preco_Real'].diff()
dolar_data = dolar_data.dropna()

# Configurações para o período de teste
start_test_date = pd.to_datetime('2022-01-01')
end_test_date = pd.to_datetime('2032-08-01')

# Preparar dataframe para armazenar previsões e métricas
predictions_df = pd.DataFrame(columns=['Date', 'Actual', 'Predicted'])

# Inicializar o Regressor XGBoost
xgb_regressor = xgb.XGBRegressor(random_state=42)

# Loop de validação walk-forward
for current_date in pd.date_range(start=start_test_date, end=end_test_date, freq='1D'):
    train_data = dolar_data[dolar_data['Date'] < current_date]
    test_data = dolar_data[(dolar_data['Date'] >= current_date) & (dolar_data['Date'] < current_date + pd.Timedelta(days=30))]
    
    if test_data.empty:
        continue
    
    X_train = train_data.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_train = train_data['Price_Diff']
    X_test = test_data.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_test = test_data['Price_Diff']
    
    xgb_regressor.fit(X_train, y_train)
    y_pred = xgb_regressor.predict(X_test)
    
    predictions_df = predictions_df.append(pd.DataFrame({
        'Date': test_data['Date'],
        'Actual': y_test,
        'Predicted': y_pred
    }), ignore_index=True)

grouped_predictions = predictions_df.groupby('Date').mean().reset_index()

# Calcular as métricas para o período completo
overall_mse = mean_squared_error(grouped_predictions['Actual'], grouped_predictions['Predicted'])
overall_rmse = np.sqrt(overall_mse)
overall_mae = mean_absolute_error(grouped_predictions['Actual'], grouped_predictions['Predicted'])
overall_r2 = r2_score(grouped_predictions['Actual'], grouped_predictions['Predicted'])

print("Overall Metrics for the Entire Validation Period:")
print(f"MSE: {overall_mse}, RMSE: {overall_rmse}, MAE: {overall_mae}, R2: {overall_r2}")

# Exibir previsões para um período específico
desired_prediction_period = grouped_predictions[
    (grouped_predictions['Date'] >= pd.to_datetime('2023-07-01')) &
    (grouped_predictions['Date'] <= pd.to_datetime('2023-07-09'))
]

print(desired_prediction_period)

  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.Data

Overall Metrics for the Entire Validation Period:
MSE: 385.3579984379849, RMSE: 19.6305373955474, MAE: 14.394322497577262, R2: -0.3055830304338223
          Date  Actual  Predicted
374 2023-07-03   -1.11  -7.502190
375 2023-07-04    0.22  -8.868158
376 2023-07-05   -2.45  -4.743711
377 2023-07-06    7.16  -1.346722
378 2023-07-07   -0.22  -4.922405


1 DIA WINDOW 1

In [33]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Load the dataset
dolar_file_path = 'C:/Users/milen/OneDrive/Documentos/TCC/Bases/base_completa.csv'
dolar_data = pd.read_csv(dolar_file_path)

# Convert 'Date' to datetime
dolar_data['Date'] = pd.to_datetime(dolar_data['Date'])

# Calculate day-to-day price difference
dolar_data['Price_Diff'] = dolar_data['Preco_Real'].diff()

# Drop the first row as it will have a NaN value for 'Price_Diff'
dolar_data = dolar_data.dropna()

# Configuration for test period
start_test_date = pd.to_datetime('2022-01-01')
end_test_date = pd.to_datetime('2032-08-01')

# Initialize the XGBoost Regressor
xgb_regressor = xgb.XGBRegressor(random_state=100)

# Prepare dataframes for storing predictions and metrics
predictions_df = pd.DataFrame(columns=['Date', 'Actual', 'Predicted'])
all_actuals = []
all_predictions = []

# Training the model up to the first test window
train_data = dolar_data[dolar_data['Date'] < start_test_date]
X_train = train_data.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
y_train = train_data['Price_Diff']
xgb_regressor.fit(X_train, y_train)

# Testing the model in each day window
for current_date in pd.date_range(start=start_test_date, end=end_test_date, freq='1D'):
    test_start = current_date
    test_end = current_date + pd.Timedelta(days=1)

    if test_end > dolar_data['Date'].max():
        test_end = dolar_data['Date'].max()

    test_window = dolar_data[(dolar_data['Date'] >= test_start) & (dolar_data['Date'] < test_end)]
    
    if test_window.empty:
        continue

    X_test = test_window.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_test = test_window['Price_Diff']
    
    # Re-train the model on each iteration (if this is intended)
    xgb_regressor = xgb.XGBRegressor(random_state=42)
    train_window = dolar_data[dolar_data['Date'] < test_start]
    X_train = train_window.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_train = train_window['Price_Diff']
    xgb_regressor.fit(X_train, y_train)

    y_pred = xgb_regressor.predict(X_test)

    # Append results to dataframe and accumulate for overall metrics
    predictions_df = predictions_df.append(pd.DataFrame({
        'Date': test_window['Date'],
        'Actual': y_test,
        'Predicted': y_pred
    }), ignore_index=True)
    all_actuals.extend(y_test)
    all_predictions.extend(y_pred)

# Calculate overall metrics for the entire period
overall_mse = mean_squared_error(all_actuals, all_predictions)
overall_rmse = np.sqrt(overall_mse)
overall_mae = mean_absolute_error(all_actuals, all_predictions)
overall_r2 = r2_score(all_actuals, all_predictions)

print("Overall Metrics for the Entire Validation Period:")
print(f"MSE: {overall_mse}, RMSE: {overall_rmse}, MAE: {overall_mae}, R2: {overall_r2}")


desired_prediction_period = predictions_df[
    (predictions_df['Date'] == '2023-06-06') 
]


print(desired_prediction_period)


  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.Data

Overall Metrics for the Entire Validation Period:
MSE: 396.3335734345713, RMSE: 19.90812832575105, MAE: 15.13707896596086, R2: -0.3399491229296603
          Date  Actual  Predicted
356 2023-06-06   -3.41   8.995623
