In [19]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Load the dataset
dolar_file_path = 'C:/Users/milen/OneDrive/Documentos/TCC/Bases/base_completa.csv'
dolar_data = pd.read_csv(dolar_file_path)

# Convert 'Date' to datetime
dolar_data['Date'] = pd.to_datetime(dolar_data['Date'])

# Calculate day-to-day price difference
dolar_data['Price_Diff'] = dolar_data['Preco_Real'].diff()

# Drop the first row as it will have a NaN value for 'Price_Diff'
dolar_data = dolar_data.dropna()

# Configuration for test period
start_test_date = pd.to_datetime('2022-01-01')
end_test_date = pd.to_datetime('2032-08-01')

# Initialize the XGBoost Regressor
xgb_regressor = xgb.XGBRegressor(random_state=100)

# Prepare dataframes for storing predictions and metrics
predictions_df = pd.DataFrame(columns=['Date', 'Actual', 'Predicted'])
all_actuals = []
all_predictions = []

# Training the model up to the first test window
train_data = dolar_data[dolar_data['Date'] < start_test_date]
X_train = train_data.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
y_train = train_data['Price_Diff']
xgb_regressor.fit(X_train, y_train)

# Testing the model in each day window
for current_date in pd.date_range(start=start_test_date, end=end_test_date, freq='1D'):
    test_start = current_date
    test_end = current_date + pd.Timedelta(days=1)

    if test_end > dolar_data['Date'].max():
        test_end = dolar_data['Date'].max()

    test_window = dolar_data[(dolar_data['Date'] >= test_start) & (dolar_data['Date'] < test_end)]
    
    if test_window.empty:
        continue

    X_test = test_window.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_test = test_window['Price_Diff']
    
    # Re-train the model on each iteration (if this is intended)
    xgb_regressor = xgb.XGBRegressor(random_state=42)
    train_window = dolar_data[dolar_data['Date'] < test_start]
    X_train = train_window.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_train = train_window['Price_Diff']
    xgb_regressor.fit(X_train, y_train)

    y_pred = xgb_regressor.predict(X_test)

    # Append results to dataframe and accumulate for overall metrics
    predictions_df = predictions_df.append(pd.DataFrame({
        'Date': test_window['Date'],
        'Actual': y_test,
        'Predicted': y_pred
    }), ignore_index=True)
    all_actuals.extend(y_test)
    all_predictions.extend(y_pred)

# Calculate overall metrics for the entire period
overall_mse = mean_squared_error(all_actuals, all_predictions)
overall_rmse = np.sqrt(overall_mse)
overall_mae = mean_absolute_error(all_actuals, all_predictions)
overall_r2 = r2_score(all_actuals, all_predictions)

print("Overall Metrics for the Entire Validation Period:")
print(f"MSE: {overall_mse}, RMSE: {overall_rmse}, MAE: {overall_mae}, R2: {overall_r2}")

# If you want to see predictions for a specific period
desired_prediction_period = predictions_df[
    (predictions_df['Date'] >= pd.to_datetime('2023-07-01')) &
    (predictions_df['Date'] <= pd.to_datetime('2023-07-09'))
]

print(desired_prediction_period)


  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.Data

Overall Metrics for the Entire Validation Period:
MSE: 396.3335734345713, RMSE: 19.90812832575105, MAE: 15.13707896596086, R2: -0.3399491229296603
          Date  Actual  Predicted
374 2023-07-03   -1.11 -15.314046
375 2023-07-04    0.22 -15.184363
376 2023-07-05   -2.45 -15.745965
377 2023-07-06    7.16  -8.330201
378 2023-07-07   -0.22  -7.311473


In [13]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Load the dataset
dolar_file_path = 'C:/Users/milen/OneDrive/Documentos/TCC/Bases/clima.csv'
dolar_data = pd.read_csv(dolar_file_path)

# Convert 'Date' to datetime
dolar_data['Date'] = pd.to_datetime(dolar_data['Date'])

# Calculate day-to-day price difference
dolar_data['Price_Diff'] = dolar_data['Preco_Real'].diff()

# Drop the first row as it will have a NaN value for 'Price_Diff'
dolar_data = dolar_data.dropna()

# Configuration for test period
start_test_date = pd.to_datetime('2022-01-01')
end_test_date = pd.to_datetime('2032-08-01')

# Initialize the XGBoost Regressor
xgb_regressor = xgb.XGBRegressor(random_state=42)

# Prepare dataframes for storing predictions and metrics
predictions_df = pd.DataFrame(columns=['Date', 'Actual', 'Predicted'])
all_actuals = []
all_predictions = []

# Training the model up to the first test window
train_data = dolar_data[dolar_data['Date'] < start_test_date]
X_train = train_data.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
y_train = train_data['Price_Diff']
xgb_regressor.fit(X_train, y_train)

# Testing the model in each day window
for current_date in pd.date_range(start=start_test_date, end=end_test_date, freq='1D'):
    test_start = current_date
    test_end = current_date + pd.Timedelta(days=1)

    if test_end > dolar_data['Date'].max():
        test_end = dolar_data['Date'].max()

    test_window = dolar_data[(dolar_data['Date'] >= test_start) & (dolar_data['Date'] < test_end)]
    
    if test_window.empty:
        continue

    X_test = test_window.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_test = test_window['Price_Diff']
    
    # Re-train the model on each iteration (if this is intended)
    xgb_regressor = xgb.XGBRegressor(random_state=42)
    train_window = dolar_data[dolar_data['Date'] < test_start]
    X_train = train_window.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_train = train_window['Price_Diff']
    xgb_regressor.fit(X_train, y_train)

    y_pred = xgb_regressor.predict(X_test)

    # Append results to dataframe and accumulate for overall metrics
    predictions_df = predictions_df.append(pd.DataFrame({
        'Date': test_window['Date'],
        'Actual': y_test,
        'Predicted': y_pred
    }), ignore_index=True)
    all_actuals.extend(y_test)
    all_predictions.extend(y_pred)

# Calculate overall metrics for the entire period
overall_mse = mean_squared_error(all_actuals, all_predictions)
overall_rmse = np.sqrt(overall_mse)
overall_mae = mean_absolute_error(all_actuals, all_predictions)
overall_r2 = r2_score(all_actuals, all_predictions)

print("Overall Metrics for the Entire Validation Period:")
print(f"MSE: {overall_mse}, RMSE: {overall_rmse}, MAE: {overall_mae}, R2: {overall_r2}")

# If you want to see predictions for a specific period
desired_prediction_period = predictions_df[
    (predictions_df['Date'] >= pd.to_datetime('2023-07-01')) &
    (predictions_df['Date'] <= pd.to_datetime('2023-07-09'))
]

print(desired_prediction_period)


  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.Data

Overall Metrics for the Entire Validation Period:
MSE: 315.74337814663494, RMSE: 17.769169315042134, MAE: 13.41529832454101, R2: -0.06748479305469601
          Date  Actual  Predicted
374 2023-07-03   -1.11  -6.370439
375 2023-07-04    0.22  -7.605386
376 2023-07-05   -2.45  -3.382757
377 2023-07-06    7.16  -3.253150
378 2023-07-07   -0.22  -9.897977


In [14]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Load the dataset
dolar_file_path = 'C:/Users/milen/OneDrive/Documentos/TCC/Bases/dolar.csv'
dolar_data = pd.read_csv(dolar_file_path)

# Convert 'Date' to datetime
dolar_data['Date'] = pd.to_datetime(dolar_data['Date'])

# Calculate day-to-day price difference
dolar_data['Price_Diff'] = dolar_data['Preco_Real'].diff()

# Drop the first row as it will have a NaN value for 'Price_Diff'
dolar_data = dolar_data.dropna()

# Configuration for test period
start_test_date = pd.to_datetime('2022-01-01')
end_test_date = pd.to_datetime('2032-08-01')

# Initialize the XGBoost Regressor
xgb_regressor = xgb.XGBRegressor(random_state=42)

# Prepare dataframes for storing predictions and metrics
predictions_df = pd.DataFrame(columns=['Date', 'Actual', 'Predicted'])
all_actuals = []
all_predictions = []

# Training the model up to the first test window
train_data = dolar_data[dolar_data['Date'] < start_test_date]
X_train = train_data.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
y_train = train_data['Price_Diff']
xgb_regressor.fit(X_train, y_train)

# Testing the model in each day window
for current_date in pd.date_range(start=start_test_date, end=end_test_date, freq='1D'):
    test_start = current_date
    test_end = current_date + pd.Timedelta(days=1)

    if test_end > dolar_data['Date'].max():
        test_end = dolar_data['Date'].max()

    test_window = dolar_data[(dolar_data['Date'] >= test_start) & (dolar_data['Date'] < test_end)]
    
    if test_window.empty:
        continue

    X_test = test_window.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_test = test_window['Price_Diff']
    
    # Re-train the model on each iteration (if this is intended)
    xgb_regressor = xgb.XGBRegressor(random_state=42)
    train_window = dolar_data[dolar_data['Date'] < test_start]
    X_train = train_window.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_train = train_window['Price_Diff']
    xgb_regressor.fit(X_train, y_train)

    y_pred = xgb_regressor.predict(X_test)

    # Append results to dataframe and accumulate for overall metrics
    predictions_df = predictions_df.append(pd.DataFrame({
        'Date': test_window['Date'],
        'Actual': y_test,
        'Predicted': y_pred
    }), ignore_index=True)
    all_actuals.extend(y_test)
    all_predictions.extend(y_pred)

# Calculate overall metrics for the entire period
overall_mse = mean_squared_error(all_actuals, all_predictions)
overall_rmse = np.sqrt(overall_mse)
overall_mae = mean_absolute_error(all_actuals, all_predictions)
overall_r2 = r2_score(all_actuals, all_predictions)

print("Overall Metrics for the Entire Validation Period:")
print(f"MSE: {overall_mse}, RMSE: {overall_rmse}, MAE: {overall_mae}, R2: {overall_r2}")

# If you want to see predictions for a specific period
desired_prediction_period = predictions_df[
    (predictions_df['Date'] >= pd.to_datetime('2023-07-01')) &
    (predictions_df['Date'] <= pd.to_datetime('2023-07-09'))
]

print(desired_prediction_period)


  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.Data

Overall Metrics for the Entire Validation Period:
MSE: 426.71428730335106, RMSE: 20.657063859690975, MAE: 15.116748338539963, R2: -0.44266212437859975
          Date  Actual  Predicted
374 2023-07-03   -1.11  -4.885148
375 2023-07-04    0.22  -6.069565
376 2023-07-05   -2.45  -1.513067
377 2023-07-06    7.16   5.000689
378 2023-07-07   -0.22  -1.259574


In [15]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Load the dataset
dolar_file_path = 'C:/Users/milen/OneDrive/Documentos/TCC/Bases/ibovespa.csv'
dolar_data = pd.read_csv(dolar_file_path)

# Convert 'Date' to datetime
dolar_data['Date'] = pd.to_datetime(dolar_data['Date'])

# Calculate day-to-day price difference
dolar_data['Price_Diff'] = dolar_data['Preco_Real'].diff()

# Drop the first row as it will have a NaN value for 'Price_Diff'
dolar_data = dolar_data.dropna()

# Configuration for test period
start_test_date = pd.to_datetime('2022-01-01')
end_test_date = pd.to_datetime('2032-08-01')

# Initialize the XGBoost Regressor
xgb_regressor = xgb.XGBRegressor(random_state=42)

# Prepare dataframes for storing predictions and metrics
predictions_df = pd.DataFrame(columns=['Date', 'Actual', 'Predicted'])
all_actuals = []
all_predictions = []

# Training the model up to the first test window
train_data = dolar_data[dolar_data['Date'] < start_test_date]
X_train = train_data.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
y_train = train_data['Price_Diff']
xgb_regressor.fit(X_train, y_train)

# Testing the model in each day window
for current_date in pd.date_range(start=start_test_date, end=end_test_date, freq='1D'):
    test_start = current_date
    test_end = current_date + pd.Timedelta(days=1)

    if test_end > dolar_data['Date'].max():
        test_end = dolar_data['Date'].max()

    test_window = dolar_data[(dolar_data['Date'] >= test_start) & (dolar_data['Date'] < test_end)]
    
    if test_window.empty:
        continue

    X_test = test_window.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_test = test_window['Price_Diff']
    
    # Re-train the model on each iteration (if this is intended)
    xgb_regressor = xgb.XGBRegressor(random_state=42)
    train_window = dolar_data[dolar_data['Date'] < test_start]
    X_train = train_window.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_train = train_window['Price_Diff']
    xgb_regressor.fit(X_train, y_train)

    y_pred = xgb_regressor.predict(X_test)

    # Append results to dataframe and accumulate for overall metrics
    predictions_df = predictions_df.append(pd.DataFrame({
        'Date': test_window['Date'],
        'Actual': y_test,
        'Predicted': y_pred
    }), ignore_index=True)
    all_actuals.extend(y_test)
    all_predictions.extend(y_pred)

# Calculate overall metrics for the entire period
overall_mse = mean_squared_error(all_actuals, all_predictions)
overall_rmse = np.sqrt(overall_mse)
overall_mae = mean_absolute_error(all_actuals, all_predictions)
overall_r2 = r2_score(all_actuals, all_predictions)

print("Overall Metrics for the Entire Validation Period:")
print(f"MSE: {overall_mse}, RMSE: {overall_rmse}, MAE: {overall_mae}, R2: {overall_r2}")

# If you want to see predictions for a specific period
desired_prediction_period = predictions_df[
    (predictions_df['Date'] >= pd.to_datetime('2023-07-01')) &
    (predictions_df['Date'] <= pd.to_datetime('2023-07-09'))
]

print(desired_prediction_period)


  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.Data

Overall Metrics for the Entire Validation Period:
MSE: 386.9768127233689, RMSE: 19.671726226322104, MAE: 14.871742986441316, R2: -0.30831520607576013
          Date  Actual  Predicted
374 2023-07-03   -1.11   7.204421
375 2023-07-04    0.22   3.600235
376 2023-07-05   -2.45  -1.638054
377 2023-07-06    7.16 -13.234259
378 2023-07-07   -0.22  -3.461109


In [16]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Load the dataset
dolar_file_path = 'C:/Users/milen/OneDrive/Documentos/TCC/Bases/selic.csv'
dolar_data = pd.read_csv(dolar_file_path)

# Convert 'Date' to datetime
dolar_data['Date'] = pd.to_datetime(dolar_data['Date'])

# Calculate day-to-day price difference
dolar_data['Price_Diff'] = dolar_data['Preco_Real'].diff()

# Drop the first row as it will have a NaN value for 'Price_Diff'
dolar_data = dolar_data.dropna()

# Configuration for test period
start_test_date = pd.to_datetime('2022-01-01')
end_test_date = pd.to_datetime('2032-08-01')

# Initialize the XGBoost Regressor
xgb_regressor = xgb.XGBRegressor(random_state=42)

# Prepare dataframes for storing predictions and metrics
predictions_df = pd.DataFrame(columns=['Date', 'Actual', 'Predicted'])
all_actuals = []
all_predictions = []

# Training the model up to the first test window
train_data = dolar_data[dolar_data['Date'] < start_test_date]
X_train = train_data.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
y_train = train_data['Price_Diff']
xgb_regressor.fit(X_train, y_train)

# Testing the model in each day window
for current_date in pd.date_range(start=start_test_date, end=end_test_date, freq='1D'):
    test_start = current_date
    test_end = current_date + pd.Timedelta(days=1)

    if test_end > dolar_data['Date'].max():
        test_end = dolar_data['Date'].max()

    test_window = dolar_data[(dolar_data['Date'] >= test_start) & (dolar_data['Date'] < test_end)]
    
    if test_window.empty:
        continue

    X_test = test_window.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_test = test_window['Price_Diff']
    
    # Re-train the model on each iteration (if this is intended)
    xgb_regressor = xgb.XGBRegressor(random_state=42)
    train_window = dolar_data[dolar_data['Date'] < test_start]
    X_train = train_window.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_train = train_window['Price_Diff']
    xgb_regressor.fit(X_train, y_train)

    y_pred = xgb_regressor.predict(X_test)

    # Append results to dataframe and accumulate for overall metrics
    predictions_df = predictions_df.append(pd.DataFrame({
        'Date': test_window['Date'],
        'Actual': y_test,
        'Predicted': y_pred
    }), ignore_index=True)
    all_actuals.extend(y_test)
    all_predictions.extend(y_pred)

# Calculate overall metrics for the entire period
overall_mse = mean_squared_error(all_actuals, all_predictions)
overall_rmse = np.sqrt(overall_mse)
overall_mae = mean_absolute_error(all_actuals, all_predictions)
overall_r2 = r2_score(all_actuals, all_predictions)

print("Overall Metrics for the Entire Validation Period:")
print(f"MSE: {overall_mse}, RMSE: {overall_rmse}, MAE: {overall_mae}, R2: {overall_r2}")

# If you want to see predictions for a specific period
desired_prediction_period = predictions_df[
    (predictions_df['Date'] >= pd.to_datetime('2023-07-01')) &
    (predictions_df['Date'] <= pd.to_datetime('2023-07-09'))
]

print(desired_prediction_period)


  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.Data

Overall Metrics for the Entire Validation Period:
MSE: 460.0685414016277, RMSE: 21.44920840967395, MAE: 15.929329337605058, R2: -0.5554282550337837
          Date  Actual  Predicted
374 2023-07-03   -1.11  -6.776595
375 2023-07-04    0.22  -0.526764
376 2023-07-05   -2.45  -4.549476
377 2023-07-06    7.16  -1.535267
378 2023-07-07   -0.22  -3.553224


30 DIAS

In [18]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Load the dataset
dolar_file_path = 'C:/Users/milen/OneDrive/Documentos/TCC/Bases/selic.csv'
dolar_data = pd.read_csv(dolar_file_path)

# Convert 'Date' to datetime
dolar_data['Date'] = pd.to_datetime(dolar_data['Date'])

# Calculate day-to-day price difference
dolar_data['Price_Diff'] = dolar_data['Preco_Real'].diff()

# Drop the first row as it will have a NaN value for 'Price_Diff'
dolar_data = dolar_data.dropna()

# Configuration for test period
start_test_date = pd.to_datetime('2022-01-01')
end_test_date = pd.to_datetime('2032-08-01')

# Initialize the XGBoost Regressor
xgb_regressor = xgb.XGBRegressor(random_state=42)

# Prepare dataframes for storing predictions and metrics
predictions_df = pd.DataFrame(columns=['Date', 'Actual', 'Predicted'])
all_actuals = []
all_predictions = []

# Training the model up to the first test window
train_data = dolar_data[dolar_data['Date'] < start_test_date]
X_train = train_data.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
y_train = train_data['Price_Diff']
xgb_regressor.fit(X_train, y_train)

# Testing the model in each day window
for current_date in pd.date_range(start=start_test_date, end=end_test_date, freq='30D'):
    test_start = current_date
    test_end = current_date + pd.Timedelta(days=30)

    if test_end > dolar_data['Date'].max():
        test_end = dolar_data['Date'].max()

    test_window = dolar_data[(dolar_data['Date'] >= test_start) & (dolar_data['Date'] < test_end)]
    
    if test_window.empty:
        continue

    X_test = test_window.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_test = test_window['Price_Diff']
    
    # Re-train the model on each iteration (if this is intended)
    xgb_regressor = xgb.XGBRegressor(random_state=42)
    train_window = dolar_data[dolar_data['Date'] < test_start]
    X_train = train_window.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_train = train_window['Price_Diff']
    xgb_regressor.fit(X_train, y_train)

    y_pred = xgb_regressor.predict(X_test)

    # Append results to dataframe and accumulate for overall metrics
    predictions_df = predictions_df.append(pd.DataFrame({
        'Date': test_window['Date'],
        'Actual': y_test,
        'Predicted': y_pred
    }), ignore_index=True)
    all_actuals.extend(y_test)
    all_predictions.extend(y_pred)

# Calculate overall metrics for the entire period
overall_mse = mean_squared_error(all_actuals, all_predictions)
overall_rmse = np.sqrt(overall_mse)
overall_mae = mean_absolute_error(all_actuals, all_predictions)
overall_r2 = r2_score(all_actuals, all_predictions)

print("Overall Metrics for the Entire Validation Period:")
print(f"MSE: {overall_mse}, RMSE: {overall_rmse}, MAE: {overall_mae}, R2: {overall_r2}")

# If you want to see predictions for a specific period
desired_prediction_period = predictions_df[
    (predictions_df['Date'] >= pd.to_datetime('2023-07-01')) &
    (predictions_df['Date'] <= pd.to_datetime('2023-07-09'))
]

print(desired_prediction_period)


  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.Data

Overall Metrics for the Entire Validation Period:
MSE: 442.6820671156061, RMSE: 21.04001110065311, MAE: 15.79041525471247, R2: -0.4966469845789361
          Date  Actual  Predicted
374 2023-07-03   -1.11  -8.505659
375 2023-07-04    0.22  -4.532252
376 2023-07-05   -2.45  -6.252424
377 2023-07-06    7.16   0.150635
378 2023-07-07   -0.22  -2.835280


  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({


In [20]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Load the dataset
dolar_file_path = 'C:/Users/milen/OneDrive/Documentos/TCC/Bases/clima.csv'
dolar_data = pd.read_csv(dolar_file_path)

# Convert 'Date' to datetime
dolar_data['Date'] = pd.to_datetime(dolar_data['Date'])

# Calculate day-to-day price difference
dolar_data['Price_Diff'] = dolar_data['Preco_Real'].diff()

# Drop the first row as it will have a NaN value for 'Price_Diff'
dolar_data = dolar_data.dropna()

# Configuration for test period
start_test_date = pd.to_datetime('2022-01-01')
end_test_date = pd.to_datetime('2032-08-01')

# Initialize the XGBoost Regressor
xgb_regressor = xgb.XGBRegressor(random_state=42)

# Prepare dataframes for storing predictions and metrics
predictions_df = pd.DataFrame(columns=['Date', 'Actual', 'Predicted'])
all_actuals = []
all_predictions = []

# Training the model up to the first test window
train_data = dolar_data[dolar_data['Date'] < start_test_date]
X_train = train_data.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
y_train = train_data['Price_Diff']
xgb_regressor.fit(X_train, y_train)

# Testing the model in each day window
for current_date in pd.date_range(start=start_test_date, end=end_test_date, freq='30D'):
    test_start = current_date
    test_end = current_date + pd.Timedelta(days=30)

    if test_end > dolar_data['Date'].max():
        test_end = dolar_data['Date'].max()

    test_window = dolar_data[(dolar_data['Date'] >= test_start) & (dolar_data['Date'] < test_end)]
    
    if test_window.empty:
        continue

    X_test = test_window.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_test = test_window['Price_Diff']
    
    # Re-train the model on each iteration (if this is intended)
    xgb_regressor = xgb.XGBRegressor(random_state=42)
    train_window = dolar_data[dolar_data['Date'] < test_start]
    X_train = train_window.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_train = train_window['Price_Diff']
    xgb_regressor.fit(X_train, y_train)

    y_pred = xgb_regressor.predict(X_test)

    # Append results to dataframe and accumulate for overall metrics
    predictions_df = predictions_df.append(pd.DataFrame({
        'Date': test_window['Date'],
        'Actual': y_test,
        'Predicted': y_pred
    }), ignore_index=True)
    all_actuals.extend(y_test)
    all_predictions.extend(y_pred)

# Calculate overall metrics for the entire period
overall_mse = mean_squared_error(all_actuals, all_predictions)
overall_rmse = np.sqrt(overall_mse)
overall_mae = mean_absolute_error(all_actuals, all_predictions)
overall_r2 = r2_score(all_actuals, all_predictions)

print("Overall Metrics for the Entire Validation Period:")
print(f"MSE: {overall_mse}, RMSE: {overall_rmse}, MAE: {overall_mae}, R2: {overall_r2}")

# If you want to see predictions for a specific period
desired_prediction_period = predictions_df[
    (predictions_df['Date'] >= pd.to_datetime('2023-07-01')) &
    (predictions_df['Date'] <= pd.to_datetime('2023-07-09'))
]

print(desired_prediction_period)


  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.Data

Overall Metrics for the Entire Validation Period:
MSE: 323.98360949534475, RMSE: 17.999544702445803, MAE: 13.528114205646684, R2: -0.09534387820046675
          Date  Actual  Predicted
374 2023-07-03   -1.11 -16.242306
375 2023-07-04    0.22  -6.513949
376 2023-07-05   -2.45 -17.532143
377 2023-07-06    7.16  -7.005100
378 2023-07-07   -0.22  -3.776659


  predictions_df = predictions_df.append(pd.DataFrame({


In [21]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Load the dataset
dolar_file_path = 'C:/Users/milen/OneDrive/Documentos/TCC/Bases/dolar.csv'
dolar_data = pd.read_csv(dolar_file_path)

# Convert 'Date' to datetime
dolar_data['Date'] = pd.to_datetime(dolar_data['Date'])

# Calculate day-to-day price difference
dolar_data['Price_Diff'] = dolar_data['Preco_Real'].diff()

# Drop the first row as it will have a NaN value for 'Price_Diff'
dolar_data = dolar_data.dropna()

# Configuration for test period
start_test_date = pd.to_datetime('2022-01-01')
end_test_date = pd.to_datetime('2032-08-01')

# Initialize the XGBoost Regressor
xgb_regressor = xgb.XGBRegressor(random_state=42)

# Prepare dataframes for storing predictions and metrics
predictions_df = pd.DataFrame(columns=['Date', 'Actual', 'Predicted'])
all_actuals = []
all_predictions = []

# Training the model up to the first test window
train_data = dolar_data[dolar_data['Date'] < start_test_date]
X_train = train_data.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
y_train = train_data['Price_Diff']
xgb_regressor.fit(X_train, y_train)

# Testing the model in each day window
for current_date in pd.date_range(start=start_test_date, end=end_test_date, freq='30D'):
    test_start = current_date
    test_end = current_date + pd.Timedelta(days=30)

    if test_end > dolar_data['Date'].max():
        test_end = dolar_data['Date'].max()

    test_window = dolar_data[(dolar_data['Date'] >= test_start) & (dolar_data['Date'] < test_end)]
    
    if test_window.empty:
        continue

    X_test = test_window.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_test = test_window['Price_Diff']
    
    # Re-train the model on each iteration (if this is intended)
    xgb_regressor = xgb.XGBRegressor(random_state=42)
    train_window = dolar_data[dolar_data['Date'] < test_start]
    X_train = train_window.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_train = train_window['Price_Diff']
    xgb_regressor.fit(X_train, y_train)

    y_pred = xgb_regressor.predict(X_test)

    # Append results to dataframe and accumulate for overall metrics
    predictions_df = predictions_df.append(pd.DataFrame({
        'Date': test_window['Date'],
        'Actual': y_test,
        'Predicted': y_pred
    }), ignore_index=True)
    all_actuals.extend(y_test)
    all_predictions.extend(y_pred)

# Calculate overall metrics for the entire period
overall_mse = mean_squared_error(all_actuals, all_predictions)
overall_rmse = np.sqrt(overall_mse)
overall_mae = mean_absolute_error(all_actuals, all_predictions)
overall_r2 = r2_score(all_actuals, all_predictions)

print("Overall Metrics for the Entire Validation Period:")
print(f"MSE: {overall_mse}, RMSE: {overall_rmse}, MAE: {overall_mae}, R2: {overall_r2}")

# If you want to see predictions for a specific period
desired_prediction_period = predictions_df[
    (predictions_df['Date'] >= pd.to_datetime('2023-07-01')) &
    (predictions_df['Date'] <= pd.to_datetime('2023-07-09'))
]

print(desired_prediction_period)


  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.Data

Overall Metrics for the Entire Validation Period:
MSE: 393.36659874263745, RMSE: 19.833471676502768, MAE: 14.926131824062614, R2: -0.329918190899958
          Date  Actual  Predicted
374 2023-07-03   -1.11 -11.563433
375 2023-07-04    0.22  -4.815076
376 2023-07-05   -2.45 -11.632878
377 2023-07-06    7.16   5.089793
378 2023-07-07   -0.22   6.360083


  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({


In [22]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Load the dataset
dolar_file_path = 'C:/Users/milen/OneDrive/Documentos/TCC/Bases/ibovespa.csv'
dolar_data = pd.read_csv(dolar_file_path)

# Convert 'Date' to datetime
dolar_data['Date'] = pd.to_datetime(dolar_data['Date'])

# Calculate day-to-day price difference
dolar_data['Price_Diff'] = dolar_data['Preco_Real'].diff()

# Drop the first row as it will have a NaN value for 'Price_Diff'
dolar_data = dolar_data.dropna()

# Configuration for test period
start_test_date = pd.to_datetime('2022-01-01')
end_test_date = pd.to_datetime('2032-08-01')

# Initialize the XGBoost Regressor
xgb_regressor = xgb.XGBRegressor(random_state=42)

# Prepare dataframes for storing predictions and metrics
predictions_df = pd.DataFrame(columns=['Date', 'Actual', 'Predicted'])
all_actuals = []
all_predictions = []

# Training the model up to the first test window
train_data = dolar_data[dolar_data['Date'] < start_test_date]
X_train = train_data.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
y_train = train_data['Price_Diff']
xgb_regressor.fit(X_train, y_train)

# Testing the model in each day window
for current_date in pd.date_range(start=start_test_date, end=end_test_date, freq='30D'):
    test_start = current_date
    test_end = current_date + pd.Timedelta(days=30)

    if test_end > dolar_data['Date'].max():
        test_end = dolar_data['Date'].max()

    test_window = dolar_data[(dolar_data['Date'] >= test_start) & (dolar_data['Date'] < test_end)]
    
    if test_window.empty:
        continue

    X_test = test_window.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_test = test_window['Price_Diff']
    
    # Re-train the model on each iteration (if this is intended)
    xgb_regressor = xgb.XGBRegressor(random_state=42)
    train_window = dolar_data[dolar_data['Date'] < test_start]
    X_train = train_window.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_train = train_window['Price_Diff']
    xgb_regressor.fit(X_train, y_train)

    y_pred = xgb_regressor.predict(X_test)

    # Append results to dataframe and accumulate for overall metrics
    predictions_df = predictions_df.append(pd.DataFrame({
        'Date': test_window['Date'],
        'Actual': y_test,
        'Predicted': y_pred
    }), ignore_index=True)
    all_actuals.extend(y_test)
    all_predictions.extend(y_pred)

# Calculate overall metrics for the entire period
overall_mse = mean_squared_error(all_actuals, all_predictions)
overall_rmse = np.sqrt(overall_mse)
overall_mae = mean_absolute_error(all_actuals, all_predictions)
overall_r2 = r2_score(all_actuals, all_predictions)

print("Overall Metrics for the Entire Validation Period:")
print(f"MSE: {overall_mse}, RMSE: {overall_rmse}, MAE: {overall_mae}, R2: {overall_r2}")

# If you want to see predictions for a specific period
desired_prediction_period = predictions_df[
    (predictions_df['Date'] >= pd.to_datetime('2023-07-01')) &
    (predictions_df['Date'] <= pd.to_datetime('2023-07-09'))
]

print(desired_prediction_period)


  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.Data

Overall Metrics for the Entire Validation Period:
MSE: 395.5338921482305, RMSE: 19.888033893480536, MAE: 15.111437561270549, R2: -0.337245510846107
          Date  Actual  Predicted
374 2023-07-03   -1.11   6.815723
375 2023-07-04    0.22   2.645179
376 2023-07-05   -2.45 -17.505465
377 2023-07-06    7.16 -11.555002
378 2023-07-07   -0.22   0.805214


  predictions_df = predictions_df.append(pd.DataFrame({


In [23]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Load the dataset
dolar_file_path = 'C:/Users/milen/OneDrive/Documentos/TCC/Bases/selic.csv'
dolar_data = pd.read_csv(dolar_file_path)

# Convert 'Date' to datetime
dolar_data['Date'] = pd.to_datetime(dolar_data['Date'])

# Calculate day-to-day price difference
dolar_data['Price_Diff'] = dolar_data['Preco_Real'].diff()

# Drop the first row as it will have a NaN value for 'Price_Diff'
dolar_data = dolar_data.dropna()

# Configuration for test period
start_test_date = pd.to_datetime('2022-01-01')
end_test_date = pd.to_datetime('2032-08-01')

# Initialize the XGBoost Regressor
xgb_regressor = xgb.XGBRegressor(random_state=42)

# Prepare dataframes for storing predictions and metrics
predictions_df = pd.DataFrame(columns=['Date', 'Actual', 'Predicted'])
all_actuals = []
all_predictions = []

# Training the model up to the first test window
train_data = dolar_data[dolar_data['Date'] < start_test_date]
X_train = train_data.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
y_train = train_data['Price_Diff']
xgb_regressor.fit(X_train, y_train)

# Testing the model in each day window
for current_date in pd.date_range(start=start_test_date, end=end_test_date, freq='30D'):
    test_start = current_date
    test_end = current_date + pd.Timedelta(days=30)

    if test_end > dolar_data['Date'].max():
        test_end = dolar_data['Date'].max()

    test_window = dolar_data[(dolar_data['Date'] >= test_start) & (dolar_data['Date'] < test_end)]
    
    if test_window.empty:
        continue

    X_test = test_window.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_test = test_window['Price_Diff']
    
    # Re-train the model on each iteration (if this is intended)
    xgb_regressor = xgb.XGBRegressor(random_state=42)
    train_window = dolar_data[dolar_data['Date'] < test_start]
    X_train = train_window.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_train = train_window['Price_Diff']
    xgb_regressor.fit(X_train, y_train)

    y_pred = xgb_regressor.predict(X_test)

    # Append results to dataframe and accumulate for overall metrics
    predictions_df = predictions_df.append(pd.DataFrame({
        'Date': test_window['Date'],
        'Actual': y_test,
        'Predicted': y_pred
    }), ignore_index=True)
    all_actuals.extend(y_test)
    all_predictions.extend(y_pred)

# Calculate overall metrics for the entire period
overall_mse = mean_squared_error(all_actuals, all_predictions)
overall_rmse = np.sqrt(overall_mse)
overall_mae = mean_absolute_error(all_actuals, all_predictions)
overall_r2 = r2_score(all_actuals, all_predictions)

print("Overall Metrics for the Entire Validation Period:")
print(f"MSE: {overall_mse}, RMSE: {overall_rmse}, MAE: {overall_mae}, R2: {overall_r2}")

# If you want to see predictions for a specific period
desired_prediction_period = predictions_df[
    (predictions_df['Date'] >= pd.to_datetime('2023-07-01')) &
    (predictions_df['Date'] <= pd.to_datetime('2023-07-09'))
]

print(desired_prediction_period)


  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.Data

Overall Metrics for the Entire Validation Period:
MSE: 442.6820671156061, RMSE: 21.04001110065311, MAE: 15.79041525471247, R2: -0.4966469845789361
          Date  Actual  Predicted
374 2023-07-03   -1.11  -8.505659
375 2023-07-04    0.22  -4.532252
376 2023-07-05   -2.45  -6.252424
377 2023-07-06    7.16   0.150635
378 2023-07-07   -0.22  -2.835280


In [24]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Load the dataset
dolar_file_path = 'C:/Users/milen/OneDrive/Documentos/TCC/Bases/base_completa.csv'
dolar_data = pd.read_csv(dolar_file_path)

# Convert 'Date' to datetime
dolar_data['Date'] = pd.to_datetime(dolar_data['Date'])

# Calculate day-to-day price difference
dolar_data['Price_Diff'] = dolar_data['Preco_Real'].diff()

# Drop the first row as it will have a NaN value for 'Price_Diff'
dolar_data = dolar_data.dropna()

# Configuration for test period
start_test_date = pd.to_datetime('2022-01-01')
end_test_date = pd.to_datetime('2032-08-01')

# Initialize the XGBoost Regressor
xgb_regressor = xgb.XGBRegressor(random_state=42)

# Prepare dataframes for storing predictions and metrics
predictions_df = pd.DataFrame(columns=['Date', 'Actual', 'Predicted'])
all_actuals = []
all_predictions = []

# Training the model up to the first test window
train_data = dolar_data[dolar_data['Date'] < start_test_date]
X_train = train_data.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
y_train = train_data['Price_Diff']
xgb_regressor.fit(X_train, y_train)

# Testing the model in each day window
for current_date in pd.date_range(start=start_test_date, end=end_test_date, freq='30D'):
    test_start = current_date
    test_end = current_date + pd.Timedelta(days=30)

    if test_end > dolar_data['Date'].max():
        test_end = dolar_data['Date'].max()

    test_window = dolar_data[(dolar_data['Date'] >= test_start) & (dolar_data['Date'] < test_end)]
    
    if test_window.empty:
        continue

    X_test = test_window.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_test = test_window['Price_Diff']
    
    # Re-train the model on each iteration (if this is intended)
    xgb_regressor = xgb.XGBRegressor(random_state=42)
    train_window = dolar_data[dolar_data['Date'] < test_start]
    X_train = train_window.drop(['Preco_Real', 'Price_Diff', 'Date'], axis=1)
    y_train = train_window['Price_Diff']
    xgb_regressor.fit(X_train, y_train)

    y_pred = xgb_regressor.predict(X_test)

    # Append results to dataframe and accumulate for overall metrics
    predictions_df = predictions_df.append(pd.DataFrame({
        'Date': test_window['Date'],
        'Actual': y_test,
        'Predicted': y_pred
    }), ignore_index=True)
    all_actuals.extend(y_test)
    all_predictions.extend(y_pred)

# Calculate overall metrics for the entire period
overall_mse = mean_squared_error(all_actuals, all_predictions)
overall_rmse = np.sqrt(overall_mse)
overall_mae = mean_absolute_error(all_actuals, all_predictions)
overall_r2 = r2_score(all_actuals, all_predictions)

print("Overall Metrics for the Entire Validation Period:")
print(f"MSE: {overall_mse}, RMSE: {overall_rmse}, MAE: {overall_mae}, R2: {overall_r2}")

# If you want to see predictions for a specific period
desired_prediction_period = predictions_df[
    (predictions_df['Date'] >= pd.to_datetime('2023-07-01')) &
    (predictions_df['Date'] <= pd.to_datetime('2023-07-09'))
]

print(desired_prediction_period)


  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.Data

Overall Metrics for the Entire Validation Period:
MSE: 386.49697900042554, RMSE: 19.659526418518467, MAE: 14.974858687231789, R2: -0.3066929544692705
          Date  Actual  Predicted
374 2023-07-03   -1.11 -15.688412
375 2023-07-04    0.22 -20.903116
376 2023-07-05   -2.45 -14.772523
377 2023-07-06    7.16 -17.650747
378 2023-07-07   -0.22 -18.677464


  predictions_df = predictions_df.append(pd.DataFrame({
