In [6]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Load the dataset from your CSV file
dolar_file_path = 'C:/Users/milen/OneDrive/Documentos/TCC/Bases/base_completa.csv'
dolar_data = pd.read_csv(dolar_file_path)

# Convert the 'Date' column to datetime type
dolar_data['Date'] = pd.to_datetime(dolar_data['Date'])

# Configuring the test period to start from 2022
start_test_date = pd.to_datetime('2022-01-01')
end_test_date = pd.to_datetime('2032-08-01')

# Initialize the XGBoost Regressor model
xgb_regressor = xgb.XGBRegressor(random_state=42)

# List to store metrics and predictions for each 30-day test window
metrics = []
predictions_df = pd.DataFrame(columns=['Date', 'Actual', 'Predicted'])

# Train the model up until the start of the first test window
train_data = dolar_data[dolar_data['Date'] < start_test_date]
X_train = train_data.drop(['Preco_Real', 'Date'], axis=1)
y_train = train_data['Preco_Real']
xgb_regressor.fit(X_train, y_train)

# Sliding window: Test the model in each 30-day window from start_test_date to end_test_date
for current_date in pd.date_range(start=start_test_date, end=end_test_date, freq='1D'):
    test_start = current_date
    test_end = current_date + pd.Timedelta(days=1)
    
    # Check if the end date exceeds the dataset's last date
    if test_end > dolar_data['Date'].max():
        test_end = dolar_data['Date'].max()
    
    # Test data for the current window
    test_window = dolar_data[(dolar_data['Date'] >= test_start) & (dolar_data['Date'] < test_end)]
    
    # If no data for the current window, skip to the next window
    if test_window.empty:
        continue
    
    X_test = test_window.drop(['Preco_Real', 'Date'], axis=1)
    y_test = test_window['Preco_Real']
    
    # Re-train the model with the data before the current test window
    xgb_regressor = xgb.XGBRegressor(random_state=42)
    train_window = dolar_data[dolar_data['Date'] < test_start]
    X_train = train_window.drop(['Preco_Real', 'Date'], axis=1)
    y_train = train_window['Preco_Real']
    
    xgb_regressor.fit(X_train, y_train)

    # Make predictions and calculate metrics
    y_pred = xgb_regressor.predict(X_test)
    predictions_df = predictions_df.append(pd.DataFrame({
        'Date': test_window['Date'],
        'Actual': y_test,
        'Predicted': y_pred
    }), ignore_index=True)
    
    # Calculate and store metrics for this window
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    metrics.append((test_start, mse, rmse, mae, r2))
    
    # Update the current date to the end of the test window
    current_date = test_end

# Convert the metrics to a DataFrame for easier viewing and analysis
metrics_df = pd.DataFrame(metrics, columns=['Start Date', 'MSE', 'RMSE', 'MAE', 'R2'])

# Filter the DataFrame for the desired prediction period
desired_prediction_period = predictions_df[
    (predictions_df['Date'] >= pd.to_datetime('2023-07-01')) &
    (predictions_df['Date'] <= pd.to_datetime('2023-07-09'))
]

# Print the desired prediction period
print(desired_prediction_period)
print(metrics_df)


  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.DataFrame({
  predictions_df = predictions_df.append(pd.Data

          Date  Actual   Predicted
374 2023-07-03  824.48  825.717712
375 2023-07-04  824.70  836.998169
376 2023-07-05  822.25  829.514343
377 2023-07-06  829.41  827.522034
378 2023-07-07  829.19  830.053162
    Start Date         MSE       RMSE        MAE  R2
0   2022-01-03  732.937705  27.072822  27.072822 NaN
1   2022-01-04  690.817290  26.283403  26.283403 NaN
2   2022-01-05   51.158117   7.152490   7.152490 NaN
3   2022-01-06  420.753041  20.512266  20.512266 NaN
4   2022-01-07  832.336305  28.850239  28.850239 NaN
..         ...         ...        ...        ...  ..
452 2023-10-23   71.615931   8.462620   8.462620 NaN
453 2023-10-24  460.761737  21.465361  21.465361 NaN
454 2023-10-25    7.537532   2.745457   2.745457 NaN
455 2023-10-26  601.250993  24.520420  24.520420 NaN
456 2023-10-27  254.077062  15.939795  15.939795 NaN

[457 rows x 5 columns]
