HOLD-OUT

In [77]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np

def remove_outliers(df, column_names, lower_factor=1.0, upper_factor=1.6):
    """Drop rows whose value in any listed column lies outside IQR-based fences.

    Columns are processed in the given order and every filter is applied to
    the rows that survived the previous ones, so the quartiles of later
    columns are computed on already-filtered data.

    Args:
        df: DataFrame to filter. The caller's object is not modified.
        column_names: Iterable of column labels to check.
        lower_factor: Multiple of the IQR subtracted from Q1 to obtain the
            lower fence (default 1.0).
        upper_factor: Multiple of the IQR added to Q3 to obtain the upper
            fence (default 1.6). The defaults are intentionally the same
            asymmetric values this function has always used.

    Returns:
        DataFrame holding only the rows whose values lie inside
        ``[Q1 - lower_factor * IQR, Q3 + upper_factor * IQR]`` (inclusive)
        for every listed column. Rows with NaN in a checked column are
        removed too, because NaN never satisfies the range test.
    """
    for column in column_names:
        q1 = df[column].quantile(0.25)  # lower quartile (25%)
        q3 = df[column].quantile(0.75)  # upper quartile (75%)
        iqr = q3 - q1  # interquartile range
        lower_bound = q1 - lower_factor * iqr
        upper_bound = q3 + upper_factor * iqr
        # `between` is inclusive on both ends, matching `>=` / `<=`.
        df = df[df[column].between(lower_bound, upper_bound)]
    return df


def normalize_data(train, test):
    """Standardize both splits using statistics learned from ``train`` only.

    A single ``StandardScaler`` is fitted on ``train`` and then applied to
    ``train`` and ``test``.

    Returns:
        Tuple ``(train_scaled, test_scaled)`` with the outputs of
        ``scaler.transform`` for each split.
    """
    fitted_scaler = StandardScaler().fit(train)
    train_scaled = fitted_scaler.transform(train)
    test_scaled = fitted_scaler.transform(test)
    return train_scaled, test_scaled

def add_moving_average(data, window_size=5, column='Dif_Preco_Dolar'):
    """Return a copy of ``data`` with a lagged moving-average column added.

    The new column is named ``Moving_Average_<window_size>`` and holds the
    rolling mean of ``column`` over ``window_size`` rows, shifted down by one
    row so each value is built only from earlier rows. The first
    ``window_size`` rows of the new column are therefore NaN.

    Args:
        data: DataFrame containing ``column``. It is not modified; building
            the result with ``assign`` avoids writing into a filtered slice
            (and the SettingWithCopyWarning that comes with it).
        window_size: Number of rows in the rolling window (default 5).
        column: Name of the column to average (default ``'Dif_Preco_Dolar'``).

    Returns:
        New DataFrame with the extra moving-average column.
    """
    moving_average = data[column].rolling(window=window_size).mean().shift(1)
    return data.assign(**{f'Moving_Average_{window_size}': moving_average})

# Hold-out evaluation: for every CSV, fit a linear regression on the rows
# dated before 2020-01-01 and evaluate it on the rows from that date onward.

# Folder that holds every CSV; kept in one place so pointing the notebook at
# another machine means editing a single line.
DATA_DIR = 'C:/Users/milen/OneDrive/Documentos/TCC/Bases'
file_paths = [f'{DATA_DIR}/{name}' for name in (
    'euro.csv',
    'dolar.csv',
    'base_completa.csv',
    'taxa_br.csv',
    'bolsa_eua.csv',
    'clima.csv',
)]

# Passed explicitly to add_moving_average so this cell keeps working even
# after a later cell redefines that helper with a different default window.
moving_average_window = 5
split_date = pd.to_datetime("2020-01-01")
dropped_columns = ['Data', 'Preco_Dolar', 'Preco_Real', 'Dif_Preco_Real',
                   'Dif_Preco_Dolar', f'Moving_Average_{moving_average_window}']

for file_path in file_paths:
    data = pd.read_csv(file_path)

    data['Data'] = pd.to_datetime(data['Data'])
    # Discards the first row of the file.
    # NOTE(review): the reason is not visible here - confirm it is still needed.
    data = data.iloc[1:]

    # NOTE(review): the fences are computed on the whole file (train and test
    # together) and over every numeric column, which presumably includes the
    # target - confirm this is intended.
    numeric_columns = data.select_dtypes(include=[np.number]).columns.tolist()
    data = remove_outliers(data, numeric_columns)

    # NOTE(review): this column is listed in dropped_columns, so it currently
    # has no effect on the fitted model.
    data = add_moving_average(data, moving_average_window)

    train_data = data[data['Data'] < split_date]
    test_data = data[data['Data'] >= split_date]

    y_train = train_data['Dif_Preco_Dolar']
    X_train = train_data.drop(columns=dropped_columns)

    y_test = test_data['Dif_Preco_Dolar']
    X_test = test_data.drop(columns=dropped_columns)

    # Parse text columns written with '.' as thousands separator and ',' as
    # decimal mark BEFORE imputing, so the NaNs produced by errors='coerce'
    # are filled as well. regex=False keeps '.' a literal dot regardless of
    # the pandas version's default.
    non_numeric_columns = X_train.select_dtypes(include=['object']).columns
    for col in non_numeric_columns:
        for frame in (X_train, X_test):
            frame[col] = pd.to_numeric(
                frame[col].str.replace('.', '', regex=False).str.replace(',', '.', regex=False),
                errors='coerce',
            )

    # Impute both splits with the TRAINING means so no statistic of the test
    # period is used when preparing the features.
    mean_values_train = X_train.mean()
    X_train = X_train.fillna(mean_values_train)
    X_test = X_test.fillna(mean_values_train)

    X_train, X_test = normalize_data(X_train, X_test)

    model = LinearRegression()
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    print(f"Resultados para {file_path}:")
    print("MSE:", mse)
    print("MAE:", mae)
    print("R^2:", r2)
    print()

    # Indexes are reset so dates, actual values and predictions line up by position.
    resultados = pd.DataFrame({
        'Data': test_data['Data'].reset_index(drop=True),
        'Valor_Real': y_test.reset_index(drop=True),
        'Valor_Previsto': y_pred,
    })

    # Show the prediction for one date of interest, when that date survived filtering.
    data_especifica = pd.to_datetime("2023-09-27")
    previsao_especifica = resultados[resultados['Data'] == data_especifica]

    if not previsao_especifica.empty:
        print(previsao_especifica)
    else:
        print(f"Não há dados disponíveis para a data {data_especifica.date()}")



Resultados para C:/Users/milen/OneDrive/Documentos/TCC/Bases/euro.csv:
MSE: 0.9242547535373863
MAE: 0.8195474799335131
R^2: 0.2574118228462988

         Data  Valor_Real  Valor_Previsto
33 2023-09-27       -1.16       -0.114589
Resultados para C:/Users/milen/OneDrive/Documentos/TCC/Bases/dolar.csv:
MSE: 1.1759177007343675
MAE: 0.8606942689691257
R^2: 0.38817729699531445

         Data  Valor_Real  Valor_Previsto
65 2023-09-27       -1.16       -1.099051
Resultados para C:/Users/milen/OneDrive/Documentos/TCC/Bases/base_completa.csv:
MSE: 0.9328140745206313
MAE: 0.8996845610794891
R^2: -11.334731563909171

Não há dados disponíveis para a data 2023-09-27
Resultados para C:/Users/milen/OneDrive/Documentos/TCC/Bases/taxa_br.csv:
MSE: 3.227176679437702
MAE: 1.508802791309408
R^2: -0.039116700870576615

Não há dados disponíveis para a data 2023-09-27
Resultados para C:/Users/milen/OneDrive/Documentos/TCC/Bases/bolsa_eua.csv:
MSE: 2.331013187781124
MAE: 1.2260567745813236
R^2: -0.0101958362020

JANELA DESLIZANTE DE 1 DIA

In [78]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np


def remove_outliers(df, column_names, lower_factor=1.0, upper_factor=1.6):
    """Drop rows whose value in any listed column lies outside IQR-based fences.

    Columns are processed in the given order and every filter is applied to
    the rows that survived the previous ones, so the quartiles of later
    columns are computed on already-filtered data.

    Args:
        df: DataFrame to filter. The caller's object is not modified.
        column_names: Iterable of column labels to check.
        lower_factor: Multiple of the IQR subtracted from Q1 to obtain the
            lower fence (default 1.0).
        upper_factor: Multiple of the IQR added to Q3 to obtain the upper
            fence (default 1.6). The defaults are intentionally the same
            asymmetric values this function has always used.

    Returns:
        DataFrame holding only the rows whose values lie inside
        ``[Q1 - lower_factor * IQR, Q3 + upper_factor * IQR]`` (inclusive)
        for every listed column. Rows with NaN in a checked column are
        removed too, because NaN never satisfies the range test.
    """
    for column in column_names:
        q1 = df[column].quantile(0.25)  # lower quartile (25%)
        q3 = df[column].quantile(0.75)  # upper quartile (75%)
        iqr = q3 - q1  # interquartile range
        lower_bound = q1 - lower_factor * iqr
        upper_bound = q3 + upper_factor * iqr
        # `between` is inclusive on both ends, matching `>=` / `<=`.
        df = df[df[column].between(lower_bound, upper_bound)]
    return df

def create_sliding_windows(data, window_size):
    """Stack every run of ``window_size`` consecutive items of ``data``.

    Window ``k`` is ``data[k:k + window_size]``; consecutive windows advance
    by one position. When ``data`` is shorter than ``window_size`` no window
    is produced and an empty array is returned.

    Returns:
        ``np.ndarray`` whose first axis indexes the windows.
    """
    n_windows = len(data) - window_size + 1
    return np.array([data[start:start + window_size] for start in range(n_windows)])

def normalize_data(train, test):
    """Standardize both splits using statistics learned from ``train`` only.

    A single ``StandardScaler`` is fitted on ``train`` and then applied to
    ``train`` and ``test``.

    Returns:
        Tuple ``(train_scaled, test_scaled)`` with the outputs of
        ``scaler.transform`` for each split.
    """
    fitted_scaler = StandardScaler().fit(train)
    train_scaled = fitted_scaler.transform(train)
    test_scaled = fitted_scaler.transform(test)
    return train_scaled, test_scaled

def add_moving_average(data, window_size=1, column='Dif_Preco_Dolar'):
    """Return a copy of ``data`` with a lagged moving-average column added.

    The new column is named ``Moving_Average_<window_size>`` and holds the
    rolling mean of ``column`` over ``window_size`` rows, shifted down by one
    row so each value is built only from earlier rows. With the default
    window of 1 the column is simply the previous row's value.

    Args:
        data: DataFrame containing ``column``. It is not modified; building
            the result with ``assign`` avoids writing into a filtered slice
            (and the SettingWithCopyWarning that comes with it).
        window_size: Number of rows in the rolling window (default 1).
        column: Name of the column to average (default ``'Dif_Preco_Dolar'``).

    Returns:
        New DataFrame with the extra moving-average column.
    """
    moving_average = data[column].rolling(window=window_size).mean().shift(1)
    return data.assign(**{f'Moving_Average_{window_size}': moving_average})

# Sliding-window evaluation (window of 1 row): same train/test split as the
# hold-out run, but predictions are produced window by window.

# Folder that holds every CSV; kept in one place so pointing the notebook at
# another machine means editing a single line.
DATA_DIR = 'C:/Users/milen/OneDrive/Documentos/TCC/Bases'
file_paths = [f'{DATA_DIR}/{name}' for name in (
    'euro.csv',
    'dolar.csv',
    'base_completa.csv',
    'taxa_br.csv',
    'bolsa_eua.csv',
    'clima.csv',
)]

# One value drives the moving-average column name and the test windows, and
# it is passed explicitly to add_moving_average so this cell keeps working
# even after another cell redefines that helper with a different default.
window_size = 1
split_date = pd.to_datetime("2020-01-01")
dropped_columns = ['Data', 'Preco_Dolar', 'Preco_Real', 'Dif_Preco_Real',
                   'Dif_Preco_Dolar', f'Moving_Average_{window_size}']

for file_path in file_paths:
    data = pd.read_csv(file_path)

    data['Data'] = pd.to_datetime(data['Data'])
    # Discards the first row of the file.
    # NOTE(review): the reason is not visible here - confirm it is still needed.
    data = data.iloc[1:]

    # NOTE(review): the fences are computed on the whole file (train and test
    # together) and over every numeric column, which presumably includes the
    # target - confirm this is intended.
    numeric_columns = data.select_dtypes(include=[np.number]).columns.tolist()
    data = remove_outliers(data, numeric_columns)

    # NOTE(review): this column is listed in dropped_columns, so it currently
    # has no effect on the fitted model.
    data = add_moving_average(data, window_size)

    train_data = data[data['Data'] < split_date]
    test_data = data[data['Data'] >= split_date]

    y_train = train_data['Dif_Preco_Dolar']
    X_train = train_data.drop(columns=dropped_columns)

    y_test = test_data['Dif_Preco_Dolar']
    X_test = test_data.drop(columns=dropped_columns)

    # Parse text columns written with '.' as thousands separator and ',' as
    # decimal mark BEFORE imputing, so the NaNs produced by errors='coerce'
    # are filled as well. regex=False keeps '.' a literal dot regardless of
    # the pandas version's default.
    non_numeric_columns = X_train.select_dtypes(include=['object']).columns
    for col in non_numeric_columns:
        for frame in (X_train, X_test):
            frame[col] = pd.to_numeric(
                frame[col].str.replace('.', '', regex=False).str.replace(',', '.', regex=False),
                errors='coerce',
            )

    # Impute both splits with the TRAINING means so no statistic of the test
    # period is used when preparing the features.
    mean_values_train = X_train.mean()
    X_train = X_train.fillna(mean_values_train)
    X_test = X_test.fillna(mean_values_train)

    X_train, X_test = normalize_data(X_train, X_test)

    model = LinearRegression()
    model.fit(X_train, y_train)

    test_windows = create_sliding_windows(X_test, window_size)

    # Keep only the prediction for the last row of each window.
    # NOTE(review): the model scores each row independently, so this yields
    # the same values as model.predict(X_test)[window_size - 1:]; the window
    # does not change what the model sees.
    predictions = [model.predict(window)[-1] for window in test_windows]

    # Positional slice: the first window ends at row window_size - 1.
    y_test_adjusted = y_test.iloc[window_size - 1:]

    mse = mean_squared_error(y_test_adjusted, predictions)
    mae = mean_absolute_error(y_test_adjusted, predictions)
    r2 = r2_score(y_test_adjusted, predictions)

    print(f"Resultados para {file_path}:")
    print("MSE:", mse)
    print("MAE:", mae)
    print("R^2:", r2)
    print()

    # Indexes are reset so dates, actual values and predictions line up by position.
    resultados = pd.DataFrame({
        'Data': test_data['Data'].iloc[window_size - 1:].reset_index(drop=True),
        'Valor_Real': y_test_adjusted.reset_index(drop=True),
        'Valor_Previsto': predictions,
    })

    # Show the prediction for one date of interest, when that date survived filtering.
    data_especifica = pd.to_datetime("2023-09-27")
    previsao_especifica = resultados[resultados['Data'] == data_especifica]

    if not previsao_especifica.empty:
        print(f"Previsão para {data_especifica.date()}:")
        print(previsao_especifica)
    else:
        print(f"Não há dados disponíveis para a data {data_especifica.date()}")

    # Last 30 rows of the comparison table for a quick visual check.
    print(f"Resultados para {file_path}:")
    print(resultados.tail(30))
    print()


Resultados para C:/Users/milen/OneDrive/Documentos/TCC/Bases/euro.csv:
MSE: 0.9242547535373854
MAE: 0.8195474799335127
R^2: 0.2574118228462996

Previsão para 2023-09-27:
         Data  Valor_Real  Valor_Previsto
33 2023-09-27       -1.16       -0.114589
Resultados para C:/Users/milen/OneDrive/Documentos/TCC/Bases/euro.csv:
         Data  Valor_Real  Valor_Previsto
7  2020-01-17        1.16        0.733959
8  2020-01-21       -1.00       -0.141820
9  2020-01-22        1.19        0.494489
10 2020-01-23        0.52        0.514807
11 2020-01-24       -1.34        0.094114
12 2020-01-28        0.53        0.258073
13 2020-01-29       -1.92       -0.374089
14 2020-01-30       -1.22       -0.068152
15 2020-01-31        0.28       -0.737515
16 2020-02-04       -0.38        0.031434
17 2020-02-05        1.04        0.652097
18 2020-02-06       -0.19       -0.478509
19 2020-02-07       -0.42       -0.204484
20 2020-02-10        0.60        0.222666
21 2020-02-11        0.62       -0.044406
22 

JANELA DESLIZANTE DE 30 DIAS

In [80]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np

def remove_outliers(df, column_names, lower_factor=1.0, upper_factor=1.6):
    """Drop rows whose value in any listed column lies outside IQR-based fences.

    Columns are processed in the given order and every filter is applied to
    the rows that survived the previous ones, so the quartiles of later
    columns are computed on already-filtered data.

    Args:
        df: DataFrame to filter. The caller's object is not modified.
        column_names: Iterable of column labels to check.
        lower_factor: Multiple of the IQR subtracted from Q1 to obtain the
            lower fence (default 1.0).
        upper_factor: Multiple of the IQR added to Q3 to obtain the upper
            fence (default 1.6). The defaults are intentionally the same
            asymmetric values this function has always used.

    Returns:
        DataFrame holding only the rows whose values lie inside
        ``[Q1 - lower_factor * IQR, Q3 + upper_factor * IQR]`` (inclusive)
        for every listed column. Rows with NaN in a checked column are
        removed too, because NaN never satisfies the range test.
    """
    for column in column_names:
        q1 = df[column].quantile(0.25)  # lower quartile (25%)
        q3 = df[column].quantile(0.75)  # upper quartile (75%)
        iqr = q3 - q1  # interquartile range
        lower_bound = q1 - lower_factor * iqr
        upper_bound = q3 + upper_factor * iqr
        # `between` is inclusive on both ends, matching `>=` / `<=`.
        df = df[df[column].between(lower_bound, upper_bound)]
    return df

def create_sliding_windows(data, window_size):
    """Stack every run of ``window_size`` consecutive items of ``data``.

    Window ``k`` is ``data[k:k + window_size]``; consecutive windows advance
    by one position. When ``data`` is shorter than ``window_size`` no window
    is produced and an empty array is returned.

    Returns:
        ``np.ndarray`` whose first axis indexes the windows.
    """
    n_windows = len(data) - window_size + 1
    return np.array([data[start:start + window_size] for start in range(n_windows)])

def normalize_data(train, test):
    """Standardize both splits using statistics learned from ``train`` only.

    A single ``StandardScaler`` is fitted on ``train`` and then applied to
    ``train`` and ``test``.

    Returns:
        Tuple ``(train_scaled, test_scaled)`` with the outputs of
        ``scaler.transform`` for each split.
    """
    fitted_scaler = StandardScaler().fit(train)
    train_scaled = fitted_scaler.transform(train)
    test_scaled = fitted_scaler.transform(test)
    return train_scaled, test_scaled

def add_moving_average(data, window_size=30, column='Dif_Preco_Dolar'):
    """Return a copy of ``data`` with a lagged moving-average column added.

    The new column is named ``Moving_Average_<window_size>`` and holds the
    rolling mean of ``column`` over ``window_size`` rows, shifted down by one
    row so each value is built only from earlier rows. The first
    ``window_size`` rows of the new column are therefore NaN.

    Args:
        data: DataFrame containing ``column``. It is not modified; building
            the result with ``assign`` avoids writing into a filtered slice
            (and the SettingWithCopyWarning that comes with it).
        window_size: Number of rows in the rolling window (default 30).
        column: Name of the column to average (default ``'Dif_Preco_Dolar'``).

    Returns:
        New DataFrame with the extra moving-average column.
    """
    moving_average = data[column].rolling(window=window_size).mean().shift(1)
    return data.assign(**{f'Moving_Average_{window_size}': moving_average})

# Sliding-window evaluation (window of 30 rows): same train/test split as the
# hold-out run, but predictions are produced window by window.

# Folder that holds every CSV; kept in one place so pointing the notebook at
# another machine means editing a single line.
DATA_DIR = 'C:/Users/milen/OneDrive/Documentos/TCC/Bases'
file_paths = [f'{DATA_DIR}/{name}' for name in (
    'euro.csv',
    'dolar.csv',
    'base_completa.csv',
    'taxa_br.csv',
    'bolsa_eua.csv',
    'clima.csv',
)]

window_size = 30  # rows per window; also names the moving-average column
split_date = pd.to_datetime("2020-01-01")
dropped_columns = ['Data', 'Preco_Dolar', 'Preco_Real', 'Dif_Preco_Real',
                   'Dif_Preco_Dolar', f'Moving_Average_{window_size}']

for file_path in file_paths:
    data = pd.read_csv(file_path)

    data['Data'] = pd.to_datetime(data['Data'])
    # Discards the first row of the file.
    # NOTE(review): the reason is not visible here - confirm it is still needed.
    data = data.iloc[1:]

    # NOTE(review): the fences are computed on the whole file (train and test
    # together) and over every numeric column, which presumably includes the
    # target - confirm this is intended.
    numeric_columns = data.select_dtypes(include=[np.number]).columns.tolist()
    data = remove_outliers(data, numeric_columns)

    # NOTE(review): this column is listed in dropped_columns, so it currently
    # has no effect on the fitted model.
    data = add_moving_average(data, window_size)

    train_data = data[data['Data'] < split_date]
    test_data = data[data['Data'] >= split_date]

    y_train = train_data['Dif_Preco_Dolar']
    X_train = train_data.drop(columns=dropped_columns)

    y_test = test_data['Dif_Preco_Dolar']
    X_test = test_data.drop(columns=dropped_columns)

    # Parse text columns written with '.' as thousands separator and ',' as
    # decimal mark BEFORE imputing, so the NaNs produced by errors='coerce'
    # are filled as well. regex=False keeps '.' a literal dot regardless of
    # the pandas version's default.
    non_numeric_columns = X_train.select_dtypes(include=['object']).columns
    for col in non_numeric_columns:
        for frame in (X_train, X_test):
            frame[col] = pd.to_numeric(
                frame[col].str.replace('.', '', regex=False).str.replace(',', '.', regex=False),
                errors='coerce',
            )

    # Impute both splits with the TRAINING means so no statistic of the test
    # period is used when preparing the features.
    mean_values_train = X_train.mean()
    X_train = X_train.fillna(mean_values_train)
    X_test = X_test.fillna(mean_values_train)

    X_train, X_test = normalize_data(X_train, X_test)

    model = LinearRegression()
    model.fit(X_train, y_train)

    test_windows = create_sliding_windows(X_test, window_size)

    # Fewer test rows than window_size means no window can be formed.
    if len(test_windows) == 0:
        print(f"Não foi possível criar janelas de teste para {file_path}.")
        continue

    # Keep only the prediction for the last row of each window.
    # NOTE(review): the model scores each row independently, so this yields
    # the same values as model.predict(X_test)[window_size - 1:]; the window
    # does not change what the model sees.
    predictions = [model.predict(window)[-1] for window in test_windows]

    # Positional slice: the first window ends at row window_size - 1, so the
    # targets and the predictions have the same length by construction.
    y_test_adjusted = y_test.iloc[window_size - 1:]

    mse = mean_squared_error(y_test_adjusted, predictions)
    mae = mean_absolute_error(y_test_adjusted, predictions)
    r2 = r2_score(y_test_adjusted, predictions)

    print(f"Resultados para {file_path}:")
    print("MSE:", mse)
    print("MAE:", mae)
    print("R^2:", r2)
    print()

    # TODO: remaining code for the results table and date-specific predictions.


Resultados para C:/Users/milen/OneDrive/Documentos/TCC/Bases/euro.csv:
MSE: 0.7591122237207126
MAE: 0.7303706414087139
R^2: 0.3053472133046947

Resultados para C:/Users/milen/OneDrive/Documentos/TCC/Bases/dolar.csv:
MSE: 1.3953870881638044
MAE: 0.9017695966403994
R^2: 0.44135051291652727

Não foi possível criar janelas de teste para C:/Users/milen/OneDrive/Documentos/TCC/Bases/base_completa.csv.
Resultados para C:/Users/milen/OneDrive/Documentos/TCC/Bases/taxa_br.csv:
MSE: 4.068304787455339
MAE: 1.712412726085694
R^2: -0.004218710498925526

Resultados para C:/Users/milen/OneDrive/Documentos/TCC/Bases/bolsa_eua.csv:
MSE: 2.4320836116285034
MAE: 1.2499836210161794
R^2: -0.02151155955207562

Resultados para C:/Users/milen/OneDrive/Documentos/TCC/Bases/clima.csv:
MSE: 2.7858000001296874
MAE: 1.3483658245793333
R^2: 0.05506811218507357

