In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from xgboost import XGBClassifier

In [13]:
def prepare_data(dataset_path, forecast_days=21, threshold=0.01):
    """
    Prepara os dados para treinamento e teste.
    """
    # Carregar o dataset
    dataset = pd.read_csv(dataset_path)
    dataset['Date'] = pd.to_datetime(dataset['Date'])
    for col in ['Close/Last', 'Open', 'High', 'Low']:
        dataset[col] = dataset[col].replace({'$': ''}, regex=True).astype(float)
    dataset['Volume'] = dataset['Volume'].astype(float)

    # Criar variável alvo
    dataset['Future_Return'] = (dataset['Close/Last'].shift(-forecast_days) - dataset['Close/Last']) / dataset['Close/Last']
    dataset['Target'] = (dataset['Future_Return'] > threshold).astype(int)

    # Remover valores nulos
    dataset.dropna(inplace=True)

    # Criar features adicionais
    dataset['SMA_20'] = dataset['Close/Last'].rolling(window=20).mean()
    dataset['SMA_50'] = dataset['Close/Last'].rolling(window=50).mean()
    dataset['Volatility_30'] = dataset['Close/Last'].rolling(window=30).std()
    dataset['Avg_Volume_30'] = dataset['Volume'].rolling(window=30).mean()

    # Remover valores nulos gerados pelas janelas móveis
    dataset.dropna(inplace=True)

    # Selecionar features e variável alvo
    X = dataset[['SMA_20', 'SMA_50', 'Volatility_30', 'Avg_Volume_30']].values
    y = dataset['Target'].values

    # Dividir em treino e teste
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    return dataset, X_train, X_test, y_train, y_test

In [14]:
def train_model(X_train, y_train):
    """
    Treina o modelo XGBoost.
    """
    model = XGBClassifier(eval_metric='logloss', random_state=42)
    model.fit(X_train, y_train)
    return model

In [15]:
def predict_model(model, X_test):
    """
    Faz previsões usando o modelo treinado.
    """
    return model.predict(X_test)

In [18]:
dataset_path = 'C:/Users/biel_/OneDrive/Documentos/Faculdade/Fase 3/amazon_stock_ml/HistoricalData_1731547025648.csv'
dataset, X_train, X_test, y_train, y_test = prepare_data(dataset_path)
model = train_model(X_train, y_train)
y_pred = predict_model(model, X_test)

ValueError: could not convert string to float: '$208.91'

In [None]:
# Avaliar o modelo
accuracy = accuracy_score(y_test, y_pred)
print(f"Acurácia do modelo: {accuracy:.2f}")
print("\nMatriz de Confusão:")
print(confusion_matrix(y_test, y_pred))
print("\nRelatório de Classificação:")
print(classification_report(y_test, y_pred))