In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler

# Load the feature table and work on a copy so `data` itself is never mutated.
data = pd.read_csv('transformed_data.csv')
data_class = data.copy()

# Binary target: 1 when LogReturn > 0, otherwise 0 (overwrites the raw column).
data_class['LogReturn'] = (data_class['LogReturn'] > 0).astype(int)
data_class['date'] = pd.to_datetime(data_class['date'])

# Keep only observations dated before 2019.
data_class = data_class.loc[data_class['date'].dt.year < 2019]

# Features (everything except the target and the date) and target.
X = data_class.drop(['LogReturn', 'date'], axis=1)
Y = data_class['LogReturn']

# Split BEFORE scaling: fitting the scaler on all rows would let test-set
# means/variances leak into preprocessing.
# NOTE(review): rows are shuffled at random although the frame carries a `date`
# column; if the features are time-dependent a chronological split may be the
# more honest evaluation -- confirm.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Fit the scaler on the training rows only, then apply it unchanged to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full matrix kept under its original name for any later cell that reads it;
# it is now scaled with training-only statistics.
X_scaled = scaler.transform(X)

# Initialise and train the Gradient Boosting classifier.
gb_model = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)
gb_model.fit(X_train, y_train)

# Predict on the held-out rows and evaluate.
y_pred = gb_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"Gradient Boosting Accuracy: {accuracy * 100:.2f}%")
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))


Gradient Boosting Accuracy: 79.09%

Classification Report:
               precision    recall  f1-score   support

           0       0.79      0.75      0.77       207
           1       0.79      0.82      0.81       233

    accuracy                           0.79       440
   macro avg       0.79      0.79      0.79       440
weighted avg       0.79      0.79      0.79       440

Confusion Matrix:
 [[156  51]
 [ 41 192]]


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler

# Load the feature table and work on a copy so `data` itself is never mutated.
data = pd.read_csv('transformed_data.csv')
data_class = data.copy()

# Binary target: 1 when LogReturn > 0, otherwise 0; then keep rows dated before 2019.
data_class['LogReturn'] = (data_class['LogReturn'] > 0).astype(int)
data_class['date'] = pd.to_datetime(data_class['date'])
data_class = data_class.loc[data_class['date'].dt.year < 2019]

# Features (everything except the target and the date) and target.
X = data_class.drop(['LogReturn', 'date'], axis=1)
Y = data_class['LogReturn']

# Split BEFORE scaling: fitting the scaler on all rows would let test-set
# means/variances leak into preprocessing.
# NOTE(review): rows are shuffled at random although the frame carries a `date`
# column; if the features are time-dependent a chronological split may be the
# more honest evaluation -- confirm.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Fit the scaler on the training rows only, then apply it unchanged to the test rows.
# The cross-validation folds inside GridSearchCV below still share this one scaler.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full matrix kept under its original name for any later cell that reads it;
# it is now scaled with training-only statistics.
X_scaled = scaler.transform(X)

# Hyper-parameter grid to search.
param_grid = {
    'n_estimators': [100, 200],
    'learning_rate': [0.05, 0.1, 0.2],
    'max_depth': [3, 4, 5],
    'subsample': [0.8, 1.0],
    'min_samples_split': [2, 5],
}

# Exhaustive grid search with 3-fold cross-validation on the training rows.
gb = GradientBoostingClassifier(random_state=42)
grid_search = GridSearchCV(gb, param_grid, cv=3, scoring='accuracy', n_jobs=-1, verbose=1)
grid_search.fit(X_train, y_train)

# Evaluate the best estimator found by the search on the held-out rows.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"Best Gradient Boosting Accuracy: {accuracy * 100:.2f}%")
print("Best Parameters:", grid_search.best_params_)
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))


Fitting 3 folds for each of 72 candidates, totalling 216 fits
Best Gradient Boosting Accuracy: 78.64%
Best Parameters: {'learning_rate': 0.2, 'max_depth': 3, 'min_samples_split': 2, 'n_estimators': 200, 'subsample': 0.8}

Classification Report:
               precision    recall  f1-score   support

           0       0.78      0.77      0.77       207
           1       0.80      0.80      0.80       233

    accuracy                           0.79       440
   macro avg       0.79      0.79      0.79       440
weighted avg       0.79      0.79      0.79       440

Confusion Matrix:
 [[159  48]
 [ 46 187]]


In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler

# Load the feature table and work on a copy so `data` itself is never mutated.
data = pd.read_csv('transformed_data.csv')
data_class = data.copy()
data_class['date'] = pd.to_datetime(data_class['date'])

# Restrict to rows dated before 2019. The explicit .copy() makes the filtered
# frame independent, so the column assignment below cannot trigger
# SettingWithCopyWarning.
data_class = data_class.loc[data_class['date'].dt.year < 2019].copy()

# Four classes (0-3) from the quartiles of LogReturn.
# NOTE(review): the quartile edges are computed on all rows, test rows included;
# confirm this is acceptable for the intended evaluation.
data_class['LogReturnClass'] = pd.qcut(data_class['LogReturn'], q=4, labels=False)

# Features (everything except the raw return, its class and the date) and target.
X = data_class.drop(['LogReturn', 'LogReturnClass', 'date'], axis=1)
Y = data_class['LogReturnClass']

# Stratified split BEFORE scaling: fitting the scaler on all rows would let
# test-set means/variances leak into preprocessing.
# NOTE(review): rows are shuffled at random although the frame carries a `date`
# column; if the features are time-dependent a chronological split may be the
# more honest evaluation -- confirm.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42, stratify=Y)

# Fit the scaler on the training rows only, then apply it unchanged to the test rows.
# The cross-validation folds inside GridSearchCV below still share this one scaler.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full matrix kept under its original name for any later cell that reads it;
# it is now scaled with training-only statistics.
X_scaled = scaler.transform(X)

# Hyper-parameter grid to search.
param_grid = {
    'n_estimators': [100, 200],
    'learning_rate': [0.05, 0.1],
    'max_depth': [3, 4],
    'subsample': [0.8, 1.0],
}

# Exhaustive grid search with 3-fold cross-validation on the training rows.
gb = GradientBoostingClassifier(random_state=42)
grid_search = GridSearchCV(gb, param_grid, cv=3, scoring='accuracy', n_jobs=-1, verbose=1)
grid_search.fit(X_train, y_train)

# Evaluate the best estimator found by the search on the held-out rows.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

# Results
print(f"\nBest Gradient Boosting Accuracy (4-class): {accuracy * 100:.2f}%")
print("Best Parameters:", grid_search.best_params_)
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))


Fitting 3 folds for each of 16 candidates, totalling 48 fits

Best Gradient Boosting Accuracy (4-class): 58.18%
Best Parameters: {'learning_rate': 0.05, 'max_depth': 4, 'n_estimators': 200, 'subsample': 0.8}

Classification Report:
               precision    recall  f1-score   support

           0       0.67      0.69      0.68       110
           1       0.45      0.49      0.47       110
           2       0.49      0.45      0.47       110
           3       0.73      0.69      0.71       110

    accuracy                           0.58       440
   macro avg       0.58      0.58      0.58       440
weighted avg       0.58      0.58      0.58       440

Confusion Matrix:
 [[76 26  6  2]
 [19 54 29  8]
 [13 29 50 18]
 [ 5 11 18 76]]
