In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import accuracy_score, mean_squared_error

In [2]:

# Load the raw training table from the working directory.
data = pd.read_csv('train.csv')

# Parse the date column, then collapse it into one YYYYMMDD-style integer
# (year * 10000 + month * 100 + day) so it can be used as a numeric feature.
data['Date'] = pd.to_datetime(data['Date'])
data['Date'] = (
    data['Date'].dt.year * 10000
    + data['Date'].dt.month * 100
    + data['Date'].dt.day
)

# Discard every row that still contains a missing value.
data = data.dropna()

# Two targets are modelled from the same features: the categorical
# 'Strategy' label and the numeric 'Close' value.
y_strategy = data['Strategy']
y_close = data['Close']
X = data.drop(columns=['Strategy', 'Close'])

# Map the strategy labels to integer codes; the fitted encoder is kept so
# predictions can be mapped back to the original labels.
label_encoder = LabelEncoder()
y_strategy_encoded = label_encoder.fit_transform(y_strategy)

# Standardise every remaining column.
# NOTE(review): the scaler is fitted on all rows here, before the train/test
# split performed in the next cell, so held-out rows contribute to the
# scaling statistics — confirm this is acceptable for the intended use.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


In [3]:
# Hold out 30% of the rows for evaluation. Features and both targets are
# passed to a single call so all three are partitioned on the same rows.
# NOTE(review): the split is a seeded random shuffle with no `stratify`
# argument — confirm that suits this data set.
(
    X_train, X_test,
    strategy_train, strategy_test,
    close_train, close_test,
) = train_test_split(
    X_scaled,
    y_strategy_encoded,
    y_close,
    test_size=0.3,
    random_state=42,
)

# Search space for the strategy classifier: 3 candidate values for each of
# 4 hyperparameters, i.e. 81 combinations.
param_grid_classifier = dict(
    n_estimators=[100, 150, 200],
    max_depth=[15, 20, 25],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

# Exhaustive 5-fold cross-validated search on the training portion only,
# using all available cores. No `scoring` is given, so the search relies on
# GridSearchCV's default scorer.
grid_search_classifier = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=param_grid_classifier,
    cv=5,
    n_jobs=-1,
)
grid_search_classifier.fit(X_train, strategy_train)

# Keep the best configuration found by the search for later evaluation.
best_classifier = grid_search_classifier.best_estimator_

In [4]:
# Search space for the close-value regressor: 3 candidate values for each of
# 4 hyperparameters, i.e. 81 combinations.
param_grid_regressor = dict(
    n_estimators=[100, 150, 200],
    max_depth=[15, 20, 25],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

# Exhaustive 5-fold cross-validated search on the training portion, using
# all available cores.
# NOTE(review): no `scoring` is given, so candidates are ranked by
# GridSearchCV's default scorer rather than by the RMSE reported at the end
# of the notebook — confirm that is intended.
grid_search_regressor = GridSearchCV(
    estimator=RandomForestRegressor(random_state=42),
    param_grid=param_grid_regressor,
    cv=5,
    n_jobs=-1,
)
grid_search_regressor.fit(X_train, close_train)

# Keep the best configuration found by the search for later evaluation.
best_regressor = grid_search_regressor.best_estimator_

In [5]:
# --- Strategy (classification) on the held-out rows ---
strategy_pred = best_classifier.predict(X_test)

# Map the integer predictions back to the original strategy labels.
decoded_strategy_pred = label_encoder.inverse_transform(strategy_pred)

# Accuracy is computed on the encoded labels, which is how both the
# reference values and the predictions are represented.
strategy_accuracy = accuracy_score(strategy_test, strategy_pred)
print(f'Strategy Accuracy: {strategy_accuracy:.2f}')

# --- Close (regression) on the held-out rows ---
close_pred = best_regressor.predict(X_test)

# Root of the mean squared error, so the metric is in the same units as
# the 'Close' column.
close_rmse = np.sqrt(mean_squared_error(close_test, close_pred))
print(f'Close RMSE: {close_rmse:.2f}')

Strategy Accuracy: 0.67
Close RMSE: 4.18
