In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import accuracy_score, mean_squared_error

In [7]:
# Load the raw training data. `read_csv` already returns a DataFrame, so no
# extra `pd.DataFrame(...)` wrapper is needed.
data = pd.read_csv('train.csv')

# Parse the 'Date' strings into datetimes so the calendar components can be read.
parsed_dates = pd.to_datetime(data['Date'])

# Encode each date as a single integer of the form YYYYMMDD
# (e.g. 2015-06-01 -> 20150601) so it can be used as a numeric feature.
# `assign` builds a new frame, so `data` keeps the raw, unmodified columns
# and re-running this cell always starts from the same input.
df = data.assign(
    Date=parsed_dates.dt.year * 10000 + parsed_dates.dt.month * 100 + parsed_dates.dt.day
)

# Print the transformed DataFrame
print(df)

      id      Date       Open      Close     Volume Strategy
0      0  20150601  66.208486  75.609978  298506300     Hold
1      1  20150608  64.116235  74.443331  227974800     Hold
2      2  20150615  47.701942  71.140831  250670900      Buy
3      3  20150622  54.754816  72.881344  223614300     Hold
4      4  20150629  48.031899  66.284718  406814900      Buy
..   ...       ...        ...        ...        ...      ...
295  295  20210125  81.284821  83.373498  117281600     Sell
296  296  20210201  71.970249  85.294903  177655800     Sell
297  297  20210208  71.784627  83.266453  146003500     Hold
298  298  20210215  69.372333  81.026827  167762500     Hold
299  299  20210222  69.618620  78.672007  332979200     Hold

[300 rows x 6 columns]


In [8]:
# Separate the model inputs from the two prediction targets:
# 'Strategy' (classification) and 'Close' (regression).
# NOTE(review): every remaining column is used as a feature, which includes
# `id`; in the printed frame it mirrors the row index -- confirm it is meant
# to be a predictor.
features = df.drop(columns=['Strategy', 'Close'])
strategy_labels = df['Strategy']
close_labels = df['Close']

# Encode the 'Strategy' class names as integers for the classifier.
strategy_label_encoder = LabelEncoder()
strategy_labels_encoded = strategy_label_encoder.fit_transform(strategy_labels)

# Split BEFORE scaling so that no statistic of the test rows can leak into
# the preprocessing. Arguments and seed are unchanged, so the same rows land
# in each partition as before.
# NOTE(review): train_test_split shuffles rows by default; the data is dated,
# so a chronological split may be more appropriate -- confirm.
X_train_raw, X_test_raw, strategy_train, strategy_test, close_train, close_test = train_test_split(
    features, strategy_labels_encoded, close_labels, test_size=0.2, random_state=42
)

# Standardize features: learn mean/std from the training rows only, then
# apply that same transformation to the held-out rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so any later cell that refers to `scaled_features` still works; it is
# the full feature matrix transformed with the training-set statistics.
scaled_features = scaler.transform(features)

In [9]:
# 'Close' model: a random-forest regressor fitted on the training split,
# then used to predict the held-out rows. `fit` returns the estimator itself,
# so construction and fitting are chained.
close_regressor = RandomForestRegressor(random_state=42).fit(X_train, close_train)
close_pred = close_regressor.predict(X_test)

# 'Strategy' model: a random-forest classifier trained on the integer-encoded
# labels and evaluated on the same held-out rows.
strategy_classifier = RandomForestClassifier(random_state=42).fit(X_train, strategy_train)
strategy_pred = strategy_classifier.predict(X_test)

# Map the predicted integer codes back to the original 'Strategy' label values.
strategy_pred_labels = strategy_label_encoder.inverse_transform(strategy_pred)

In [10]:
# Regression quality: square root of the mean squared error between the
# held-out 'Close' values and the regressor's predictions.
close_rmse = np.sqrt(mean_squared_error(close_test, close_pred))

# Classification quality: share of held-out rows whose encoded 'Strategy'
# label was predicted exactly.
strategy_accuracy = accuracy_score(strategy_test, strategy_pred)

# Report both metrics to two decimals, one per line.
print(
    f'Strategy Accuracy: {strategy_accuracy:.2f}',
    f'Close RMSE: {close_rmse:.2f}',
    sep='\n',
)

# Spot check: the first five predictions from each model.
print("Example Strategy Predictions:", strategy_pred_labels[:5])
print("Example Close Predictions:", close_pred[:5])


Strategy Accuracy: 0.72
Close RMSE: 4.03
Example Strategy Predictions: ['Hold' 'Hold' 'Hold' 'Buy' 'Buy']
Example Close Predictions: [91.25242525 80.48178998 92.67501891 57.4126402  83.96640237]
