In [2]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.model_selection import RandomizedSearchCV
import numpy as np

# NOTE: this cell uses `scaler`, `label_encoder`, `best_classifier` and
# `best_regressor`, which are created by the training / tuning cells further
# down in this notebook. Those cells must have been executed first.

# Load the test data
test_data = pd.read_csv('test.csv')

# Encode 'Date' as a yyyymmdd number (the same encoding the training cell uses)
test_dates = pd.to_datetime(test_data['Date'])
test_data['Date'] = test_dates.dt.year * 10000 + test_dates.dt.month * 100 + test_dates.dt.day

# Feed the scaler exactly the columns it was fitted on, in the fitted order.
# Passing the whole frame only works when test.csv happens to contain the
# training feature columns and nothing else, in the same order. When the
# scaler does not expose `feature_names_in_`, fall back to using every column
# of the test frame as before.
feature_columns = list(getattr(scaler, 'feature_names_in_', test_data.columns))
missing_columns = [col for col in feature_columns if col not in test_data.columns]
if missing_columns:
    raise ValueError(f"test.csv is missing feature columns: {missing_columns}")

# Standardize features for test data
X_test_scaled = scaler.transform(test_data[feature_columns])

# Make predictions for 'Strategy'
strategy_pred = best_classifier.predict(X_test_scaled)

# Decode 'Strategy' predictions back to their original labels
decoded_strategy_pred = label_encoder.inverse_transform(strategy_pred)

# Make predictions for 'Close'
close_pred = best_regressor.predict(X_test_scaled)

# Output predictions to a DataFrame.
# 'Date' here is the yyyymmdd-encoded value computed above, not the raw text
# read from test.csv.
predictions_df = pd.DataFrame({
    'id': test_data['id'],  # Include 'id' column in the predictions
    'Date': test_data['Date'],
    'Strategy': decoded_strategy_pred,
    'Close': close_pred
})

# Save predictions to a CSV file (column order: id, Date, Close, Strategy)
predictions_df[['id', 'Date',  'Close', 'Strategy']].to_csv('sample_submission5.csv', index=False)

print("Predictions saved to 'sample_submission5.csv'")


Classification Accuracy: 0.7166666666666667
Regression Mean Squared Error: 16.85347161932812
Predictions saved to 'sample_submission5.csv'


In [None]:

# Read the raw training set
train_data = pd.read_csv('train.csv')

# Turn 'Date' into a single yyyymmdd number so it can serve as a numeric feature
train_dates = pd.to_datetime(train_data['Date'])
train_data['Date'] = train_dates.dt.year * 10000 + train_dates.dt.month * 100 + train_dates.dt.day

# Keep only rows without any missing value
train_data = train_data.dropna()

# Separate the two targets from the feature columns
y_strategy_train = train_data['Strategy']
y_close_train = train_data['Close']
X_train = train_data.drop(columns=['Strategy', 'Close'])

# Map the 'Strategy' labels to integer codes
label_encoder = LabelEncoder()
y_strategy_encoded_train = label_encoder.fit_transform(y_strategy_train)

# Fit the scaler on the training features and standardize them
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

# Hold out 20% of the rows for validating the 'Strategy' classifier
strategy_split = train_test_split(
    X_train_scaled,
    y_strategy_encoded_train,
    test_size=0.2,
    random_state=42,
)
X_train_strategy, X_val_strategy, y_strategy_train_split, y_strategy_val_split = strategy_split

In [None]:
# Hold out 20% of the rows for validating the 'Close' regressor
close_split = train_test_split(
    X_train_scaled,
    y_close_train,
    test_size=0.2,
    random_state=42,
)
X_train_close, X_val_close, y_close_train_split, y_close_val_split = close_split


def _random_forest_search_space():
    """Return a fresh dict of the hyperparameter ranges sampled for a forest."""
    return {
        'n_estimators': np.arange(100, 201, 10),
        'max_depth': np.arange(10, 31, 5),
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4],
    }


def _fit_random_search(estimator, param_distributions, features, targets):
    """Run a seeded randomized search (50 draws, 5-fold CV) and return it fitted."""
    search = RandomizedSearchCV(
        estimator=estimator,
        param_distributions=param_distributions,
        n_iter=50,  # Number of random combinations to try
        cv=5,
        n_jobs=-1,
        random_state=42,
    )
    search.fit(features, targets)
    return search


# Tune the RandomForestClassifier for 'Strategy'
param_dist_classifier = _random_forest_search_space()
random_search_classifier = _fit_random_search(
    RandomForestClassifier(random_state=42),
    param_dist_classifier,
    X_train_strategy,
    y_strategy_train_split,
)
best_classifier = random_search_classifier.best_estimator_

# Tune the RandomForestRegressor for 'Close'
param_dist_regressor = _random_forest_search_space()
random_search_regressor = _fit_random_search(
    RandomForestRegressor(random_state=42),
    param_dist_regressor,
    X_train_close,
    y_close_train_split,
)
best_regressor = random_search_regressor.best_estimator_

In [None]:
# Predict on the held-out validation split of each task
strategy_pred_val = best_classifier.predict(X_val_strategy)
close_pred_val = best_regressor.predict(X_val_close)

# Score the classifier by accuracy and the regressor by mean squared error
accuracy = accuracy_score(y_strategy_val_split, strategy_pred_val)
mse = mean_squared_error(y_close_val_split, close_pred_val)

# Report both validation metrics
print(f'Classification Accuracy: {accuracy}')
print(f'Regression Mean Squared Error: {mse}')
