In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor


In [2]:
# Read the raw training set from disk
train_data = pd.read_csv('train.csv')

# Collapse 'Date' into one YYYYMMDD-style integer (year*10000 + month*100 + day)
# so that it can be fed to the models as an ordinary numeric feature.
train_data['Date'] = pd.to_datetime(train_data['Date'])
train_dates = train_data['Date'].dt
train_data['Date'] = train_dates.year * 10000 + train_dates.month * 100 + train_dates.day

# Keep only fully populated rows
train_data = train_data.dropna()

# Separate the two prediction targets from the feature columns
y_strategy_train, y_close_train = train_data['Strategy'], train_data['Close']
X_train = train_data.drop(columns=['Strategy', 'Close'])

# Map the 'Strategy' labels to integer codes; the fitted encoder is kept so the
# codes can be turned back into the original labels later.
label_encoder = LabelEncoder().fit(y_strategy_train)
y_strategy_encoded_train = label_encoder.transform(y_strategy_train)


In [3]:
# Fit the scaler on the training features, then apply it to the same features
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)

# Search space for the RandomForestClassifier:
# 3 x 3 x 3 x 3 = 81 parameter combinations, each evaluated with 5-fold CV.
param_grid_classifier = dict(
    n_estimators=[100, 150, 200],
    max_depth=[15, 20, 25],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

# Exhaustive grid search on the encoded 'Strategy' target (n_jobs=-1: all cores)
grid_search_classifier = GridSearchCV(
    RandomForestClassifier(random_state=42),
    param_grid_classifier,
    cv=5,
    n_jobs=-1,
).fit(X_train_scaled, y_strategy_encoded_train)

# Best configuration found by the search
best_classifier = grid_search_classifier.best_estimator_

# Search space for the RandomForestRegressor:
# 3 x 3 x 3 x 3 = 81 parameter combinations.
param_grid_regressor = dict(
    n_estimators=[100, 150, 200],
    max_depth=[15, 20, 25],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

In [4]:
# Exhaustive 5-fold grid search for the 'Close' regressor (n_jobs=-1: all cores)
grid_search_regressor = GridSearchCV(
    RandomForestRegressor(random_state=42),
    param_grid_regressor,
    cv=5,
    n_jobs=-1,
).fit(X_train_scaled, y_close_train)

# Best configuration found by the search
best_regressor = grid_search_regressor.best_estimator_

In [5]:
# Read the test set from disk
test_data = pd.read_csv('test.csv')

# Encode 'Date' as year*10000 + month*100 + day, i.e. a YYYYMMDD-style integer
test_data['Date'] = pd.to_datetime(test_data['Date'])
test_dates = test_data['Date'].dt
test_data['Date'] = test_dates.year * 10000 + test_dates.month * 100 + test_dates.day

# Standardize features for test data.
# Pass the scaler exactly the columns it was fitted on, in training order, rather than
# the whole test frame: any extra column in test.csv is ignored, a different column
# order cannot mis-align features, and a missing feature fails with a clear KeyError.
X_test_scaled = scaler.transform(test_data[X_train.columns])

# Make predictions for 'Strategy' (integer class codes produced by the label encoder)
strategy_pred = best_classifier.predict(X_test_scaled)

# Decode 'Strategy' predictions back to the original label values
decoded_strategy_pred = label_encoder.inverse_transform(strategy_pred)

# Make predictions for 'Close'
close_pred = best_regressor.predict(X_test_scaled)

# Build the submission table: 'id' and the encoded 'Date' from the test set,
# followed by the predicted 'Close' values and the decoded 'Strategy' labels.
predictions_df = test_data[['id', 'Date']].assign(
    Close=close_pred,
    Strategy=decoded_strategy_pred,
)

# Write the table to CSV without the index column
predictions_df.to_csv('sample_submission4.csv', index=False)

print("Predictions saved to 'sample_submission4.csv'")

Predictions saved to 'sample_submission4.csv'
