In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.impute import SimpleImputer

# Load datasets
train_data = pd.read_csv('/kaggle/input/testtrain/train.csv')
test_data = pd.read_csv('/kaggle/input/testtrain/test.csv')

In [2]:
def SMA(data, window):
    return data.rolling(window=window).mean()

def EMA(data, window):
    return data.ewm(span=window, adjust=False).mean()

def RSI(data, window):
    delta = data.diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=window).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=window).mean()
    RS = gain / loss
    return 100 - (100 / (1 + RS))

# Apply indicators
window_sma, window_ema, window_rsi = 20, 20, 14
train_data['sma'] = SMA(train_data['close'], window_sma)
train_data['ema'] = EMA(train_data['close'], window_ema)
train_data['rsi'] = RSI(train_data['close'], window_rsi)
test_data['sma'] = SMA(test_data['close'], window_sma)
test_data['ema'] = EMA(test_data['close'], window_ema)
test_data['rsi'] = RSI(test_data['close'], window_rsi)

In [3]:
features = ['open', 'high', 'low', 'close', 'volume', 'sma', 'ema', 'rsi']
X = train_data[features]
y = train_data['target']
imputer = SimpleImputer(strategy='mean')
X = imputer.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = RandomForestClassifier(
    n_estimators=100,    # Reduced number of trees
    max_depth=10,        # Limiting tree depth
    min_samples_leaf=50, # More samples per leaf
    max_features='sqrt', # Reduced number of features considered
    n_jobs=-1,           # Use all available CPU cores
    random_state=42
)
model.fit(X_train, y_train)

# Evaluate the model
predictions = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, predictions))

Accuracy: 0.5285167071860689


In [4]:
X_submission = test_data[features]
X_submission = imputer.transform(X_submission)  # Transform test data using the same imputer

# Predict using the trained model
test_data['target'] = model.predict(X_submission)

# Save predictions to CSV
submission = test_data[['row_id', 'target']]
submission.to_csv('submission.csv', index=False)

print("Submission file created successfully.")

Submission file created successfully.
