In [55]:
# Import standard libraries
import pandas as pd
import numpy as np
import importlib

# Import and reload mysklearn package modules
import mysklearn.myutils
importlib.reload(mysklearn.myutils)
import mysklearn.myutils as myutils

import mysklearn.mypytable
importlib.reload(mysklearn.mypytable)
from mysklearn.mypytable import MyPyTable 

import mysklearn.myclassifiers
importlib.reload(mysklearn.myclassifiers)
from mysklearn.myclassifiers import MyDecisionTreeClassifier

import mysklearn.myevaluation
importlib.reload(mysklearn.myevaluation)
from mysklearn.myevaluation import (
    stratified_kfold_split, 
    confusion_matrix, 
    accuracy_score,
    binary_precision_score, 
    binary_recall_score, 
    binary_f1_score
)

# Bitcoin Price Direction Prediction 




This notebook demonstrates a complete machine learning pipeline for Bitcoin price direction prediction:

1. **Data Preprocessing**: Load raw Bitcoin sentiment data, create binary price direction labels, normalize features, and discretize into categorical bins
2. **Data Leakage Prevention**: Drop OHLC features (open, high, low, close) to ensure the model predicts legitimately using only volume, sentiment, and macroeconomic indicators
3. **Random Forest Training**: Train multiple Random Forest instances (N=100 trees, M=3 best trees, F=8 features per split) with different random states for robust evaluation
4. **Evaluation**: Assess performance using stratified train/test split (33% test size), confusion matrices, per-class metrics, and OOB scores across 5 independent runs
5. **Hyperparameter Tuning**: Grid search over N, M, and F parameters to identify optimal configuration

**Methodology:**
- Dataset is balanced (~50/50 Up/Down)
- All numeric features discretized into categorical bins (VeryLow to VeryHigh)
- Random Forest uses stratified sampling to preserve class distribution
- Bootstrap sampling with random feature selection for each tree
- Features used: trading volume, sentiment analysis, and treasury/debt indicators

**Data Integrity:**
- No temporal leakage (OHLC prices removed)
- Legitimate forecasting task (predict tomorrow using today's non-price data)
- Model performance reflects true predictive capability
- Baseline comparison: 50% accuracy (random guessing for binary classification)

## Step 1: Load and Examine the Dataset

In [5]:
# Load the bitcoin sentiment dataset
df = pd.read_csv('input_data/bitcoin_sentiment.csv')

# Print dataset shape
print("Dataset Shape:")
print(f"  Rows: {df.shape[0]}")
print(f"  Columns: {df.shape[1]}")
print()


# Print headers (column names)
print("Column Headers:")
print(df.columns.tolist())
print()

# Print first few rows
print("First 5 Rows:")
print(df.head())
print()

Dataset Shape:
  Rows: 1074
  Columns: 28

Column Headers:
['Unnamed: 0', 'timestamp', 'open', 'high', 'low', 'close', 'volume', 'datetime_utc', 'merge_date', 'domestic_series', 'federal_financing_bank', 'foreign_series', 'government_account_series', 'government_account_series_inflation_securities', 'special_purpose_vehicle', 'state_and_local_government_series', 'total_interest-bearing_debt', 'total_marketable', 'total_non-marketable', 'treasury_bills', 'treasury_bonds', 'treasury_floating_rate_notes_(frn)', 'treasury_inflation-protected_securities_(tips)', 'treasury_notes', 'united_states_savings_inflation_securities', 'united_states_savings_securities', 'weighted_sentiment', 'sentiment_missing']

First 5 Rows:
   Unnamed: 0      timestamp      open      high       low     close  \
0           0  1669852800000  17165.44  17317.80  16855.00  16980.08   
1           1  1669939200000  16980.07  17108.25  16791.02  17094.71   
2           2  1670025600000  17094.25  17158.42  16863.58  16

## Inspect class/label distribution 


In [56]:
# Print label distribution 
label_column = df.columns[5]
print(f"Label Distribution ('{label_column}'):")
print(df[label_column])
print()

Label Distribution ('close'):
0        16980.08
1        17094.71
2        16888.53
3        17108.90
4        16966.05
          ...    
1069    101468.15
1070    103869.00
1071    101290.50
1072    103284.27
1073    102249.20
Name: close, Length: 1074, dtype: float64



## Inspect weight_sentiment and other features 

In [57]:
# Examine the weighted_sentiment column more closely
print("Weighted Sentiment Statistics:")
print(df['weighted_sentiment'].describe())
print()

# Check for missing values
if df.isnull().values.any():
    print("Missing Values per Column:")
    print(df.isnull().sum())
else:
    print("No missing values found in the dataset.")
print()

print("-" * 70)
print()

# Check sentiment_missing column values for anything other than zero
print("Checking for rows where 'sentiment_missing' != 0:")
print()
if (df['sentiment_missing'] != 0).any():
    print(df[df['sentiment_missing'] != 0])
else:
    print("No rows with sentiment_missing != 0 found.")
print()

Weighted Sentiment Statistics:
count    1074.000000
mean        0.347973
std         0.274657
min        -0.749771
25%         0.171151
50%         0.376796
75%         0.540075
max         0.952912
Name: weighted_sentiment, dtype: float64

No missing values found in the dataset.

----------------------------------------------------------------------

Checking for rows where 'sentiment_missing' != 0:

No rows with sentiment_missing != 0 found.



## Inspect weighted_sentiment distribution (exploratory analysis only)

In [None]:
# Create binary classification label from weighted_sentiment for exploratory analysis
# NOTE: This is NOT used for model training - only for understanding sentiment distribution
# Positive sentiment (>0) vs Negative/Neutral sentiment (<=0)
df['sentiment_label'] = df['weighted_sentiment'].apply(lambda x: 'Positive' if x > 0 else 'Negative/Neutral')

print("Classification Label Distribution (for weighted_sentiment):")
print(df['sentiment_label'].value_counts())
print()
print("Label Proportions:")
print(df['sentiment_label'].value_counts(normalize=True))
print()

Classification Label Distribution:(for weighted_sentiment)
sentiment_label
Positive            949
Negative/Neutral    125
Name: count, dtype: int64

Label Proportions:
sentiment_label
Positive            0.883613
Negative/Neutral    0.116387
Name: proportion, dtype: float64



---

## Step 2: Create Classification Label (Price Direction)

- Convert continuous `close` price into a binary classification target by comparing each day's closing price with the previous day.

In [60]:
# Reload the original dataset
df_original = pd.read_csv('input_data/bitcoin_sentiment.csv')

# Create the discretized label by comparing close with previous day's close
# First row will be dropped since there's no previous day
df_original['price_direction'] = 'Down'  # Default value

# Compare current close with previous close
for i in range(1, len(df_original)):
    if df_original.loc[i, 'close'] > df_original.loc[i-1, 'close']:
        df_original.loc[i, 'price_direction'] = 'Up'
    else:
        df_original.loc[i, 'price_direction'] = 'Down'

# Remove the first row (no previous day to compare)
df_discretized = df_original.iloc[1:].copy()
df_discretized = df_discretized.reset_index(drop=True)

print("Discretized Label Distribution (price_direction):")
print(df_discretized['price_direction'].value_counts())
print()
print("Label Proportions:")
print(df_discretized['price_direction'].value_counts(normalize=True))
print()
print(f"Total instances after discretization: {len(df_discretized)}")
print(f"(Original: {len(df_original)}, Removed first row: 1)")

Discretized Label Distribution (price_direction):
price_direction
Up      543
Down    530
Name: count, dtype: int64

Label Proportions:
price_direction
Up      0.506058
Down    0.493942
Name: proportion, dtype: float64

Total instances after discretization: 1073
(Original: 1074, Removed first row: 1)


## Step 3: Drop Unnecessary Features

Remove temporal columns, identifiers, constant features, and **OHLC price features** to prevent data leakage.

In [61]:
# Drop unnecessary columns (temporal, IDs, constant features, and OHLC to prevent leakage)
columns_to_drop = [
    'Unnamed: 0', 
    'timestamp', 
    'datetime_utc', 
    'merge_date', 
    'sentiment_missing', 
    'domestic_series',
    'open',   
    'high', 
    'low',   
    'close'   
]
df_clean = df_discretized.drop(columns=columns_to_drop)

print("Dataset after dropping unnecessary columns:")
print(f"  Rows: {df_clean.shape[0]}")
print(f"  Columns: {df_clean.shape[1]}")
print(f"  Dropped: {columns_to_drop}")
print()

Dataset after dropping unnecessary columns:
  Rows: 1073
  Columns: 19
  Dropped: ['Unnamed: 0', 'timestamp', 'datetime_utc', 'merge_date', 'sentiment_missing', 'domestic_series', 'open', 'high', 'low', 'close']



## Step 4: Normalize Numeric Features

Apply z-score normalization to scale all numeric features to mean=0 and std=1.

In [11]:
# Identify numeric columns (exclude the label column 'price_direction')
numeric_columns = df_clean.select_dtypes(include=[np.number]).columns.tolist()

print(f"Found {len(numeric_columns)} numeric features to normalize")
print()

# Check the scale of numeric features before normalization
print("Feature ranges before normalization:")
for col in numeric_columns[:18]:
    print(f"  {col}: [{df_clean[col].min():.2f}, {df_clean[col].max():.2f}]")
print()

Found 18 numeric features to normalize

Feature ranges before normalization:
  volume: [1227.77, 65575.10]
  federal_financing_bank: [2.39, 2.58]
  foreign_series: [0.00, 7.31]
  government_account_series: [2.13, 3.17]
  government_account_series_inflation_securities: [0.99, 1.31]
  special_purpose_vehicle: [2.89, 4.17]
  state_and_local_government_series: [1.81, 3.85]
  total_interest-bearing_debt: [2.22, 3.37]
  total_marketable: [2.24, 3.42]
  total_non-marketable: [2.13, 3.19]
  treasury_bills: [3.46, 5.45]
  treasury_bonds: [3.01, 3.34]
  treasury_floating_rate_notes_(frn): [3.90, 5.54]
  treasury_inflation-protected_securities_(tips): [0.49, 0.96]
  treasury_notes: [1.68, 3.12]
  united_states_savings_inflation_securities: [3.08, 10.15]
  united_states_savings_securities: [2.69, 3.49]
  weighted_sentiment: [-0.75, 0.95]



### 4.1 Apply Z-Score Normalization

In [63]:
# z-score normalization: (x - mean) / std
# This transforms each feature to have mean=0 and std=1

df_normalized = df_clean.copy()

for col in numeric_columns:
    mean = df_normalized[col].mean()
    std = df_normalized[col].std()
    
    # Avoid division by zero for constant columns
    if std > 0:
        df_normalized[col] = (df_normalized[col] - mean) / std
    else:
        print(f"Warning: {col} has std=0, skipping normalization")

print("Numeric features standardized (z-score normalization)")
print()

# Verify normalization
print("Feature statistics after normalization:")
print(df_normalized[numeric_columns].describe().loc[['mean', 'std']].round(6))
print()

Numeric features standardized (z-score normalization)

Feature statistics after normalization:
      volume  federal_financing_bank  foreign_series  \
mean    -0.0                     0.0             0.0   
std      1.0                     1.0             1.0   

      government_account_series  \
mean                        0.0   
std                         1.0   

      government_account_series_inflation_securities  special_purpose_vehicle  \
mean                                             0.0                     -0.0   
std                                              1.0                      1.0   

      state_and_local_government_series  total_interest-bearing_debt  \
mean                                0.0                         -0.0   
std                                 1.0                          1.0   

      total_marketable  total_non-marketable  treasury_bills  treasury_bonds  \
mean              -0.0                  -0.0             0.0             0.0   
std      

## Step 5: Discretize Features into Categorical Bins

Convert normalized numeric features into categorical bins for entropy-based decision tree classification.

In [64]:
# Discretize normalized numeric features into categorical bins
# Using quantile-based binning (equal frequency bins)

df_discretized_final = df_normalized.copy()

# Define binning strategy: convert normalized values to 5 categories
# Since normalized data has mean=0, std=1, we can use standard deviations as boundaries
def discretize_normalized_feature(series, n_bins=5):
    """
    Discretize a normalized feature into categorical bins.
    Uses quantile-based binning for equal frequency distribution.
    """
    # Use pandas qcut for quantile-based binning
    bins = pd.qcut(series, q=n_bins, labels=['VeryLow', 'Low', 'Medium', 'High', 'VeryHigh'], duplicates='drop')
    return bins

print("Discretizing numeric features into 5 categorical bins...")
print("Bins: VeryLow, Low, Medium, High, VeryHigh")
print()

# Track which columns get discretized
discretized_cols = []
skipped_cols = []

for col in numeric_columns:
    try:
        df_discretized_final[col] = discretize_normalized_feature(df_discretized_final[col])
        discretized_cols.append(col)
    except Exception as e:
        # Some columns might have too few unique values to discretize
        print(f"Warning: Could not discretize '{col}': {e}")
        skipped_cols.append(col)


print(f"Skipped {len(skipped_cols)} columns: {skipped_cols}")

Discretizing numeric features into 5 categorical bins...
Bins: VeryLow, Low, Medium, High, VeryHigh

Skipped 2 columns: ['federal_financing_bank', 'foreign_series']


### 5.1 Handle Columns with Too Few Unique Values for Quantile Binning

In [65]:
# Handle columns that couldn't be discretized (too few unique values)
# Convert them to categorical based on their actual unique values

if skipped_cols:
    print("Handling skipped columns with manual discretization:")
    for col in skipped_cols:
        unique_vals = df_discretized_final[col].nunique()
        print(f"  {col}: {unique_vals} unique values")
        
        # Apply custom discretization rules based on column name
        if col == 'federal_financing_bank':
            # Map 5 unique values directly to Bank1-Bank5 (preserves ordinality)
            sorted_unique = sorted(df_normalized[col].unique())
            bank_mapping = {val: f'Bank{i+1}' for i, val in enumerate(sorted_unique)}
            df_discretized_final[col] = df_normalized[col].map(bank_mapping)
            print(f"    Direct mapping: {len(sorted_unique)} unique values → Bank1-Bank{len(sorted_unique)}")
            print(f"    Distribution: {df_discretized_final[col].value_counts().sort_index().to_dict()}")
            
        elif col == 'foreign_series':
            # Bin into 0 or 1 (binary) using the original normalized data
            df_discretized_final[col] = pd.cut(df_normalized[col], bins=2, labels=['0', '1'])
            
        elif unique_vals == 1:
            # Constant feature
            df_discretized_final[col] = 'Constant'
            
        elif unique_vals == 2:
            df_discretized_final[col] = pd.cut(df_normalized[col], bins=2, labels=['Low', 'High'])
            
        elif unique_vals <= 5:
            # For 3-5 unique values, map directly to preserve ordinal structure
            sorted_unique = sorted(df_normalized[col].unique())
            level_mapping = {val: f'Level_{i}' for i, val in enumerate(sorted_unique)}
            df_discretized_final[col] = df_normalized[col].map(level_mapping)
            
        else:
            # Default to 5 bins for columns with more unique values
            df_discretized_final[col] = pd.qcut(df_normalized[col], q=5, 
                                                 labels=['VeryLow', 'Low', 'Medium', 'High', 'VeryHigh'], 
                                                 duplicates='drop')
    

Handling skipped columns with manual discretization:
  federal_financing_bank: 5 unique values
    Direct mapping: 5 unique values → Bank1-Bank5
    Distribution: {'Bank1': 131, 'Bank2': 365, 'Bank3': 366, 'Bank4': 181, 'Bank5': 30}
  foreign_series: 2 unique values


In [67]:
# Convert all object columns to category dtype for better memory efficiency
for col in df_discretized_final.columns:
    if df_discretized_final[col].dtype == 'object':
        df_discretized_final[col] = df_discretized_final[col].astype('category')

print("All categorical features converted to 'category' dtype")
print()

# Verify discretization - check value distributions
print("Sample of discretized features:")
print(df_discretized_final.head(10))
print()

# Show distribution for a few key features (updated - no OHLC features)
sample_features = ['volume', 'weighted_sentiment', 'federal_financing_bank', 'foreign_series']
for feature in sample_features:
    if feature in df_discretized_final.columns:
        print(f"\nDistribution of discretized '{feature}':")
        print(df_discretized_final[feature].value_counts().sort_index())

print()
print("Data types after discretization:")
print(df_discretized_final.dtypes)


All categorical features converted to 'category' dtype

Sample of discretized features:
     volume federal_financing_bank foreign_series government_account_series  \
0  VeryHigh                  Bank4              1                   VeryLow   
1      High                  Bank4              1                   VeryLow   
2      High                  Bank4              1                   VeryLow   
3  VeryHigh                  Bank4              1                   VeryLow   
4  VeryHigh                  Bank4              1                   VeryLow   
5  VeryHigh                  Bank4              1                   VeryLow   
6  VeryHigh                  Bank4              1                   VeryLow   
7  VeryHigh                  Bank4              1                   VeryLow   
8       Low                  Bank4              1                   VeryLow   
9    Medium                  Bank4              1                   VeryLow   

  government_account_series_inflation_secu

## Step 6: Save Preprocessed Dataset

Export the final preprocessed dataset ready for classification.

In [68]:
# Save the final preprocessed dataset (ready for classification)
output_file_final = 'input_data/bitcoin_sentiment_discretized.csv'
df_discretized_final.to_csv(output_file_final, index=False)

print(f"✓ Final preprocessed dataset saved to: {output_file_final}")
print(f"  Total instances: {len(df_discretized_final)}")
print(f"  Total features: {len(df_discretized_final.columns) - 1}")
print(f"  Label column: 'price_direction'")
print()


✓ Final preprocessed dataset saved to: input_data/bitcoin_sentiment_discretized.csv
  Total instances: 1073
  Total features: 18
  Label column: 'price_direction'



## Preprocessing Summary

In [75]:
print("Preprocessing Pipeline Summary:")
print(f"  1. Loaded raw Bitcoin sentiment dataset")
print(f"  2. Created binary classification label 'price_direction' (Up/Down) from daily close price changes")
print(f"  3. Dropped {len(columns_to_drop)} unnecessary columns (temporal, IDs, constants, OHLC prices to prevent leakage)")
print(f"  4. Normalized {len(numeric_columns)} numeric features using z-score normalization (mean=0, std=1)")
print(f"  5. Discretized features into categorical bins using quantile-based binning:")
print(f"     - Standard features: VeryLow, Low, Medium, High, VeryHigh (5 equal-frequency bins)")
print(f"     - Special handling for columns with too few unique values")
print(f"  6. Converted all features to categorical dtype for memory efficiency")
print(f"  7. Final dataset: {len(df_discretized_final)} instances, {len(df_discretized_final.columns) - 1} features")
print(f"  8. Label distribution: {df_discretized_final['price_direction'].value_counts().to_dict()}")
print(f"  9. Dataset ready for MyRandomForestClassifier training")
print()

Preprocessing Pipeline Summary:
  1. Loaded raw Bitcoin sentiment dataset
  2. Created binary classification label 'price_direction' (Up/Down) from daily close price changes
  3. Dropped 10 unnecessary columns (temporal, IDs, constants, OHLC prices to prevent leakage)
  4. Normalized 18 numeric features using z-score normalization (mean=0, std=1)
  5. Discretized features into categorical bins using quantile-based binning:
     - Standard features: VeryLow, Low, Medium, High, VeryHigh (5 equal-frequency bins)
     - Special handling for columns with too few unique values
  6. Converted all features to categorical dtype for memory efficiency
  7. Final dataset: 1073 instances, 18 features
  8. Label distribution: {'Up': 543, 'Down': 530}
  9. Dataset ready for MyRandomForestClassifier training



---

# Part 2: Random Forest Classification

## Step 1: Load Preprocessed Data for Classification

In [17]:
# Load the preprocessed dataset
df_ready = pd.read_csv('input_data/bitcoin_sentiment_discretized.csv')

print("Preprocessed Dataset Loaded:")
print(f"  Shape: {df_ready.shape}")
print(f"  Features: {df_ready.shape[1] - 1}")
print(f"  Instances: {df_ready.shape[0]}")
print()

# Display first few rows
print("First 5 rows:")
print(df_ready.head())
print()

# Check data types
print("Data types:")
print(df_ready.dtypes)
print()

Preprocessed Dataset Loaded:
  Shape: (1073, 19)
  Features: 18
  Instances: 1073

First 5 rows:
     volume federal_financing_bank  foreign_series government_account_series  \
0  VeryHigh                  Bank4               1                   VeryLow   
1      High                  Bank4               1                   VeryLow   
2      High                  Bank4               1                   VeryLow   
3  VeryHigh                  Bank4               1                   VeryLow   
4  VeryHigh                  Bank4               1                   VeryLow   

  government_account_series_inflation_securities special_purpose_vehicle  \
0                                        VeryLow                     Low   
1                                        VeryLow                     Low   
2                                        VeryLow                     Low   
3                                        VeryLow                     Low   
4                                        V

## Step 2: Exploratory Data Analysis (EDA)

### 2.1 Class Distribution Analysis

In [71]:
# Analyze class distribution
print("="*70)
print("CLASS DISTRIBUTION ANALYSIS")
print("="*70)
print()

label_counts = df_ready['price_direction'].value_counts()
label_props = df_ready['price_direction'].value_counts(normalize=True)

print("Price Direction Distribution:")
for label in sorted(label_counts.index):
    count = label_counts[label]
    prop = label_props[label]
    print(f"  {label}: {count} instances ({prop*100:.2f}%)")

print()
balance_diff = abs(label_props.iloc[0] - 0.5)
print(f"Dataset is {'balanced' if balance_diff < 0.1 else 'imbalanced'}")
print(f"Balance metric: {(1 - balance_diff*2)*100:.1f}%")
print()

CLASS DISTRIBUTION ANALYSIS

Price Direction Distribution:
  Down: 530 instances (49.39%)
  Up: 543 instances (50.61%)

Dataset is balanced
Balance metric: 98.8%



### 2.2 Feature Distribution Analysis

Note: Due to quantile-based binning, most features have approximately equal frequency distributions (~20% per bin for 5 bins).

In [None]:
# Analyze feature distributions
# Note: Quantile binning ensures roughly equal frequencies across bins
print("="*70)
print("FEATURE DISTRIBUTION SUMMARY")
print("="*70)
print()

feature_cols = [col for col in df_ready.columns if col != 'price_direction']

print(f"Total features: {len(feature_cols)}")
print()

# Sample key features for detailed analysis (updated - no OHLC features)
sample_features_eda = ['volume', 'weighted_sentiment', 'federal_financing_bank', 'foreign_series', 'total_marketable']

for feature in sample_features_eda:
    if feature in df_ready.columns:
        print(f"\n{feature}:")
        print(f"  Unique values: {df_ready[feature].nunique()}")
        value_counts = df_ready[feature].value_counts().sort_index()
        for val, count in value_counts.items():
            print(f"    {val}: {count} ({count/len(df_ready)*100:.1f}%)")


FEATURE DISTRIBUTION SUMMARY

Total features: 18


volume:
  Unique values: 5
    High: 214 (19.9%)
    Low: 214 (19.9%)
    Medium: 215 (20.0%)
    VeryHigh: 215 (20.0%)
    VeryLow: 215 (20.0%)

weighted_sentiment:
  Unique values: 5
    High: 214 (19.9%)
    Low: 214 (19.9%)
    Medium: 215 (20.0%)
    VeryHigh: 215 (20.0%)
    VeryLow: 215 (20.0%)

federal_financing_bank:
  Unique values: 5
    Bank1: 131 (12.2%)
    Bank2: 365 (34.0%)
    Bank3: 366 (34.1%)
    Bank4: 181 (16.9%)
    Bank5: 30 (2.8%)

foreign_series:
  Unique values: 2
    0: 953 (88.8%)
    1: 120 (11.2%)

total_marketable:
  Unique values: 5
    High: 214 (19.9%)
    Low: 213 (19.9%)
    Medium: 212 (19.8%)
    VeryHigh: 192 (17.9%)
    VeryLow: 242 (22.6%)


## Step 3: Prepare Data for Random Forest

In [20]:
# Separate features (X) and label (y)
X_data = df_ready.drop(columns=['price_direction']).values.tolist()
y_data = df_ready['price_direction'].tolist()

print("Data Preparation:")
print(f"  X shape: ({len(X_data)}, {len(X_data[0])})")
print(f"  y shape: ({len(y_data)},)")
print()
print(f"  Number of features: {len(X_data[0])}")
print(f"  Number of instances: {len(X_data)}")
print()
print("Sample instance (first 5 features):")
print(f"  X[0][:5] = {X_data[0][:5]}")
print(f"  y[0] = {y_data[0]}")
print()

Data Preparation:
  X shape: (1073, 18)
  y shape: (1073,)

  Number of features: 18
  Number of instances: 1073

Sample instance (first 5 features):
  X[0][:5] = ['VeryHigh', 'Bank4', 1, 'VeryLow', 'VeryLow']
  y[0] = Up



## Step 4: Train Random Forest Classifier

In [54]:
# Import Random Forest Classifier
import importlib
import mysklearn.myclassifiers
importlib.reload(mysklearn.myclassifiers)
from mysklearn.myclassifiers import MyRandomForestClassifier
import math

# Set hyperparameters
N = 100  # Number of trees to train
M = 3    # Best trees to select for final ensemble
F = 8    # Features per split

print("="*70)
print("TRAINING RANDOM FOREST CLASSIFIER (5 Different Random States)")
print("="*70)
print()

print("Random Forest Configuration:")
print(f"  N (number of trees): {N}")
print(f"  M (best trees for final ensemble): {M}")
print(f"  F (features per split): {F}")
print(f"  Bootstrap sampling: Yes")
print(f"  Test set size: 33% (stratified)")
print(f"  Number of runs: 5 (different random states)")
print()

# Train 5 different Random Forest instances with different random states
rf_classifiers = []
random_states = [42, 43, 44, 45, 46]

for i, rand_state in enumerate(random_states, 1):
    print(f"Training Random Forest #{i} (random_state={rand_state})...")
    
    rf = MyRandomForestClassifier(
        n_estimators=N,
        n_best_trees=M,
        max_features=F,
        bootstrap=True,
        random_state=rand_state,
        test_size=0.33
    )
    
    rf.fit(X_data, y_data)
    rf_classifiers.append(rf)
    print(f"  ✓ Complete!")

print()
print("="*70)
print("✓ All 5 Random Forest instances trained successfully!")
print("="*70)
print()

TRAINING RANDOM FOREST CLASSIFIER (5 Different Random States)

Random Forest Configuration:
  N (number of trees): 100
  M (best trees for final ensemble): 3
  F (features per split): 8
  Bootstrap sampling: Yes
  Test set size: 33% (stratified)
  Number of runs: 5 (different random states)

Training Random Forest #1 (random_state=42)...
  ✓ Complete!
Training Random Forest #2 (random_state=43)...
  ✓ Complete!
Training Random Forest #3 (random_state=44)...
  ✓ Complete!
Training Random Forest #4 (random_state=45)...
  ✓ Complete!
Training Random Forest #5 (random_state=46)...
  ✓ Complete!

✓ All 5 Random Forest instances trained successfully!



## Step 5: Evaluate Random Forest Performance

In [84]:
# Evaluate Random Forest performance across all 5 instances
from mysklearn.myevaluation import confusion_matrix, accuracy_score

# Collect metrics from all instances
all_test_accs = []
all_oob_scores = []

for i, rf in enumerate(rf_classifiers, 1):
    test_acc = rf.get_test_accuracy()
    oob_score = rf.get_oob_score()
    
    if test_acc is not None:
        all_test_accs.append(test_acc)
    if oob_score is not None:
        all_oob_scores.append(oob_score)

print("="*80)
print("RANDOM FOREST PERFORMANCE SUMMARY - AGGREGATE STATISTICS")
print("="*80)
print()

# Display aggregate statistics
print(f"Evaluated {len(rf_classifiers)} Random Forest instances (random_states: {random_states})")
print()

avg_test_acc = sum(all_test_accs) / len(all_test_accs)
std_test_acc = (sum((x - avg_test_acc)**2 for x in all_test_accs) / len(all_test_accs))**0.5
print(f"Test Accuracy Statistics:")
print(f"  Mean:   {avg_test_acc:.4f}")
print(f"  Std:    {std_test_acc:.4f}")
print(f"  Min:    {min(all_test_accs):.4f}")
print(f"  Max:    {max(all_test_accs):.4f}")
print(f"  Range:  {max(all_test_accs) - min(all_test_accs):.4f}")
print()

avg_oob = sum(all_oob_scores) / len(all_oob_scores)
std_oob = (sum((x - avg_oob)**2 for x in all_oob_scores) / len(all_oob_scores))**0.5
print(f"OOB Score Statistics:")
print(f"  Mean:   {avg_oob:.4f}")
print(f"  Std:    {std_oob:.4f}")
print(f"  Min:    {min(all_oob_scores):.4f}")
print(f"  Max:    {max(all_oob_scores):.4f}")
print(f"  Range:  {max(all_oob_scores) - min(all_oob_scores):.4f}")
print()

# Performance interpretation
print("Performance Analysis:")
if avg_test_acc > 0.52:
    print(f"  ✓ Mean accuracy ({avg_test_acc:.4f}) above 50% baseline - model shows predictive signal")
elif avg_test_acc > 0.50:
    print(f"  ~ Mean accuracy ({avg_test_acc:.4f}) slightly above baseline - weak predictive power")
else:
    print(f"  ✗ Mean accuracy ({avg_test_acc:.4f}) at/below baseline - features may not predict effectively")
print()

print("="*80)

RANDOM FOREST PERFORMANCE SUMMARY - AGGREGATE STATISTICS

Evaluated 5 Random Forest instances (random_states: [42, 43, 44, 45, 46])

Test Accuracy Statistics:
  Mean:   0.5020
  Std:    0.0221
  Min:    0.4592
  Max:    0.5211
  Range:  0.0620

OOB Score Statistics:
  Mean:   0.8047
  Std:    0.0101
  Min:    0.7967
  Max:    0.8245
  Range:  0.0279

Performance Analysis:
  ~ Mean accuracy (0.5020) slightly above baseline - weak predictive power



### 5.1 Prediction Examples

In [50]:
# Show prediction examples for all 5 Random Forest instances

print("="*80)
print("SAMPLE PREDICTIONS - ALL 5 INSTANCES")
print("="*80)
print()

feature_names_rf = [col for col in df_discretized_final.columns if col != 'price_direction']

# Get indices for volume and weighted_sentiment
volume_idx = feature_names_rf.index('volume')
sentiment_idx = feature_names_rf.index('weighted_sentiment')

# Show first 10 test instances for each Random Forest
for i, rf in enumerate(rf_classifiers, 1):
    print(f"\n{'='*80}")
    print(f"INSTANCE #{i} (random_state={random_states[i-1]}) - First 10 Test Predictions")
    print(f"{'='*80}")
    print()
    
    y_pred_test = rf.predict(rf.X_test_internal)
    y_true_test = rf.y_test_internal
    
    # Display first 10 predictions in compact format
    for j in range(min(10, len(y_true_test))):
        instance = rf.X_test_internal[j]
        actual = y_true_test[j]
        predicted = y_pred_test[j]
        correct = 'Yes' if actual == predicted else 'No'
        volume = instance[volume_idx]
        sentiment = instance[sentiment_idx]
        
        print(f"  {j+1:2d}. Actual: {actual:>4s} | Predicted: {predicted:>4s} | Volume: {volume:>8s} | Sentiment: {sentiment:>8s} | Correct: {correct:>3s}")
    
print()
print("="*80)

SAMPLE PREDICTIONS - ALL 5 INSTANCES


INSTANCE #1 (random_state=42) - First 10 Test Predictions

   1. Actual: Down | Predicted: Down | Volume:  VeryLow | Sentiment: VeryHigh | Correct: Yes
   2. Actual:   Up | Predicted:   Up | Volume:   Medium | Sentiment:      Low | Correct: Yes
   3. Actual: Down | Predicted: Down | Volume:  VeryLow | Sentiment:  VeryLow | Correct: Yes
   4. Actual: Down | Predicted: Down | Volume:  VeryLow | Sentiment:     High | Correct: Yes
   5. Actual:   Up | Predicted: Down | Volume:   Medium | Sentiment:  VeryLow | Correct:  No
   6. Actual: Down | Predicted: Down | Volume:   Medium | Sentiment:      Low | Correct: Yes
   7. Actual:   Up | Predicted: Down | Volume:  VeryLow | Sentiment:      Low | Correct:  No
   8. Actual:   Up | Predicted:   Up | Volume:      Low | Sentiment:      Low | Correct: Yes
   9. Actual:   Up | Predicted:   Up | Volume:      Low | Sentiment:      Low | Correct: Yes
  10. Actual: Down | Predicted: Down | Volume:  VeryLow | Sentime

In [25]:
import time
from mysklearn.myclassifiers import MyRandomForestClassifier
from mysklearn.myevaluation import confusion_matrix, accuracy_score

# Define parameter grid to search
param_grid = {
    'N': [10, 20, 50, 100, 200],           # Number of trees (including large values)
    'M': [3, 5, 7, 10, 15],                # Best trees to select  
    'F': [2, 4, 6, 8, 12, 18]              # Features per split (18 = all features)
}

# Number of runs per configuration (to handle randomness)
n_runs = 5

print("="*80)
print("RANDOM FOREST HYPERPARAMETER TUNING")
print("="*80)
print()
print(f"Parameter Grid:")
print(f"  N (number of trees): {param_grid['N']}")
print(f"  M (best trees): {param_grid['M']}")
print(f"  F (features per split): {param_grid['F']}")
print()
print(f"Runs per configuration: {n_runs}")
print(f"Total configurations: {len(param_grid['N']) * len(param_grid['M']) * len(param_grid['F'])}")
print(f"Total model trainings: {len(param_grid['N']) * len(param_grid['M']) * len(param_grid['F']) * n_runs}")
print()

# Store results
results = []

print("Starting hyperparameter search...")
print("This may take several minutes...")
print()

# Grid search
total_configs = 0
start_time = time.time()

for N in param_grid['N']:
    for M in param_grid['M']:
        # Skip invalid combinations (M cannot exceed N)
        if M > N:
            continue
            
        for F in param_grid['F']:
            total_configs += 1
            
            # Run multiple times with different seeds
            run_accuracies = []
            run_oob_scores = []
            best_conf_matrix = None
            best_accuracy = 0
            
            for run in range(n_runs):
                # Use different random seed for each run
                random_seed = 42 + run
                
                try:
                    # Train Random Forest
                    rf = MyRandomForestClassifier(
                        n_estimators=N,
                        max_features=F,
                        bootstrap=True,
                        random_state=random_seed,
                        test_size=0.33
                    )
                    
                    rf.fit(X_data, y_data)
                    
                    # Get predictions
                    y_pred = rf.predict(rf.X_test_internal)
                    y_true = rf.y_test_internal
                    
                    # Calculate metrics
                    acc = accuracy_score(y_true, y_pred)
                    oob = rf.get_oob_score()
                    
                    run_accuracies.append(acc)
                    run_oob_scores.append(oob)
                    
                    # Keep best confusion matrix
                    if acc > best_accuracy:
                        best_accuracy = acc
                        labels = sorted(list(set(y_true)))
                        best_conf_matrix = confusion_matrix(y_true, y_pred, labels=labels)
                    
                except Exception as e:
                    print(f"  Error with N={N}, M={M}, F={F}, run={run}: {e}")
                    continue
            
            # Calculate statistics
            if run_accuracies:
                avg_acc = sum(run_accuracies) / len(run_accuracies)
                std_acc = (sum((x - avg_acc)**2 for x in run_accuracies) / len(run_accuracies))**0.5
                avg_oob = sum(run_oob_scores) / len(run_oob_scores)
                
                results.append({
                    'N': N,
                    'M': M,
                    'F': F,
                    'avg_accuracy': avg_acc,
                    'std_accuracy': std_acc,
                    'min_accuracy': min(run_accuracies),
                    'max_accuracy': max(run_accuracies),
                    'avg_oob': avg_oob,
                    'best_conf_matrix': best_conf_matrix,
                    'all_accuracies': run_accuracies
                })
                
                # Print progress every 10 configurations
                if total_configs % 10 == 0:
                    elapsed = time.time() - start_time
                    print(f"  Completed {total_configs} configurations... ({elapsed:.1f}s elapsed)")

elapsed_total = time.time() - start_time
print()
print(f"✓ Hyperparameter search complete!")
print(f"  Tested {total_configs} valid configurations")
print(f"  Total models trained: {len(results) * n_runs}")
print(f"  Time elapsed: {elapsed_total:.1f} seconds ({elapsed_total/60:.1f} minutes)")
print()

RANDOM FOREST HYPERPARAMETER TUNING

Parameter Grid:
  N (number of trees): [10, 20, 50, 100, 200]
  M (best trees): [3, 5, 7, 10, 15]
  F (features per split): [2, 4, 6, 8, 12, 18]

Runs per configuration: 5
Total configurations: 150
Total model trainings: 750

Starting hyperparameter search...
This may take several minutes...



  Completed 10 configurations... (15.6s elapsed)
  Completed 20 configurations... (31.1s elapsed)
  Completed 30 configurations... (55.8s elapsed)
  Completed 40 configurations... (84.4s elapsed)
  Completed 50 configurations... (114.7s elapsed)
  Completed 60 configurations... (170.9s elapsed)
  Completed 70 configurations... (240.9s elapsed)
  Completed 80 configurations... (313.8s elapsed)
  Completed 90 configurations... (432.6s elapsed)
  Completed 100 configurations... (582.8s elapsed)
  Completed 110 configurations... (728.7s elapsed)
  Completed 120 configurations... (960.9s elapsed)
  Completed 130 configurations... (1226.7s elapsed)
  Completed 140 configurations... (1500.4s elapsed)

✓ Hyperparameter search complete!
  Tested 144 valid configurations
  Total models trained: 720
  Time elapsed: 1614.9 seconds (26.9 minutes)



In [51]:
# Sort results by average accuracy (descending order)
results_sorted = sorted(results, key=lambda x: x['avg_accuracy'], reverse=True)

# Find best result and analyze parameter impacts
best_result = results_sorted[0]

# Analyze parameter impacts
n_impact = {}
m_impact = {}
f_impact = {}

for result in results:
    N, M, F = result['N'], result['M'], result['F']
    acc = result['avg_accuracy']
    
    if N not in n_impact:
        n_impact[N] = []
    n_impact[N].append(acc)
    
    if M not in m_impact:
        m_impact[M] = []
    m_impact[M].append(acc)
    
    if F not in f_impact:
        f_impact[F] = []
    f_impact[F].append(acc)

# Create summary dataframe
summary_data = []
for result in results_sorted[:20]:  # Top 20 configurations
    summary_data.append({
        'N': result['N'],
        'M': result['M'],
        'F': result['F'],
        'Avg_Accuracy': f"{result['avg_accuracy']:.4f}",
        'Std_Dev': f"{result['std_accuracy']:.4f}",
        'Min_Acc': f"{result['min_accuracy']:.4f}",
        'Max_Acc': f"{result['max_accuracy']:.4f}",
        'Avg_OOB': f"{result['avg_oob']:.4f}"
    })

df_summary = pd.DataFrame(summary_data)

print("="*80)
print("HYPERPARAMETER TUNING SUMMARY - TOP 20 CONFIGURATIONS")
print("="*80)
print()
print(df_summary.to_string(index=False))
print()
print("="*80)
print()

print("KEY FINDINGS:")
print()
print("1. Best Configuration:")
print(f"   N={best_result['N']}, M={best_result['M']}, F={best_result['F']}")
print(f"   Test Accuracy: {best_result['avg_accuracy']:.4f} (±{best_result['std_accuracy']:.4f})")
print()

print("2. Parameter Recommendations:")
# Find best N
best_n = max(n_impact.items(), key=lambda x: sum(x[1])/len(x[1]))
print(f"   N (trees): {best_n[0]} (avg accuracy: {sum(best_n[1])/len(best_n[1]):.4f})")

# Find best M
best_m = max(m_impact.items(), key=lambda x: sum(x[1])/len(x[1]))
print(f"   M (best trees): {best_m[0]} (avg accuracy: {sum(best_m[1])/len(best_m[1]):.4f})")

# Find best F
best_f = max(f_impact.items(), key=lambda x: sum(x[1])/len(x[1]))
print(f"   F (features): {best_f[0]} (avg accuracy: {sum(best_f[1])/len(best_f[1]):.4f})")
print()

print("3. Performance Characteristics:")
if best_result['avg_accuracy'] > 0.52:
    print(f"   ✓ Achieved {best_result['avg_accuracy']:.1%} accuracy (above random baseline)")
    print("   ✓ Model shows weak but measurable predictive signal")
elif best_result['avg_accuracy'] > 0.50:
    print(f"   ~ Achieved {best_result['avg_accuracy']:.1%} accuracy (slightly above baseline)")
    print("   ~ Limited predictive power from available features")
else:
    print(f"   ✗ Achieved {best_result['avg_accuracy']:.1%} accuracy (at/below baseline)")
    print("   ✗ Features do not predict price direction effectively")
print()

print("4. Variance Analysis:")
if best_result['std_accuracy'] < 0.02:
    print(f"   ✓ Low variance (±{best_result['std_accuracy']:.4f}) - stable predictions")
elif best_result['std_accuracy'] < 0.05:
    print(f"   ~ Moderate variance (±{best_result['std_accuracy']:.4f}) - acceptable")
else:
    print(f"   ✗ High variance (±{best_result['std_accuracy']:.4f}) - unstable")
print()

print("5. Confusion Matrix Insights (Best Configuration):")
cm = best_result['best_conf_matrix']
total = sum(sum(row) for row in cm)
print(f"   Total predictions: {total}")
print(f"   Correctly predicted Down: {cm[0][0]} / {cm[0][0] + cm[0][1]}")
print(f"   Correctly predicted Up: {cm[1][1]} / {cm[1][0] + cm[1][1]}")
print()

print("="*80)

HYPERPARAMETER TUNING SUMMARY - TOP 20 CONFIGURATIONS

  N  M  F Avg_Accuracy Std_Dev Min_Acc Max_Acc Avg_OOB
100  3  8       0.5127  0.0211  0.4732  0.5324  0.8047
100  5  8       0.5127  0.0211  0.4732  0.5324  0.8047
100  7  8       0.5127  0.0211  0.4732  0.5324  0.8047
100 10  8       0.5127  0.0211  0.4732  0.5324  0.8047
100 15  8       0.5127  0.0211  0.4732  0.5324  0.8047
 20  3 12       0.5110  0.0224  0.4704  0.5324  0.7788
 20  5 12       0.5110  0.0224  0.4704  0.5324  0.7788
 20  7 12       0.5110  0.0224  0.4704  0.5324  0.7788
 20 10 12       0.5110  0.0224  0.4704  0.5324  0.7788
 20 15 12       0.5110  0.0224  0.4704  0.5324  0.7788
200  3  8       0.5099  0.0250  0.4620  0.5324  0.8089
200  5  8       0.5099  0.0250  0.4620  0.5324  0.8089
200  7  8       0.5099  0.0250  0.4620  0.5324  0.8089
200 10  8       0.5099  0.0250  0.4620  0.5324  0.8089
200 15  8       0.5099  0.0250  0.4620  0.5324  0.8089
 10  3  6       0.5082  0.0234  0.4648  0.5324  0.7341
 10  5  6 

## Step 15: Summary of Tuning Results

Create a comprehensive summary table showing simplified results for the best configurations.

In [29]:
print("="*80)
print("PARAMETER IMPACT ANALYSIS")
print("="*80)
print()

# Analyze impact of N (number of trees)
print("1. Impact of N (Number of Trees):")
print("-" * 40)
n_impact = {}
for result in results:
    n = result['N']
    if n not in n_impact:
        n_impact[n] = []
    n_impact[n].append(result['avg_accuracy'])

for n in sorted(n_impact.keys()):
    avg = sum(n_impact[n]) / len(n_impact[n])
    print(f"  N={n:3d}: Avg Accuracy = {avg:.4f} ({len(n_impact[n])} configs)")
print()

# Analyze impact of M (best trees selected)
print("2. Impact of M (Best Trees Selected):")
print("-" * 40)
m_impact = {}
for result in results:
    m = result['M']
    if m not in m_impact:
        m_impact[m] = []
    m_impact[m].append(result['avg_accuracy'])

for m in sorted(m_impact.keys()):
    avg = sum(m_impact[m]) / len(m_impact[m])
    print(f"  M={m:2d}: Avg Accuracy = {avg:.4f} ({len(m_impact[m])} configs)")
print()

# Analyze impact of F (features per split)
print("3. Impact of F (Features per Split):")
print("-" * 40)
f_impact = {}
for result in results:
    f = result['F']
    if f not in f_impact:
        f_impact[f] = []
    f_impact[f].append(result['avg_accuracy'])

for f in sorted(f_impact.keys()):
    avg = sum(f_impact[f]) / len(f_impact[f])
    print(f"  F={f:2d}: Avg Accuracy = {avg:.4f} ({len(f_impact[f])} configs)")
print()

# Overall statistics
all_accuracies = [r['avg_accuracy'] for r in results]
print("4. Overall Statistics Across All Configurations:")
print("-" * 40)
print(f"  Mean Accuracy: {sum(all_accuracies)/len(all_accuracies):.4f}")
print(f"  Best Accuracy: {max(all_accuracies):.4f}")
print(f"  Worst Accuracy: {min(all_accuracies):.4f}")
print(f"  Std Dev: {(sum((x - sum(all_accuracies)/len(all_accuracies))**2 for x in all_accuracies) / len(all_accuracies))**0.5:.4f}")
print()
print("="*80)

PARAMETER IMPACT ANALYSIS

1. Impact of N (Number of Trees):
----------------------------------------
  N= 10: Avg Accuracy = 0.4998 (24 configs)
  N= 20: Avg Accuracy = 0.5033 (30 configs)
  N= 50: Avg Accuracy = 0.5018 (30 configs)
  N=100: Avg Accuracy = 0.5030 (30 configs)
  N=200: Avg Accuracy = 0.5033 (30 configs)

2. Impact of M (Best Trees Selected):
----------------------------------------
  M= 3: Avg Accuracy = 0.5022 (30 configs)
  M= 5: Avg Accuracy = 0.5022 (30 configs)
  M= 7: Avg Accuracy = 0.5022 (30 configs)
  M=10: Avg Accuracy = 0.5022 (30 configs)
  M=15: Avg Accuracy = 0.5028 (24 configs)

3. Impact of F (Features per Split):
----------------------------------------
  F= 2: Avg Accuracy = 0.5026 (24 configs)
  F= 4: Avg Accuracy = 0.5006 (24 configs)
  F= 6: Avg Accuracy = 0.5015 (24 configs)
  F= 8: Avg Accuracy = 0.5057 (24 configs)
  F=12: Avg Accuracy = 0.5039 (24 configs)
  F=18: Avg Accuracy = 0.4998 (24 configs)

4. Overall Statistics Across All Configuratio

## Step 14: Parameter Impact Analysis

Analyze how each parameter affects performance.

In [30]:
# Sort results by average accuracy
results_sorted = sorted(results, key=lambda x: x['avg_accuracy'], reverse=True)

print("="*80)
print("TOP 10 HYPERPARAMETER CONFIGURATIONS")
print("="*80)
print()

# Display top 10 results
for i, result in enumerate(results_sorted[:10], 1):
    print(f"{i}. N={result['N']}, M={result['M']}, F={result['F']}")
    print(f"   Average Accuracy: {result['avg_accuracy']:.4f} (±{result['std_accuracy']:.4f})")
    print(f"   Range: [{result['min_accuracy']:.4f}, {result['max_accuracy']:.4f}]")
    print(f"   Average OOB Score: {result['avg_oob']:.4f}")
    print(f"   Individual runs: {[f'{x:.4f}' for x in result['all_accuracies']]}")
    print()

print("="*80)
print()

# Find best configuration
best_result = results_sorted[0]
print("BEST CONFIGURATION:")
print(f"  N (number of trees): {best_result['N']}")
print(f"  M (best trees): {best_result['M']}")
print(f"  F (features per split): {best_result['F']}")
print()
print(f"Performance Metrics:")
print(f"  Average Test Accuracy: {best_result['avg_accuracy']:.4f} (±{best_result['std_accuracy']:.4f})")
print(f"  Best Single Run: {best_result['max_accuracy']:.4f}")
print(f"  Worst Single Run: {best_result['min_accuracy']:.4f}")
print(f"  Average OOB Score: {best_result['avg_oob']:.4f}")
print()

# Display best confusion matrix
print("Best Run Confusion Matrix:")
print(f"  Actual↓ / Predicted→")
print(f"         Down      Up")
cm = best_result['best_conf_matrix']
print(f"  Down   {cm[0][0]:4d}    {cm[0][1]:4d}")
print(f"  Up     {cm[1][0]:4d}    {cm[1][1]:4d}")
print()

# Calculate per-class metrics for best result
tp_down = cm[0][0]
fp_down = cm[1][0]
fn_down = cm[0][1]
tn_down = cm[1][1]

tp_up = cm[1][1]
fp_up = cm[0][1]
fn_up = cm[1][0]
tn_up = cm[0][0]

prec_down = tp_down / (tp_down + fp_down) if (tp_down + fp_down) > 0 else 0
rec_down = tp_down / (tp_down + fn_down) if (tp_down + fn_down) > 0 else 0
f1_down = 2 * prec_down * rec_down / (prec_down + rec_down) if (prec_down + rec_down) > 0 else 0

prec_up = tp_up / (tp_up + fp_up) if (tp_up + fp_up) > 0 else 0
rec_up = tp_up / (tp_up + fn_up) if (tp_up + fn_up) > 0 else 0
f1_up = 2 * prec_up * rec_up / (prec_up + rec_up) if (prec_up + rec_up) > 0 else 0

print("Per-Class Performance (Best Run):")
print(f"  Class 'Down':")
print(f"    Precision: {prec_down:.4f}")
print(f"    Recall:    {rec_down:.4f}")
print(f"    F1-Score:  {f1_down:.4f}")
print()
print(f"  Class 'Up':")
print(f"    Precision: {prec_up:.4f}")
print(f"    Recall:    {rec_up:.4f}")
print(f"    F1-Score:  {f1_up:.4f}")
print()
print("="*80)

TOP 10 HYPERPARAMETER CONFIGURATIONS

1. N=100, M=3, F=8
   Average Accuracy: 0.5127 (±0.0211)
   Range: [0.4732, 0.5324]
   Average OOB Score: 0.8047
   Individual runs: ['0.5099', '0.5268', '0.5211', '0.5324', '0.4732']

2. N=100, M=5, F=8
   Average Accuracy: 0.5127 (±0.0211)
   Range: [0.4732, 0.5324]
   Average OOB Score: 0.8047
   Individual runs: ['0.5099', '0.5268', '0.5211', '0.5324', '0.4732']

3. N=100, M=7, F=8
   Average Accuracy: 0.5127 (±0.0211)
   Range: [0.4732, 0.5324]
   Average OOB Score: 0.8047
   Individual runs: ['0.5099', '0.5268', '0.5211', '0.5324', '0.4732']

4. N=100, M=10, F=8
   Average Accuracy: 0.5127 (±0.0211)
   Range: [0.4732, 0.5324]
   Average OOB Score: 0.8047
   Individual runs: ['0.5099', '0.5268', '0.5211', '0.5324', '0.4732']

5. N=100, M=15, F=8
   Average Accuracy: 0.5127 (±0.0211)
   Range: [0.4732, 0.5324]
   Average OOB Score: 0.8047
   Individual runs: ['0.5099', '0.5268', '0.5211', '0.5324', '0.4732']

6. N=20, M=3, F=12
   Average Accur