Random Forest
==========

In [1]:
import pandas as pd
import numpy as np
import os
import sys
import argparse
import warnings

warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=UserWarning)

In [2]:
# Read EBITDA_new.csv into a DataFrame. The path is relative, so it is
# resolved against the kernel's current working directory.
processed_df = pd.read_csv(filepath_or_buffer="EBITDA_new.csv")

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# --- Columns used by the model ------------------------------------------------
# The order of `features` fixes the column order of X, and therefore the order
# of any per-feature output produced from the fitted model.
target = 'EBITDA'
features = [
    'capitalExpenditure_',
    'operatingIncome_',
    # 'shareholdersEquity_', 'netIncome_',   <- currently left out of the model
    'totalAssets_',
    'EPS_',
    'debtToTotalAssets',
    'niToAsset',
    'ROA',
    'revenueGrowth',
    'quickRatio',
    'rsi',
    'mva',
    'cfroi',
    'salePrice',
    'realGDPSA',
    'm2SA',
    'm2MinusM1SA',
    'm2Velocity',
    'primeRate',
    'EBITDA_lag1',
]

# --- Keep only complete rows --------------------------------------------------
# A row is discarded when any predictor or the target is missing.
# Note that this rebinds `processed_df` to the filtered frame.
required_columns = [*features, target]
processed_df = processed_df.dropna(subset=required_columns)

# --- Train / test partition ---------------------------------------------------
# 80% of the rows are used for fitting and 20% are held out; the seed makes the
# shuffle repeatable.
# NOTE(review): this is a random shuffle split. If the rows are time-ordered
# observations (the EBITDA_lag1 column suggests they might be), confirm that a
# random split is what is wanted here.
X, y = processed_df[features], processed_df[target]
_partition = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = _partition

# --- Fit the forest -----------------------------------------------------------
# fit() returns the estimator itself, so construction and training can be chained.
rf_model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# --- Score on the held-out rows -----------------------------------------------
y_pred = rf_model.predict(X_test)
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R^2 Score: {r2}")

Mean Squared Error: 0.8952014457656858
R^2 Score: 0.6782520770436099


In [4]:
# Feature importance
# One importance value per column the forest was trained on, in training
# column order.
feature_importances = rf_model.feature_importances_

# Take the labels from the fitted model instead of the global `features` list.
# In a notebook that list can be edited or re-run after the model was fitted,
# which would silently attach the wrong name to each importance value.
# Estimators that do not expose `feature_names_in_` fall back to `features`.
importance_labels = list(getattr(rf_model, 'feature_names_in_', features))
if len(importance_labels) != len(feature_importances):
    raise ValueError(
        f"Got {len(importance_labels)} feature names for "
        f"{len(feature_importances)} importance values; "
        "re-run the training cell so the names and the fitted model agree."
    )

# Map feature importances to feature names, most important first
feature_importance_df = pd.DataFrame({
    'Feature': importance_labels,
    'Importance': feature_importances
}).sort_values(by='Importance', ascending=False)

print(feature_importance_df)

                Feature  Importance
18          EBITDA_lag1    0.558075
16           m2Velocity    0.106322
14                 m2SA    0.071796
3                  EPS_    0.036868
0   capitalExpenditure_    0.024267
4     debtToTotalAssets    0.023051
8            quickRatio    0.021809
13            realGDPSA    0.021581
15          m2MinusM1SA    0.019743
12            salePrice    0.019684
7         revenueGrowth    0.016005
2          totalAssets_    0.015495
1      operatingIncome_    0.014647
11                cfroi    0.011005
5             niToAsset    0.009612
9                   rsi    0.009526
6                   ROA    0.008603
10                  mva    0.007158
17            primeRate    0.004753
