Random Forest
==========

In [1]:
import pandas as pd
import numpy as np
import os
import sys
import argparse
import warnings

warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=UserWarning)

In [2]:
# Load the modelling table from a CSV located in the current working directory.
processed_df = pd.read_csv(filepath_or_buffer="EBITDA_new.csv")

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Predictor columns fed to the model, and the column being predicted.
# Excluded from this run: 'shareholdersEquity_', 'netIncome_'.
features = [
    'capitalExpenditure_',
    'operatingIncome_',
    'totalAssets_',
    'EPS_',
    'debtToTotalAssets',
    'niToAsset',
    'ROA',
    'revenueGrowth',
    'quickRatio',
    'rsi',
    'mva',
    'cfroi',
    'salePrice',
    'realGDPSA',
    'm2SA',
    'm2MinusM1SA',
    'm2Velocity',
    'primeRate',
    'EBITDA_lag1',
]
target = 'EBITDA'

# Keep only rows where every predictor and the target are present.
# This rebinds `processed_df`, so later cells see the filtered frame.
processed_df = processed_df.dropna(subset=[*features, target])

# Design matrix / response, then a seeded 80/20 hold-out split.
# NOTE(review): the 'EBITDA_lag1' feature suggests time-ordered observations;
# if so, a random split may leak future information - confirm.
X, y = processed_df[features], processed_df[target]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit a 100-tree forest with a fixed seed (fit() returns the estimator itself).
rf_model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score the held-out rows.
y_pred = rf_model.predict(X_test)
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse}", f"R^2 Score: {r2}", sep="\n")

Mean Squared Error: 0.9901091788764708
R^2 Score: 0.6441409100595397


In [None]:
# Feature importance
# One importance value per column the forest was trained on.
feature_importances = rf_model.feature_importances_

# Label the importances with the column names the model recorded at fit time
# rather than the notebook-level `features` list. If `features` is edited after
# the model was fitted (without re-running the training cell), the two can
# disagree: a length mismatch raises, and a same-length edit would silently
# mislabel the importances. Fall back to `features` when the estimator does not
# expose `feature_names_in_` (e.g. older scikit-learn releases).
# NOTE(review): the output recorded under this cell lists 21 features, including
# 'shareholdersEquity_' and 'netIncome_', which the current `features` list
# comments out - re-run the notebook top to bottom to refresh it.
importance_feature_names = list(getattr(rf_model, 'feature_names_in_', features))

# Map feature importances to feature names, most important first
feature_importance_df = pd.DataFrame({
    'Feature': importance_feature_names,
    'Importance': feature_importances
}).sort_values(by='Importance', ascending=False)

print(feature_importance_df)

                Feature  Importance
20          EBITDA_lag1    0.551086
18           m2Velocity    0.117588
16                 m2SA    0.055587
2      operatingIncome_    0.038457
5                  EPS_    0.033313
10           quickRatio    0.023516
14            salePrice    0.022287
15            realGDPSA    0.017467
6     debtToTotalAssets    0.017383
9         revenueGrowth    0.016127
17          m2MinusM1SA    0.015453
8                   ROA    0.011368
3   shareholdersEquity_    0.010196
13                cfroi    0.010144
4          totalAssets_    0.009754
0   capitalExpenditure_    0.009149
7             niToAsset    0.008906
1            netIncome_    0.008809
12                  mva    0.008007
11                  rsi    0.007822
19            primeRate    0.007580
