In [7]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LinearRegression, Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score

from xgboost import XGBRegressor
from xgboost import plot_importance
from xgboost import plot_tree
from xgboost import XGBClassifier

from patsy import dmatrices
import numpy as np
import pandas as pd

In [8]:
dataset = pd.read_csv(r"C:\Users\JacopoBinati\OneDrive - Venionaire Capital\Desktop\crunchbase\analysis\dataset_without_investors.csv")

In [9]:
pd.set_option('display.max_columns', None)

#### now all we need to do before running the models is to get only float, bool, int variables

In [10]:
columns_to_drop = [
    "Organization Name",
    "IPO Status",
    "Company Type",
    "Number of Employees",
    "Last Funding Type",
    "Last Funding Date",
    "Founded Date",
    "Founded Date Precision",
    "Operating Status",
    "Top 5 Investors",
    "Headquarters Location",
    "Industry Groups",
    "Industries",
    "Headquarters Regions",
    "Main Industry",
    "Industry Name"

]
dataset = dataset.drop(columns=columns_to_drop, axis=1)

#### Need to be dropped all the variables which are not dummies, integers or float

In [11]:
dataset

Unnamed: 0,Number of Founders,Number of Funding Rounds,Last Funding Amount (in USD),Last Equity Funding Amount (in USD),Total Equity Funding Amount (in USD),Total Funding Amount (in USD),Number of Investors,Status__Private,CompanyType__For Profit,CompanyType__Non-profit,NumberEmployees__1-10,NumberEmployees__10001+,NumberEmployees__1001-5000,NumberEmployees__101-250,NumberEmployees__11-50,NumberEmployees__251-500,NumberEmployees__5001-10000,NumberEmployees__501-1000,NumberEmployees__51-100,Last Funding Year,FundingType__Angel,FundingType__Convertible Note,FundingType__Corporate Round,FundingType__Debt Financing,FundingType__Equity Crowdfunding,FundingType__Grant,FundingType__Initial Coin Offering,FundingType__Non-equity Assistance,FundingType__Post-IPO Equity,FundingType__Pre-Seed,FundingType__Private Equity,FundingType__Secondary Market,FundingType__Seed,FundingType__Series A,FundingType__Series B,FundingType__Series C,FundingType__Series D,FundingType__Series E,FundingType__Series F,FundingType__Series G,FundingType__Series H,FundingType__Series I,FundingType__Series J,FundingType__Undisclosed,FundingType__Venture - Series Unknown,Industry_Aerospace/Defense,Industry_Air Transport,Industry_Apparel,Industry_Auto & Truck,Industry_Auto Parts,Industry_Bank (Money Center),Industry_Banks (Regional),Industry_Beverage (Soft),Industry_Brokerage & Investment Banking,Industry_Building Materials,Industry_Business & Consumer Services,Industry_Chemical (Basic),Industry_Chemical (Diversified),Industry_Chemical (Specialty),Industry_Coal & Related Energy,Industry_Computer Services,Industry_Computers/Peripherals,Industry_Diversified,Industry_Drugs (Biotechnology),Industry_Drugs (Pharmaceutical),Industry_Education,Industry_Electrical Equipment,Industry_Electronics (Consumer & Office),Industry_Electronics (General),Industry_Engineering/Construction,Industry_Entertainment,Industry_Environmental & Waste Services,Industry_Farming/Agriculture,Industry_Financial Svcs. (Non-bank & Insurance),Industry_Food Processing,Industry_Food Wholesalers,Industry_Green & Renewable Energy,Industry_Healthcare Support Services,Industry_Heathcare Information and Technology,Industry_Homebuilding,Industry_Hotel/Gaming,Industry_Household Products,Industry_Information Services,Industry_Insurance (General),Industry_Investments & Asset Management,Industry_Machinery,Industry_Power,Industry_Publishing & Newspapers,Industry_Real Estate (General/Diversified),Industry_Real Estate (Operations & Services),Industry_Recreation,Industry_Restaurant/Dining,Industry_Retail (Automotive),Industry_Retail (General),Industry_Software (Entertainment),Industry_Software (Internet),Industry_Software (System & Application),Industry_Telecom. Equipment,Industry_Telecom. Services,Industry_Transportation,Industry_Transportation (Railroads),Macro Region,Region_Asia-Pacific,Region_EMEA,Region_Latin America,Region_North America,LastFundingType__Angel,LastFundingType__Convertible Note,LastFundingType__Corporate Round,LastFundingType__Debt Financing,LastFundingType__Equity Crowdfunding,LastFundingType__Grant,LastFundingType__Initial Coin Offering,LastFundingType__Non-equity Assistance,LastFundingType__Post-IPO Equity,LastFundingType__Pre-Seed,LastFundingType__Private Equity,LastFundingType__Secondary Market,LastFundingType__Seed,LastFundingType__Series A,LastFundingType__Series B,LastFundingType__Series C,LastFundingType__Series D,LastFundingType__Series E,LastFundingType__Series F,LastFundingType__Series G,LastFundingType__Series H,LastFundingType__Series I,LastFundingType__Series J,LastFundingType__Undisclosed,LastFundingType__Venture - Series Unknown,Lease Debt (My Estimate),Conventional Debt,Total Debt with leases,Interest expense,Book interest rate,Short term Debt as % of Total Debt,Lease Debt (Accounting),Debt Repaid during year,Debt raised during year,Dividends,Net Income,Payout,Dividends + Buybacks,Dividends + Buybacks - Stock Issuances,FCFE (before debt cash flows),FCFE (after debt cash flows),Net Cash Returned/ Net Income,Cash/ Firm Value,Return on Equity,Payout Ratio after Issuances,Debt Repaid to Debt Raised Ratio,Price to Cash Flow Ratio
0,1.0,3.0,0.0,0.0,0.0,2192645.0,1.0,1,1,0,1,0,0,0,0,0,0,0,0,2024,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,EMEA,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6051.020718,1.442069e+05,1.502580e+05,5696.207,0.039500,0.144841,5218.757,48955.632,52128.388,11791.096170,30321.252,0.388872,20293.263,18261.499,19919.943,23092.699,0.602267,0.055934,-0.201795,0.602267,0.939136,2.422143e-06
1,3.0,4.0,280498.0,0.0,0.0,482615.0,3.0,1,1,0,1,0,0,0,0,0,0,0,0,2019,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,EMEA,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5115.374266,2.113760e+05,2.164914e+05,8584.262,0.040611,0.268148,5703.164,88778.264,90419.615,12664.507115,30400.966,0.416582,13271.676,4914.572,-4773.092,-3131.741,0.161658,0.121170,-0.140426,0.161658,0.981847,-3.869098e-05
2,1.0,1.0,0.0,0.0,0.0,0.0,3.0,1,1,0,0,0,0,0,1,0,0,0,0,2022,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Asia-Pacific,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3504.302194,2.667316e+04,3.017747e+04,1902.260,0.071317,0.083450,2872.976,7817.561,6397.430,1473.077400,2552.638,0.577080,2602.829,1797.809,871.547,-548.584,0.704295,0.100330,-0.084588,0.704295,1.221985,-1.828888e-04
3,2.0,1.0,0.0,0.0,0.0,0.0,2.0,1,1,0,1,0,0,0,0,0,0,0,0,2022,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,EMEA,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,34683.033966,3.542238e+05,3.889068e+05,13713.992,0.038716,0.113633,30861.610,55031.120,73774.102,33027.515310,100414.069,0.328913,90193.521,73280.795,77434.434,96177.416,0.729786,0.034412,-0.258196,0.729786,0.745941,3.577958e-07
4,1.0,4.0,0.0,0.0,0.0,0.0,3.0,1,1,0,0,0,0,0,0,0,0,0,1,2023,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,EMEA,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,13525.815996,2.218906e+05,2.354164e+05,12527.887,0.056460,0.072701,12420.244,83664.108,87802.673,19190.627928,35290.150,0.543796,39703.544,34789.306,34121.103,38259.668,0.985808,0.053849,-0.149905,0.985808,0.952865,1.407473e-06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4670,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1,1,0,0,1,0,0,0,0,0,0,0,2020,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Asia-Pacific,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,14136.992030,2.219874e+06,2.234011e+06,85726.073,0.038618,0.094404,9650.130,762913.057,847793.955,103167.411630,163591.592,0.630640,108730.021,51954.302,-35695.614,49185.284,0.317585,0.050316,-0.073228,0.317585,0.899880,1.022993e-06
4671,1.0,3.0,14232849.0,14232849.0,58385810.0,58385810.0,8.0,1,1,0,0,1,0,0,0,0,0,0,0,2019,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,Asia-Pacific,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,34205.228154,3.799094e+05,4.141146e+05,14361.511,0.037802,0.096561,33057.511,123680.593,124790.676,20408.731530,40449.503,0.504548,34969.900,30134.710,27712.584,28822.667,0.744996,0.077808,-0.097677,0.744996,0.991104,2.699534e-06
4672,2.0,2.0,3600000.0,3600000.0,3600000.0,3600000.0,4.0,1,1,0,0,1,0,0,0,0,0,0,0,2021,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,EMEA,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,7561.645723,1.439803e+05,1.515419e+05,5333.830,0.037046,0.026053,5387.660,25808.315,35401.202,3826.016720,15662.845,0.244273,13954.663,9892.479,9931.899,19524.786,0.631589,0.044372,-0.103357,0.631589,0.729024,2.272579e-06
4673,1.0,1.0,2887920.0,2887920.0,2887920.0,2887920.0,1.0,1,1,0,0,1,0,0,0,0,0,0,0,2019,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Asia-Pacific,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2931.501623,1.292436e+05,1.321751e+05,5715.974,0.044226,0.077713,2606.955,93975.047,99762.801,5409.587560,8563.891,0.631674,8419.115,5352.219,-1210.232,4577.522,0.624975,0.040675,-0.064792,0.624975,0.941985,8.885773e-06


## Get preliminary information with PyCaret for feature importance, SHAP and xgboost hypertuning

# new session

In [12]:
X = dataset.drop('Return on Equity', axis=1)
y = dataset['Return on Equity']

In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.26, random_state=42)


## XG Boost Classifier

In [14]:
param_grid = {
    'n_estimators': [400, 800],
    'max_depth': [6, 10],
    'learning_rate': [0.01, 0.1],
    'colsample_bytree': [0.1, 0.5]
}

# Initialize the XGBClassifier
xgb = XGBClassifier(objective='binary:logistic', random_state=42)

# Set up GridSearchCV
grid_search = GridSearchCV(
    estimator=xgb,
    param_grid=param_grid,
    cv=5,
    scoring="accuracy",
    n_jobs=-1,
    verbose=2
)

# Fit the model
grid_search.fit(X_train, y_train)

# Print the best parameters
best_params = grid_search.best_params_
print("Best parameters found: ", best_params)

# Get the best estimator
best_xgb = grid_search.best_estimator_

# Make predictions on the test set
y_pred = best_xgb.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy on test set: {accuracy:.4f}")

# Print classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))


Fitting 5 folds for each of 16 candidates, totalling 80 fits


ValueError: 
All the 80 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
48 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\JacopoBinati\AppData\Roaming\Python\Python310\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\JacopoBinati\anaconda3\envs\jacopo\lib\site-packages\xgboost\core.py", line 726, in inner_f
    return func(**kwargs)
  File "c:\Users\JacopoBinati\anaconda3\envs\jacopo\lib\site-packages\xgboost\sklearn.py", line 1491, in fit
    raise ValueError(
ValueError: Invalid classes inferred from unique values of `y`.  Expected: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49 50 51 52 53 54], got [-0.73554542 -0.58596631 -0.47132194 -0.30812263 -0.26228825 -0.25819571
 -0.25687054 -0.24894539 -0.23219664 -0.22573489 -0.21773761 -0.21071567
 -0.20185436 -0.20179473 -0.1991987  -0.19556141 -0.18196046 -0.14990524
 -0.14968418 -0.14042582 -0.1399881  -0.13699866 -0.13144118 -0.1313902
 -0.1265943  -0.12286034 -0.12246713 -0.12217058 -0.10834646 -0.10335656
 -0.09767709 -0.0946577  -0.08655712 -0.08458783 -0.07336645 -0.07322775
 -0.07141723 -0.06992585 -0.06501345 -0.06479203 -0.05954682 -0.05918847
 -0.05914323 -0.05393498 -0.05371936 -0.04659408 -0.04520184 -0.0451679
 -0.04362432 -0.03469631 -0.021776   -0.01676733  0.03206205  0.11304353
  0.11929202]

--------------------------------------------------------------------------------
16 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\JacopoBinati\AppData\Roaming\Python\Python310\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\JacopoBinati\anaconda3\envs\jacopo\lib\site-packages\xgboost\core.py", line 726, in inner_f
    return func(**kwargs)
  File "c:\Users\JacopoBinati\anaconda3\envs\jacopo\lib\site-packages\xgboost\sklearn.py", line 1491, in fit
    raise ValueError(
ValueError: Invalid classes inferred from unique values of `y`.  Expected: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49 50 51], got [-0.73554542 -0.47132194 -0.26228825 -0.25819571 -0.25687054 -0.24894539
 -0.23219664 -0.22573489 -0.21773761 -0.21071567 -0.20185436 -0.20179473
 -0.1991987  -0.19556141 -0.18196046 -0.14990524 -0.14968418 -0.14042582
 -0.1399881  -0.13699866 -0.13144118 -0.1313902  -0.1265943  -0.12286034
 -0.12246713 -0.12217058 -0.10834646 -0.10335656 -0.09767709 -0.0946577
 -0.08655712 -0.08458783 -0.07336645 -0.07322775 -0.07141723 -0.06992585
 -0.06501345 -0.06479203 -0.05954682 -0.05918847 -0.05914323 -0.05393498
 -0.05371936 -0.04659408 -0.04520184 -0.04362432 -0.03469631 -0.021776
 -0.01676733  0.03206205  0.11304353  0.11929202]

--------------------------------------------------------------------------------
16 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\JacopoBinati\AppData\Roaming\Python\Python310\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\JacopoBinati\anaconda3\envs\jacopo\lib\site-packages\xgboost\core.py", line 726, in inner_f
    return func(**kwargs)
  File "c:\Users\JacopoBinati\anaconda3\envs\jacopo\lib\site-packages\xgboost\sklearn.py", line 1491, in fit
    raise ValueError(
ValueError: Invalid classes inferred from unique values of `y`.  Expected: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49 50 51 52 53], got [-0.73554542 -0.58596631 -0.47132194 -0.30812263 -0.26228825 -0.25819571
 -0.25687054 -0.24894539 -0.23219664 -0.22573489 -0.21773761 -0.21071567
 -0.20185436 -0.20179473 -0.1991987  -0.19556141 -0.14990524 -0.14968418
 -0.14042582 -0.1399881  -0.13699866 -0.13144118 -0.1313902  -0.1265943
 -0.12286034 -0.12246713 -0.12217058 -0.10834646 -0.10335656 -0.09767709
 -0.0946577  -0.08655712 -0.08458783 -0.07336645 -0.07322775 -0.07141723
 -0.06992585 -0.06501345 -0.06479203 -0.05954682 -0.05918847 -0.05914323
 -0.05393498 -0.05371936 -0.04659408 -0.04520184 -0.0451679  -0.04362432
 -0.03469631 -0.021776   -0.01676733  0.03206205  0.11304353  0.11929202]


## Random Forest Classifier

In [None]:



param_grid = {
    'n_estimators': [50, 70],
    'max_depth': [20, 30],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [1, 2]
}

rfr = RandomForestRegressor(
    random_state=42,
    criterion='mse',
    n_estimators=70,
    oob_score=True,
    n_jobs=-1
)

grid_search = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    cv=5,
    scoring="neg_root_mean_squared_error",
    n_jobs=-1,
    verbose=4
)

grid_search.fit(X, y.values.ravel())

best_params = grid_search.best_params_
print("Best parameters found: ", best_params)

# Get the best estimator
best_rf = grid_search.best_estimator_

In [None]:
y_holdout, X_holdout = dmatrices(formula, data_holdout, return_type='dataframe')

y_pred = best_rf.predict(X_holdout)


In [None]:
mse = mean_squared_error(y_holdout, y_pred)
print("Mean Squared Error on holdout set: ", mse)

In [None]:

cv_results = pd.DataFrame(grid_search.cv_results_)


# Metrics for the performance

In [None]:
pivot_results = cv_results[
    ["param_max_features", "param_min_samples_split", "mean_test_score"]
].assign(
    mean_test_score=lambda x: -x["mean_test_score"],
    Variables=lambda x: x["param_max_features"],
    Min_nodes=lambda x: x["param_min_samples_split"]
).pivot(
    index="Min_nodes", columns="Variables", values="mean_test_score"
).round(2)

print("Cross-validation results:")
print(pivot_results)

## Feature Importance

In [None]:
importances = best_rf.feature_importances_
feature_names = X.columns  # Get feature names from X
indices = np.argsort(importances)[::-1]

plt.figure(figsize=(12, 8))
plt.title("Feature Importances")
plt.bar(range(X.shape[1]), importances[indices])
plt.xticks(range(X.shape[1]), [feature_names[i] for i in indices], rotation=90)
plt.tight_layout()
plt.show()

# Print feature importances
print("\nFeature Importances:")
for f, imp in zip([feature_names[i] for i in indices], importances[indices]):
    print(f"{f}: {imp:.4f}")

####################################################################

importances_xgb = best_xgb.feature_importances_
feature_names_xgb = X.columns
indices_xgb = np.argsort(importances_xgb)[::-1]

plt.figure(figsize=(12, 8))
plt.title("Feature Importances XGB")
plt.bar(range(X.shape[1]), importances_xgb[indices_xgb])
plt.xticks(range(X.shape[1]), [feature_names_xgb[i] for i in indices_xgb], rotation=90)
plt.tight_layout()
plt.show()

# Print feature importances
print("\nFeature Importances XGB:")
for f, imp in zip([feature_names_xgb[i] for i in indices_xgb], importances_xgb[indices_xgb]):
    print(f"{f}: {imp:.4f}")
