# In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, roc_auc_score

# Load the workbook; the placeholder below must be replaced with the real path.
data = pd.read_excel(r"INSERT HERE")

# Keep the predictors X1..X27 together with the label and the industry tag.
columns_of_interest = [*(f"X{i}" for i in range(1, 28)), "status", "industry"]
model_data = data[columns_of_interest]

# The label is 'status'; the predictors are every remaining column except the
# 'industry' tag, which is only used later to slice the data.
y = model_data['status']
X = model_data.drop(columns=['status', 'industry'])

# Hold out 30% of the rows, stratified on the label, with a fixed seed so the
# split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    stratify=y,
    random_state=99,
)

# Configure the random forest and fit it on the training portion in one step
# (`fit` returns the estimator itself, so `rf_model` is the fitted model).
rf_model = RandomForestClassifier(
    random_state=99,
    n_estimators=300,
    max_features='log2',
    max_depth=None,
    min_samples_split=5,
    min_samples_leaf=2,
).fit(X_train, y_train)


# Industry codes to report on; each one yields at most one result row.
industries = ['A', 'C', 'F', 'G', 'H', 'I', 'J', 'L', 'M', 'N', 'P', 'Q', 'R']

# Score only the rows that were held out of fitting. Evaluating on the whole
# table would include the training rows and overstate accuracy and AUC.
held_out = model_data.loc[X_test.index]

# Results are collected in plain lists and turned into DataFrames once at the
# end, instead of re-allocating a DataFrame on every iteration.
result_rows = []
evaluated_industries = []

# Deploy: apply the single fitted model to each industry's held-out rows.
for industry in industries:
    industry_data = held_out[held_out['industry'] == industry]
    X_industry = industry_data.drop(columns=['status', 'industry'])
    y_industry = industry_data['status']

    # Nothing to score for this industry in the held-out split.
    if X_industry.empty:
        print(f"ERROR {industry}.")
        continue

    y_pred = rf_model.predict(X_industry)
    accuracy = accuracy_score(y_industry, y_pred)

    # ROC AUC is undefined when only one class is present in the true labels;
    # record NaN for that industry rather than letting the whole run abort.
    if y_industry.nunique() < 2:
        print(f"WARNING {industry}: only one class present, AUC undefined.")
        auc_score = float("nan")
    else:
        # Column 1 of predict_proba is used as the score of the positive class.
        y_proba = rf_model.predict_proba(X_industry)[:, 1]
        auc_score = roc_auc_score(y_industry, y_proba)

    result_rows.append(
        {'Industry': industry, 'Accuracy': accuracy, 'AUC Score': auc_score}
    )
    evaluated_industries.append(industry)

results_df = pd.DataFrame(
    result_rows, columns=['Industry', 'Accuracy', 'AUC Score']
)

# NOTE: the importances belong to the one global model, so every industry row
# below is identical; the per-industry layout is kept so the exported sheet
# has the same shape as before.
feature_importances = pd.DataFrame(
    [rf_model.feature_importances_] * len(evaluated_industries),
    index=evaluated_industries,
    columns=X.columns,
)

# Export results
results_df.to_excel("model_results.xlsx", index=False)
feature_importances.to_excel("feature_importances.xlsx")