In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, roc_auc_score

# Train one Random Forest per industry group (production vs. service) on the
# X1..X27 predictors, score each on a held-out 30 % split, print the scores
# and export the scores and feature importances to Excel.

# Settings shared by both industry groups. They are defined once so the two
# pipelines cannot drift apart.
SPLIT_PARAMS = dict(test_size=0.30, random_state=99)
RF_PARAMS = dict(
    n_estimators=300,
    max_depth=None,
    max_features='log2',
    min_samples_leaf=2,
    min_samples_split=5,
    random_state=99,
)


def _select_group(frame, industries):
    """Return ``(subset, features, target)`` for one industry group.

    ``subset`` holds the rows of *frame* whose ``industry`` value is listed in
    *industries*; ``features`` is that subset without the ``status`` and
    ``industry`` columns; ``target`` is its ``status`` column.
    """
    subset = frame[frame['industry'].isin(industries)]
    features = subset.drop(['status', 'industry'], axis=1)
    target = subset['status']
    return subset, features, target


def _fit_forest(X_train, y_train):
    """Return a RandomForestClassifier built from ``RF_PARAMS`` and fitted on the given data."""
    model = RandomForestClassifier(**RF_PARAMS)
    model.fit(X_train, y_train)
    return model


def _evaluate(model, X_test, y_test):
    """Return ``(predictions, probabilities, accuracy, auc)`` of *model* on the test split."""
    predictions = model.predict(X_test)
    # The second predict_proba column is used as the score for ROC AUC.
    # NOTE(review): this assumes `status` is a binary label - confirm.
    probabilities = model.predict_proba(X_test)[:, 1]
    accuracy = accuracy_score(y_test, predictions)
    auc = roc_auc_score(y_test, probabilities)
    return predictions, probabilities, accuracy, auc


# File path
# The string below is a placeholder; replace it with the path of the Excel
# workbook to analyse.
data = pd.read_excel(r"ISNERT HERE")

# Columns: predictors X1..X27 plus the target and the industry code
columns_of_interest = [f"X{i}" for i in range(1, 28)] + ["status", "industry"]
model_data = data[columns_of_interest]

# Industries (codes that make up each group)
production_industries = ['A', 'C', 'F']
service_industries = ['G', 'H', 'I', 'J', 'L', 'M', 'N', 'P', 'Q', 'R', 'S']

# Feature & Target (Prod) - stratified so both splits keep the class ratio
production_data, X_prod, y_prod = _select_group(model_data, production_industries)
X_train_prod, X_test_prod, y_train_prod, y_test_prod = train_test_split(
    X_prod, y_prod, stratify=y_prod, **SPLIT_PARAMS
)

# Feature & Target (Serv)
service_data, X_serv, y_serv = _select_group(model_data, service_industries)
X_train_serv, X_test_serv, y_train_serv, y_test_serv = train_test_split(
    X_serv, y_serv, stratify=y_serv, **SPLIT_PARAMS
)

# RF: one independently fitted model per industry group
rf_model_prod = _fit_forest(X_train_prod, y_train_prod)
rf_model_serv = _fit_forest(X_train_serv, y_train_serv)

# Result prod
y_pred_prod, y_proba_prod, accuracy_prod, auc_score_prod = _evaluate(
    rf_model_prod, X_test_prod, y_test_prod
)

# Result serv
y_pred_serv, y_proba_serv, accuracy_serv, auc_score_serv = _evaluate(
    rf_model_serv, X_test_serv, y_test_serv
)

# Total: one row per industry group
results_df = pd.DataFrame({
    "Industry Type": ["Production", "Service"],
    "Accuracy": [accuracy_prod, accuracy_serv],
    "AUC Score": [auc_score_prod, auc_score_serv]
})

# Feature importance: one column per model, one row per predictor.
# Both groups are cut from the same model_data columns, so the production
# feature names label the service importances as well.
feature_importances = pd.DataFrame(
    {
        'Production': rf_model_prod.feature_importances_,
        'Service': rf_model_serv.feature_importances_,
    },
    index=X_prod.columns,
)

# Print
print("Results for Production Industries:")
print("Accuracy:", accuracy_prod)
print("AUC Score:", auc_score_prod)

print("\nResults for Service Industries:")
print("Accuracy:", accuracy_serv)
print("AUC Score:", auc_score_serv)

print("\nFeature Importance:")
print(feature_importances)

# Export (files are written to the current working directory)
results_df.to_excel("results.xlsx", index=False)
feature_importances.to_excel("feature_importances.xlsx")
