In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder
import pandas as pd
# Baseline experiment: default Random Forest on the seven score columns.
data = pd.read_excel("Com_score.xlsx")

# Turn the string class labels into integer ids; the fitted encoder is kept so
# the report can show the original class names.
label_encoder = LabelEncoder()
data['Label_encoded'] = label_encoder.fit_transform(data['Label'])

# Feature matrix (X) and encoded target (y).
feature_columns = ['Fin_s', 'eScore', 'sScore', 'gScore', 'Ops_s', 'IT_s', 'Com_s']
X = data[feature_columns]
y = data['Label_encoded']

# Hold out 30% of the rows for testing, with a fixed seed for repeatability.
# NOTE(review): no `stratify=` is passed, so the class mix of the test set is
# left to chance -- consider stratifying on y.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Fit the forest with default hyperparameters (fit returns the estimator itself).
model = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Score the held-out rows and print per-class precision / recall / F1.
y_pred = model.predict(X_test)
print(
    classification_report(
        y_test,
        y_pred,
        target_names=label_encoder.classes_,
    )
)

              precision    recall  f1-score   support

     Average       0.67      0.33      0.44         6
         Bad       0.50      0.50      0.50         2
        Good       0.33      0.67      0.44         3

    accuracy                           0.45        11
   macro avg       0.50      0.50      0.46        11
weighted avg       0.55      0.45      0.45        11



In [None]:
# Importance assigned to each input column by the fitted Random Forest `model`.
feature_importances = model.feature_importances_

# Display the importances (weights) for each KRI.
# The names are read from `X` (the frame the training split was drawn from)
# instead of being retyped, so names and importances stay paired in the same
# order even if the feature list is edited.
kris = list(X.columns)
weights = dict(zip(kris, feature_importances))

print("Feature Importances (Weights):")
for kri, weight in weights.items():
    print(f"{kri}: {weight:.4f}")

Feature Importances (Weights):
Fin_s: 0.3018
eScore: 0.1336
sScore: 0.1682
gScore: 0.1110
Ops_s: 0.0874
IT_s: 0.1071
Com_s: 0.0908


In [None]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder
import pandas as pd

# Random Forest hyperparameter search, starting again from the raw score sheet.
data = pd.read_excel("Com_score.xlsx")

# Integer-encode the class labels; the encoder is reused for report names.
label_encoder = LabelEncoder()
data['Label_encoded'] = label_encoder.fit_transform(data['Label'])

# Feature matrix (X) and encoded target (y).
feature_columns = ['Fin_s', 'eScore', 'sScore', 'gScore', 'Ops_s', 'IT_s', 'Com_s']
X = data[feature_columns]
y = data['Label_encoded']

# 70/30 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Search space: 4 * 4 * 3 * 3 * 2 = 288 candidate settings.
param_grid = dict(
    n_estimators=[10, 50, 100, 200],
    max_depth=[None, 10, 20, 30],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
    bootstrap=[True, False],
)

# Exhaustive search with 3-fold cross-validation on the training split,
# ranked by accuracy and run on all available cores.
rf = RandomForestClassifier(random_state=42)
grid_search = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    scoring='accuracy',
    cv=3,
    n_jobs=-1,
    verbose=2,
)
grid_search.fit(X_train, y_train)

# Winning configuration and its mean cross-validated accuracy.
print("Best parameters found: ", grid_search.best_params_)
print("Best accuracy found: ", grid_search.best_score_)

# Evaluate the best estimator found by the search on the held-out test rows.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)
print(
    classification_report(
        y_test,
        y_pred,
        target_names=label_encoder.classes_,
    )
)

Fitting 3 folds for each of 288 candidates, totalling 864 fits
Best parameters found:  {'bootstrap': True, 'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 50}
Best accuracy found:  0.7546296296296297
              precision    recall  f1-score   support

     Average       0.67      0.33      0.44         6
         Bad       0.50      0.50      0.50         2
        Good       0.33      0.67      0.44         3

    accuracy                           0.45        11
   macro avg       0.50      0.50      0.46        11
weighted avg       0.55      0.45      0.45        11



In [None]:
from xgboost import XGBClassifier
from sklearn.metrics import classification_report

# Baseline XGBoost classifier: default hyperparameters, fixed seed, trained on
# the current X_train / y_train split (fit returns the estimator itself).
xgb_model = XGBClassifier(random_state=42).fit(X_train, y_train)

# Class predictions for the held-out rows.
y_pred_xgb = xgb_model.predict(X_test)

# Per-class precision / recall / F1, labelled with the original class names.
print(
    classification_report(
        y_test,
        y_pred_xgb,
        target_names=label_encoder.classes_,
    )
)

              precision    recall  f1-score   support

     Average       0.67      0.33      0.44         6
         Bad       0.50      0.50      0.50         2
        Good       0.33      0.67      0.44         3

    accuracy                           0.45        11
   macro avg       0.50      0.50      0.46        11
weighted avg       0.55      0.45      0.45        11



In [None]:
from sklearn.svm import SVC
from sklearn.metrics import classification_report

# Linear-kernel support vector classifier with C=1, trained on the current
# X_train / y_train split (fit returns the estimator itself).
# NOTE(review): the features are passed to the SVM as-is, with no scaling step
# in this cell -- SVMs are scale-sensitive, so consider standardising first.
svm_model = SVC(kernel='linear', C=1, random_state=42).fit(X_train, y_train)

# Class predictions for the held-out rows.
y_pred_svm = svm_model.predict(X_test)

# Per-class precision / recall / F1, labelled with the original class names.
print(
    classification_report(
        y_test,
        y_pred_svm,
        target_names=label_encoder.classes_,
    )
)

              precision    recall  f1-score   support

     Average       0.50      0.17      0.25         6
         Bad       0.25      0.50      0.33         2
        Good       0.40      0.67      0.50         3

    accuracy                           0.36        11
   macro avg       0.38      0.44      0.36        11
weighted avg       0.43      0.36      0.33        11



In [None]:
# XGBoost hyperparameter search.
#
# Everything this cell creates is bound to an xgb-specific name
# (`xgb_param_grid`, `xgb_grid_search`, `xgb_best_model`) so that the Random
# Forest search results held in `param_grid`, `grid_search` and `best_model`
# are not overwritten by running it.

# Search space: 3 * 3 * 4 * 4 * 4 = 576 candidate settings.
xgb_param_grid = {
    'n_estimators': [50, 100, 200],  # Number of trees
    'learning_rate': [0.01, 0.05, 0.1],  # Step size shrinkage used to prevent overfitting
    'max_depth': [3, 4, 5, 6],  # Maximum depth of a tree
    'subsample': [0.7, 0.8, 0.9, 1],  # Subsample ratio of the training instances
    'colsample_bytree': [0.7, 0.8, 0.9, 1],  # Subsample ratio of columns when constructing each tree
}

# Initialize the XGBoost Classifier
xgb = XGBClassifier(random_state=42, use_label_encoder=False, eval_metric='mlogloss')

# Exhaustive search with 3-fold cross-validation on the training split,
# ranked by accuracy and run on all available cores.
xgb_grid_search = GridSearchCV(
    estimator=xgb,
    param_grid=xgb_param_grid,
    cv=3,
    n_jobs=-1,
    verbose=2,
    scoring='accuracy',
)
xgb_grid_search.fit(X_train, y_train)

# Winning configuration and its mean cross-validated accuracy.
print("Best parameters found: ", xgb_grid_search.best_params_)
print("Best accuracy found: ", xgb_grid_search.best_score_)

# Evaluate the best XGBoost estimator on the held-out test rows.
xgb_best_model = xgb_grid_search.best_estimator_
y_pred_best = xgb_best_model.predict(X_test)

print(classification_report(y_test, y_pred_best, target_names=label_encoder.classes_))

Fitting 3 folds for each of 576 candidates, totalling 1728 fits
Best parameters found:  {'colsample_bytree': 0.7, 'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 50, 'subsample': 0.9}
Best accuracy found:  0.6342592592592592
              precision    recall  f1-score   support

     Average       0.50      0.17      0.25         6
         Bad       0.33      0.50      0.40         2
        Good       0.33      0.67      0.44         3

    accuracy                           0.36        11
   macro avg       0.39      0.44      0.36        11
weighted avg       0.42      0.36      0.33        11



In [None]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder
import pandas as pd

# Repeat of the Random Forest grid search from a fresh load of the score sheet.
# After this cell, `param_grid`, `grid_search` and `best_model` refer to the
# Random Forest search, which is what the composite-score step reads.
data = pd.read_excel("Com_score.xlsx")

# Integer-encode the class labels; the encoder is reused for report names.
label_encoder = LabelEncoder()
data['Label_encoded'] = label_encoder.fit_transform(data['Label'])

# Feature matrix (X) and encoded target (y).
feature_columns = ['Fin_s', 'eScore', 'sScore', 'gScore', 'Ops_s', 'IT_s', 'Com_s']
X = data[feature_columns]
y = data['Label_encoded']

# 70/30 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Search space: 4 * 4 * 3 * 3 * 2 = 288 candidate settings.
param_grid = dict(
    n_estimators=[10, 50, 100, 200],
    max_depth=[None, 10, 20, 30],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
    bootstrap=[True, False],
)

# Exhaustive search with 3-fold cross-validation on the training split,
# ranked by accuracy and run on all available cores.
rf = RandomForestClassifier(random_state=42)
grid_search = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    scoring='accuracy',
    cv=3,
    n_jobs=-1,
    verbose=2,
)
grid_search.fit(X_train, y_train)

# Winning configuration and its mean cross-validated accuracy.
print("Best parameters found: ", grid_search.best_params_)
print("Best accuracy found: ", grid_search.best_score_)

# Evaluate the best estimator found by the search on the held-out test rows.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)
print(
    classification_report(
        y_test,
        y_pred,
        target_names=label_encoder.classes_,
    )
)

Fitting 3 folds for each of 288 candidates, totalling 864 fits
Best parameters found:  {'bootstrap': True, 'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 50}
Best accuracy found:  0.7546296296296297
              precision    recall  f1-score   support

     Average       0.67      0.33      0.44         6
         Bad       0.50      0.50      0.50         2
        Good       0.33      0.67      0.44         3

    accuracy                           0.45        11
   macro avg       0.50      0.50      0.46        11
weighted avg       0.55      0.45      0.45        11



In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Build a 0-100 composite score per row of `data` from importance-weighted
# feature values, then export the augmented table.
# `best_model` is expected to be the grid-searched Random Forest; it must
# expose `feature_importances_`.

# Step 1: importances of the tuned model, one per column of X.
feature_importances = best_model.feature_importances_

# Step 2: rescale the importances so the weights sum to 1.
importance_total = feature_importances.sum()
weights = feature_importances / importance_total

# Show the weight attached to each feature.
feature_names = X.columns
for name, weight in zip(feature_names, weights):
    print(f"{name}: {weight}")

# Step 3: weighted sum of each row's feature values (rows of X dotted with weights).
composite_scores = np.dot(X, weights)

# Step 4: min-max rescale the raw composites onto [0, 100]. The scaler works on
# 2-D input, so the scores go in as a single column and come back out as 1-D.
scaler = MinMaxScaler(feature_range=(0, 100))
composite_column = composite_scores.reshape(-1, 1)
scaled_scores = scaler.fit_transform(composite_column).ravel()

# Attach the scaled score to the table as a new column.
data['Composite_Score'] = scaled_scores

# Step 5: write the augmented table to Excel without the index column.
output_path = "Company_Composite_Scores.xlsx"
data.to_excel(output_path, index=False)

print(f"Composite scores added to the dataset and saved to {output_path}.")

Fin_s: 0.34612487543130654
eScore: 0.10732315681739336
sScore: 0.18974840276094698
gScore: 0.07201288808229211
Ops_s: 0.059989378160316714
IT_s: 0.10568859527088456
Com_s: 0.11911270347685987
Composite scores added to the dataset and saved to Company_Composite_Scores.xlsx.


In [None]:
# Preview the first five rows of the table, including the Composite_Score column.
data.head(5)

Unnamed: 0,Company,Fin_s,eScore,sScore,gScore,Ops_s,IT_s,Com_s,Label,Label_encoded,Composite_Score
0,0,81.901451,15.37375,2.7325,4.8775,39.65723,45.789369,8.77561,Bad,1,65.876449
1,1,36.763324,14.3825,7.205,5.78,56.184447,11.974926,47.96379,Average,0,32.675787
2,2,81.170288,4.43375,3.58875,4.38875,39.877659,52.832417,41.414103,Bad,1,74.600826
3,3,18.910755,5.0075,5.46,4.31125,73.232592,57.509228,55.835046,Good,2,30.4603
4,4,4.74128,4.25375,5.70375,3.74625,60.561174,22.353305,37.574452,Good,2,0.128785
