In [1]:
import pandas as pd
import pickle
from sklearn.ensemble import RandomForestClassifier
from Methods import Methods
from RandomForest import RandomForest
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, accuracy_score, classification_report

In [2]:
# Read the dataset from the CSV in the working directory; the cells below
# expect it to contain the 'Credit_Score_encoded' label column.
df = pd.read_csv(filepath_or_buffer="ForFlask.csv")

In [4]:
# Stratified train / validation / test split (80% / 10% / 10%) on the encoded label
target_col = 'Credit_Score_encoded'
y_stratify = df[target_col]
X_stratify = df.drop(columns=[target_col])

# Step 1: hold out 20% of the rows, keeping the class proportions of the full label
X_train_stra, X_temp_stra, y_train_stra, y_temp_stra = train_test_split(
    X_stratify,
    y_stratify,
    test_size=0.2,
    stratify=y_stratify,
    random_state=42,
)

# Step 2: cut the hold-out in half -> validation and test, again stratified
X_val_stra, X_test_stra, y_val_stra, y_test_stra = train_test_split(
    X_temp_stra,
    y_temp_stra,
    test_size=0.5,
    stratify=y_temp_stra,
    random_state=42,
)


In [5]:
# Random forest configured with the "best parameters" the original note
# attributes to a GridSearchCV run; the seed is fixed for repeatable fits.
best_rf_params = {
    'n_estimators': 300,
    'max_depth': None,
    'max_features': 'sqrt',
    'min_samples_split': 2,
    'min_samples_leaf': 1,
    'random_state': 42,
}

# fit() returns the estimator itself, so construction and training chain cleanly
Random_Forest_stra = RandomForestClassifier(**best_rf_params).fit(X_train_stra, y_train_stra)

# Predictions on the two held-out splits
pred_val_stra = Random_Forest_stra.predict(X_val_stra)
pred_test_stra = Random_Forest_stra.predict(X_test_stra)

# Validation metrics: overall accuracy plus the per-class report
accuracy_val_stra = accuracy_score(y_val_stra, pred_val_stra)
report_val_stra = classification_report(y_val_stra, pred_val_stra)

# Test metrics: same pair of scores on the untouched test split
accuracy_test_stra = accuracy_score(y_test_stra, pred_test_stra)
report_test_stra = classification_report(y_test_stra, pred_test_stra)

# Emit the summary: validation first, a heavy rule, then test
summary_lines = [
    f"Random Forest Classifier's validation accuracy is {accuracy_val_stra}",
    "-"*70,
    f"Random Forest Classifier's validation classification report is: \n {report_val_stra}",
    "="*100,
    f"Random Forest Classifier's testing accuracy is {accuracy_test_stra}",
    "-"*70,
    f"Random Forest Classifier's testing classification report is: \n {report_test_stra}",
]
for summary_line in summary_lines:
    print(summary_line)

Random Forest Classifier's validation accuracy is 0.8392
----------------------------------------------------------------------
Random Forest Classifier's validation classification report is: 
               precision    recall  f1-score   support

           0       0.81      0.87      0.84      2900
           1       0.86      0.83      0.85      5317
           2       0.82      0.82      0.82      1783

    accuracy                           0.84     10000
   macro avg       0.83      0.84      0.83     10000
weighted avg       0.84      0.84      0.84     10000

Random Forest Classifier's testing accuracy is 0.8437
----------------------------------------------------------------------
Random Forest Classifier's testing classification report is: 
               precision    recall  f1-score   support

           0       0.83      0.87      0.85      2899
           1       0.87      0.84      0.85      5318
           2       0.81      0.81      0.81      1783

    accuracy       

In [6]:
# Serialize the fitted classifier to disk (binary pickle) so the Flask web page
# can load it for deployment.
with open('RFClassifierNewEncoding.pkl', mode='wb') as pkl_out:
    pickle.dump(Random_Forest_stra, pkl_out)