In [85]:
from collections import Counter
from imblearn.datasets import fetch_datasets
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from imblearn.pipeline import make_pipeline as make_pipeline_imb
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import NearMiss
from imblearn.metrics import classification_report_imbalanced
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, accuracy_score, classification_report
from sklearn import preprocessing
import pickle
import os

import numpy as np
import pandas as pd

In [87]:
model_name =[]
accuracy =[]
precision = []
recall = []
f1score = []

In [88]:
def print_results(modelName, true_value, pred):

    print("Model Name: {}".format(modelName))
    model_name.append(modelName)
    print("accuracy: {}".format(accuracy_score(true_value, pred)))
    accuracy.append(accuracy_score(true_value, pred))
    print("precision: {}".format(precision_score(true_value, pred)))
    precision.append(precision_score(true_value, pred))
    print("recall: {}".format(recall_score(true_value, pred)))
    recall.append(recall_score(true_value, pred))
    print("f1: {}".format(f1_score(true_value, pred)))
    f1score.append(f1_score(true_value, pred))
    

data = pd.read_csv("boruta_features.csv")


In [89]:
data.head()

Unnamed: 0,1 net profit / total assets,2 total liabilities / total assets,6 retained earnings / total assets,5 [(cash + short-term securities + receivables - short-term liabilities) / (operating expenses - depreciation)] * 365,8 book value of equity / total liabilities,9 sales / total assets,10 equity / total assets,12 gross profit / short-term liabilities,13 (gross profit + depreciation) / sales,16 (gross profit + depreciation) / total liabilities,...,26 (net profit + depreciation) / total liabilities,27 profit on operating activities / financial expenses,29 logarithm of total assets,34 operating expenses / total liabilities,38 constant capital / total assets,46 (current assets - inventory) / short-term liabilities,51 short-term liabilities / total assets,55 working capital,64 sales / fixed assets,65 Bankrupt
0,0.20055,0.37951,0.38825,32.351,1.3305,1.1389,0.50494,0.6598,0.1666,0.73378,...,0.60411,1.4582,5.9443,0.56393,0.50591,1.5225,0.37854,348690.0,7.4277,0
1,0.18732,0.61323,0.18732,-7.3128,0.6307,1.1559,0.38677,0.33147,0.12182,0.32211,...,0.32211,1.4138,4.1424,0.3234,0.43489,0.95787,0.56511,3186.6,7.898,0
2,0.00902,0.63202,0.0,-37.842,0.58223,1.3332,0.36798,0.033921,0.038938,0.082138,...,0.073572,1.0714,5.9479,1.7697,0.49344,0.81192,0.42554,1.1263,2.5603,0
3,0.26669,0.34994,0.55983,43.087,1.8577,1.1268,0.65006,1.0993,0.12047,0.99444,...,0.80759,1.1885,3.9412,0.87075,0.69793,2.0239,0.30207,5340.0,33.413,0
4,0.067731,0.19885,0.21265,90.606,4.029,1.257,0.80115,1.8736,0.31036,0.39415,...,0.3422,2.6744,5.2684,0.27021,0.95834,2.2195,0.041664,15132.0,0.28803,0


In [90]:
X = data.drop(['65 Bankrupt'], axis =1)
y = data['65 Bankrupt']

# Random Forest Regression

In [91]:
from sklearn.ensemble import RandomForestClassifier

In [92]:
# splitting data into training and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=2)

scaler = preprocessing.MinMaxScaler()
X = scaler.fit_transform(X)

In [93]:
# build RabdomForestClassifier model with SMOTE imblearn
rfc_pipeline = make_pipeline_imb(SMOTE(random_state=4), RandomForestClassifier(n_estimators=50))
smote_model = rfc_pipeline.fit(X_train, y_train)
smote_prediction = smote_model.predict(X_test)
filename = 'rfc_model.pckl'
pickle.dump(rfc_pipeline,open(filename,'wb'))

print()
print_results("RandomForest classification", y_test, smote_prediction)
print()



Model Name: RandomForest classification
accuracy: 0.9648033126293996
precision: 0.23076923076923078
recall: 0.3
f1: 0.2608695652173913



# Logistic Regression

In [94]:
from sklearn.linear_model import LogisticRegression

In [95]:
# build Logistic Rrgression Classifier model with SMOTE imblearn
lr_pipeline = make_pipeline_imb(SMOTE(random_state=4), LogisticRegression())
smote_model = lr_pipeline.fit(X_train, y_train)
smote_prediction = smote_model.predict(X_test)
filename = 'lr_model.pckl'
pickle.dump(lr_pipeline,open(filename,'wb'))

print()
print_results("Logistic Regression classification", y_test, smote_prediction)
print()


Model Name: Logistic Regression classification
accuracy: 0.7712215320910973
precision: 0.0759493670886076
recall: 0.9
f1: 0.14007782101167315



# Neural Nets

In [96]:
from sklearn.neural_network import MLPClassifier

In [97]:
# build Neural Nets Classifier model with SMOTE imblearn
nn_pipeline = make_pipeline_imb(SMOTE(random_state=4), MLPClassifier())
smote_model = nn_pipeline.fit(X_train, y_train)
smote_prediction = smote_model.predict(X_test)
filename = 'nn_model.pckl'
pickle.dump(nn_pipeline,open(filename,'wb'))

print()
print_results("Neural Nets", y_test, smote_prediction)
print()


Model Name: Neural Nets
accuracy: 0.6749482401656315
precision: 0.040625
recall: 0.65
f1: 0.07647058823529412



# BernoulliNB

In [103]:
from sklearn.naive_bayes import BernoulliNB

In [104]:
# build SVC model with SMOTE imblearn
svc_pipeline = make_pipeline_imb(SMOTE(random_state=4), BernoulliNB())
smote_model = svc_pipeline.fit(X_train, y_train)
smote_prediction = smote_model.predict(X_test)
filename = 'BernoulliNB_model.pckl'
pickle.dump(svc_pipeline,open(filename,'wb'))

print()
print_results("BernoulliNB", y_test, smote_prediction)
print()



Model Name: BernoulliNB
accuracy: 0.855072463768116
precision: 0.03125
recall: 0.2
f1: 0.05405405405405406



In [100]:
info = model_name,accuracy,precision,recall,f1score

In [105]:
describe1 = pd.DataFrame(info[0],columns = {"Model_Name"})
describe2 = pd.DataFrame(info[1], columns ={"Accuracy_score"})
describe3 = pd.DataFrame(info[2],columns = {"Precision_score"})
describe4 = pd.DataFrame(info[3],columns = {"Recall_score"})
describe5 = pd.DataFrame(info[4],columns = {"F1_score"})



des = describe1.merge(describe2, left_index=True, right_index=True, how='inner')
des = des.merge(describe3,left_index=True, right_index=True, how='inner')
des = des.merge(describe4,left_index=True, right_index=True, how='inner')
des = des.merge(describe5,left_index=True, right_index=True, how='inner')

#des = des.merge(describe9,left_index=True, right_index=True, how='inner')
final_csv = des.sort_values(ascending=False,by="Accuracy_score").reset_index(drop = True)

In [106]:
final_csv.to_csv(str(os.getcwd()) + "/accuracy_error_metrics.csv")