In [30]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
from sklearn.preprocessing import normalize

warnings.filterwarnings('ignore')

from sklearn import svm
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,LSTM,Flatten
from tensorflow.keras.optimizers import SGD,Adam
from tensorflow.keras.callbacks import EarlyStopping,ModelCheckpoint,ReduceLROnPlateau


from sklearn.model_selection import train_test_split

from sklearn import metrics

import autosklearn.classification
import sklearn.model_selection
import sklearn.datasets
import sklearn.metrics

## First we deal with the dataset "last year"

In [31]:
df=pd.read_csv("last year.csv")
df['final'][df['final']=='Bankrupted']=0
df['final'][df['final']=='Non-Bankrupted']=1

bankruptcies=df['final'][df['final']==0].count()
rows=df.shape[0]
columns=df.shape[1]
print(f'Our dataset is comprised of {rows} rows and {columns} columns. We have {bankruptcies} bankrupted companies and {rows-bankruptcies} companies that still operate')


Our dataset is comprised of 145 rows and 38 columns. We have 49 bankrupted companies and 96 companies that still operate


## Train test split and normalizing the data

In [32]:
np.random.seed(333)

X=df.iloc[:,0:-1].values
Y=df.iloc[:,-1]
X_train,X_test, y_train,y_test = train_test_split(X,Y,test_size=0.2,random_state=43) 

y_train=y_train.astype(int)
y_test=y_test.astype(int)

X_train=normalize(X_train)
X_test=normalize(X_test)



## Autosklearn

In [33]:
cls = autosklearn.classification.AutoSklearnClassifier(time_left_for_this_task=300,per_run_time_limit=150,
        ml_memory_limit=20240,ensemble_size=50,ensemble_nbest=200,
        initial_configurations_via_metalearning=0,resampling_strategy='cv',
        resampling_strategy_arguments={'folds': 5},
        seed=42)
cls.fit(X_train.copy(), y_train.copy())
cls.refit(X_train.copy(),y_train.copy())
y_pred = cls.predict(X_test)

print(cls.show_models())




42
['/tmp/autosklearn_tmp_2541_3966/.auto-sklearn/ensembles/42.0000000000.ensemble', '/tmp/autosklearn_tmp_2541_3966/.auto-sklearn/ensembles/42.0000000001.ensemble', '/tmp/autosklearn_tmp_2541_3966/.auto-sklearn/ensembles/42.0000000002.ensemble', '/tmp/autosklearn_tmp_2541_3966/.auto-sklearn/ensembles/42.0000000003.ensemble', '/tmp/autosklearn_tmp_2541_3966/.auto-sklearn/ensembles/42.0000000004.ensemble', '/tmp/autosklearn_tmp_2541_3966/.auto-sklearn/ensembles/42.0000000005.ensemble', '/tmp/autosklearn_tmp_2541_3966/.auto-sklearn/ensembles/42.0000000006.ensemble', '/tmp/autosklearn_tmp_2541_3966/.auto-sklearn/ensembles/42.0000000007.ensemble', '/tmp/autosklearn_tmp_2541_3966/.auto-sklearn/ensembles/42.0000000008.ensemble', '/tmp/autosklearn_tmp_2541_3966/.auto-sklearn/ensembles/42.0000000009.ensemble', '/tmp/autosklearn_tmp_2541_3966/.auto-sklearn/ensembles/42.0000000010.ensemble', '/tmp/autosklearn_tmp_2541_3966/.auto-sklearn/ensembles/42.0000000011.ensemble', '/tmp/autosklearn_tmp_25

In [34]:
auc_sklearn_last=metrics.roc_auc_score(y_test,y_pred)
acc_sklearn_last=metrics.accuracy_score(y_test,y_pred)

print("Auc score:",auc_sklearn_last)
print("Acc score:",acc_sklearn_last)

Auc score: 0.7210526315789473
Acc score: 0.7586206896551724


## Dataset "1 years before"

In [35]:
df_1year=pd.read_csv("last year.csv")
df_1year['final'][df_1year['final']=='Bankrupted']=0
df_1year['final'][df_1year['final']=='Non-Bankrupted']=1

bankruptcies=df_1year['final'][df_1year['final']==0].count()
rows=df_1year.shape[0]
columns=df_1year.shape[1]
print(f'Our dataset is comprised of {rows} rows and {columns} columns. We have {bankruptcies} bankrupted companies and {rows-bankruptcies} companies that still operate')


Our dataset is comprised of 145 rows and 38 columns. We have 49 bankrupted companies and 96 companies that still operate


## Train test split and normalizing the data

In [36]:
np.random.seed(333)

X=df_1year.iloc[:,0:-1].values
Y=df_1year.iloc[:,-1]
X_train,X_test, y_train,y_test = train_test_split(X,Y,test_size=0.2,random_state=43) 

y_train=y_train.astype(int)
y_test=y_test.astype(int)

X_train=normalize(X_train)
X_test=normalize(X_test)



## Autosklearn

In [37]:
cls_1year = autosklearn.classification.AutoSklearnClassifier(time_left_for_this_task=300,per_run_time_limit=150,
        ml_memory_limit=20240,ensemble_size=50,ensemble_nbest=200,
        initial_configurations_via_metalearning=0,resampling_strategy='cv',
        resampling_strategy_arguments={'folds': 5},
        seed=42)
cls_1year.fit(X_train.copy(), y_train.copy())
cls_1year.refit(X_train.copy(),y_train.copy())
y_pred = cls_1year.predict(X_test)

print(cls_1year.show_models())




42
['/tmp/autosklearn_tmp_2541_8033/.auto-sklearn/ensembles/42.0000000000.ensemble', '/tmp/autosklearn_tmp_2541_8033/.auto-sklearn/ensembles/42.0000000001.ensemble', '/tmp/autosklearn_tmp_2541_8033/.auto-sklearn/ensembles/42.0000000002.ensemble', '/tmp/autosklearn_tmp_2541_8033/.auto-sklearn/ensembles/42.0000000003.ensemble', '/tmp/autosklearn_tmp_2541_8033/.auto-sklearn/ensembles/42.0000000004.ensemble', '/tmp/autosklearn_tmp_2541_8033/.auto-sklearn/ensembles/42.0000000005.ensemble', '/tmp/autosklearn_tmp_2541_8033/.auto-sklearn/ensembles/42.0000000006.ensemble', '/tmp/autosklearn_tmp_2541_8033/.auto-sklearn/ensembles/42.0000000007.ensemble', '/tmp/autosklearn_tmp_2541_8033/.auto-sklearn/ensembles/42.0000000008.ensemble', '/tmp/autosklearn_tmp_2541_8033/.auto-sklearn/ensembles/42.0000000009.ensemble', '/tmp/autosklearn_tmp_2541_8033/.auto-sklearn/ensembles/42.0000000010.ensemble', '/tmp/autosklearn_tmp_2541_8033/.auto-sklearn/ensembles/42.0000000011.ensemble', '/tmp/autosklearn_tmp_25

In [38]:
auc_sklearn_1year=metrics.roc_auc_score(y_test,y_pred)
acc_sklearn_1year=metrics.accuracy_score(y_test,y_pred)

print("Auc score:",acc_sklearn_1year)
print("Acc score:",acc_sklearn_1year)

Auc score: 0.7586206896551724
Acc score: 0.7586206896551724


## Dataset "2 years before"

In [39]:
df_2year=pd.read_csv("last year.csv")
df_2year['final'][df_2year['final']=='Bankrupted']=0
df_2year['final'][df_2year['final']=='Non-Bankrupted']=1

bankruptcies=df_2year['final'][df_2year['final']==0].count()
rows=df_2year.shape[0]
columns=df_2year.shape[1]
print(f'Our dataset is comprised of {rows} rows and {columns} columns. We have {bankruptcies} bankrupted companies and {rows-bankruptcies} companies that still operate')


Our dataset is comprised of 145 rows and 38 columns. We have 49 bankrupted companies and 96 companies that still operate


## Train test split and normalizing the data

In [40]:
np.random.seed(333)

X=df_2year.iloc[:,0:-1].values
Y=df_2year.iloc[:,-1]
X_train,X_test, y_train,y_test = train_test_split(X,Y,test_size=0.2,random_state=43) 

y_train=y_train.astype(int)
y_test=y_test.astype(int)

X_train=normalize(X_train)
X_test=normalize(X_test)



## Autosklearn

In [41]:
cls_2year = autosklearn.classification.AutoSklearnClassifier(time_left_for_this_task=300,per_run_time_limit=150,
        ml_memory_limit=20240,ensemble_size=50,ensemble_nbest=200,
        initial_configurations_via_metalearning=0,resampling_strategy='cv',
        resampling_strategy_arguments={'folds': 5},
        seed=42)
cls_2year.fit(X_train.copy(), y_train.copy())
cls_2year.refit(X_train.copy(),y_train.copy())
y_pred = cls_2year.predict(X_test)

print(cls_2year.show_models())




42
['/tmp/autosklearn_tmp_2541_4206/.auto-sklearn/ensembles/42.0000000000.ensemble', '/tmp/autosklearn_tmp_2541_4206/.auto-sklearn/ensembles/42.0000000001.ensemble', '/tmp/autosklearn_tmp_2541_4206/.auto-sklearn/ensembles/42.0000000002.ensemble', '/tmp/autosklearn_tmp_2541_4206/.auto-sklearn/ensembles/42.0000000003.ensemble', '/tmp/autosklearn_tmp_2541_4206/.auto-sklearn/ensembles/42.0000000004.ensemble', '/tmp/autosklearn_tmp_2541_4206/.auto-sklearn/ensembles/42.0000000005.ensemble', '/tmp/autosklearn_tmp_2541_4206/.auto-sklearn/ensembles/42.0000000006.ensemble', '/tmp/autosklearn_tmp_2541_4206/.auto-sklearn/ensembles/42.0000000007.ensemble', '/tmp/autosklearn_tmp_2541_4206/.auto-sklearn/ensembles/42.0000000008.ensemble', '/tmp/autosklearn_tmp_2541_4206/.auto-sklearn/ensembles/42.0000000009.ensemble', '/tmp/autosklearn_tmp_2541_4206/.auto-sklearn/ensembles/42.0000000010.ensemble', '/tmp/autosklearn_tmp_2541_4206/.auto-sklearn/ensembles/42.0000000011.ensemble', '/tmp/autosklearn_tmp_25

In [43]:
auc_sklearn_2year=metrics.roc_auc_score(y_test,y_pred)
acc_sklearn_2year=metrics.accuracy_score(y_test,y_pred)

print("Auc score:",auc_sklearn_2year)
print("Acc score:",acc_sklearn_2year)

Auc score: 0.6710526315789473
Acc score: 0.7241379310344828


## Results

In [46]:
data={'AUC':[auc_sklearn_1year,auc_sklearn_2year,auc_sklearn_last],
     'accuracy':[acc_sklearn_1year,acc_sklearn_2year,acc_sklearn_last]}

results=pd.DataFrame(data,index=['1 years before','2 years before','Last year'])
results

Unnamed: 0,AUC,accuracy
1 years before,0.721053,0.758621
2 years before,0.671053,0.724138
Last year,0.721053,0.758621
