# Bankruptcy Prediction

In [1]:
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

## 1. Using full data

### Load Data 5 Years

In [2]:
from scipy.io import arff
import pandas as pd

data = arff.loadarff('data/5year.arff')
df = pd.DataFrame(data[0])

#shuffle
df = df.sample(frac = 1)

#prep Data
df = df.fillna(0)
df['class'] = df['class'].astype(float, errors = 'raise')

#split
train=df.sample(frac=0.8,random_state=200) 
test=df.drop(train.index)

x, y = train.iloc[:,:-1], train.iloc[:,-1]
x_test, y_test = test.iloc[:,:-1], test.iloc[:,-1]

### Simple Logistic Regression

In [3]:
from sklearn.linear_model import LogisticRegression

clf = LogisticRegression(random_state=0, multi_class='multinomial').fit(x, y)

res = clf.predict(x_test)
mse = mean_squared_error(y_test, res)
r2 = r2_score(y_test, res)

print("MSE: ", mse, " R2: ", r2)

MSE:  0.04341637010676157  R2:  -0.06201982651796789


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


### Random Forest Regressor

In [4]:
from sklearn.ensemble import RandomForestRegressor

rf = RandomForestRegressor(random_state=0)

rf.fit(x, y)

res = rf.predict(x_test)
mse = mean_squared_error(y_test, res)
r2 = r2_score(y_test, res)

print("MSE: ", mse, " R2: ", r2)

MSE:  0.023373950177935945  R2:  0.42824334572490697


### SVR

In [5]:
from sklearn.svm import SVR
svr = SVR(kernel="rbf")
svr.fit(x, y)

res = svr.predict(x_test)
mse = mean_squared_error(y_test, res)
r2 = r2_score(y_test, res)

print("MSE: ", mse, " R2: ", r2)

MSE:  0.044126429954149254  R2:  -0.07938879665724263


### GradientBoostingClassifier 

In [6]:
from sklearn.ensemble import GradientBoostingClassifier

clf = GradientBoostingClassifier(random_state=0).fit(x, y)

res = clf.predict(x_test)
mse = mean_squared_error(y_test, res)
r2 = r2_score(y_test, res)

print("MSE: ", mse, " R2: ", r2)

MSE:  0.027758007117437724  R2:  0.32100371747211887
