# Classifying Distressed and Non-Distressed Banks from Labels and Features

## Before Crisis

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix

# Data: one row per bank, holding the 2005-2007 yearly feature values and a
# Yes/No distress label. The lists are aligned by position (21 entries each).
data = {
    'Bank': ['RBS', 'Credit Suisse', 'Deutsche Bank', 'Banco', 'Barclays', 'BBVA', 'BNP',
             'CBK', 'Danske', 'Ereste', 'HSBC', 'Intensa', 'KBC', 'Lloyds',
             'Nordea', 'Skanden', 'Societe', 'Handelsbanken', 'Swedbank', 'UBS', 'Unicredit'],
    'Year_2005': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.38],
    'Year_2006': [0, 0.2, 0, 0.06, 0, 0, 0, 0, 0, 0, 0, 0.4, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    'Year_2007': [0.5, 0, 0.04, 0, 0.29, 0, 0, 0.05, 0, 0, 0.01, 0, 0, 0, 0, 0, 0.9, 0.05, 0.01, 0.14, 0.8],
    'Distress': ['Yes', 'No', 'Yes', 'No', 'No', 'No', 'Yes', 'No', 'Yes', 'No', 'No',
                 'Yes', 'Yes', 'Yes', 'No', 'No', 'Yes', 'No', 'No', 'No', 'Yes'],
}

# Create DataFrame
df = pd.DataFrame(data)

# Split data into features (X) and labels (y); 'Bank' is an identifier only
# and is deliberately not used as a feature.
X = df[['Year_2005', 'Year_2006', 'Year_2007']]  # Features
y = df['Distress']  # Labels

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train logistic regression model (library defaults)
model = LogisticRegression()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)

# Pin the label order so the matrix is always 2x2 with index 0 = 'No' and
# index 1 = 'Yes'. Without `labels`, the shape and row order depend on which
# classes happen to appear in the test split, and the indexing below could
# raise or silently read the wrong row.
confusion = confusion_matrix(y_test, y_pred, labels=['No', 'Yes'])
tn, fp, fn, tp = confusion.ravel()

# Guard against an empty class in the test split (rate is reported as 0 then).
type1_error = fp / (tn + fp) if (tn + fp) > 0 else 0  # False positive rate
type2_error = fn / (fn + tp) if (fn + tp) > 0 else 0  # False negative rate

# Print results (as percentages).
# NOTE(review): in the recorded run the false positive rate is 0 and the false
# negative rate is 100, i.e. no held-out bank was predicted 'Yes'; the accuracy
# therefore only reflects the share of 'No' banks in the test split.
print("Accuracy:", accuracy*100)
print("Type 1 Error (False Positive Rate):", type1_error*100)
print("Type 2 Error (False Negative Rate):", type2_error*100)


Accuracy: 60.0
Type 1 Error (False Positive Rate): 0.0
Type 2 Error (False Negative Rate): 100.0


In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, roc_auc_score, roc_curve, brier_score_loss

# Data: 2005-2007 yearly feature values and a Yes/No distress label per bank.
# The lists are aligned by position (21 entries each).
# NOTE(review): the placeholder name 'Bank I' appears twice in 'Bank'. The
# column is not used as a feature below, so the model is unaffected.
data = {
    'Bank': ['Bank A', 'Bank B', 'Bank C', 'Bank D', 'Bank E', 'Bank F', 'Bank G',
             'Bank H', 'Bank I', 'Bank J', 'Bank I', 'Bank K', 'Bank 12', 'Bank 13',
             'Bank 14', 'Bank 15', 'Bank 16', 'Bank 17', 'Bank 18', 'Bank 19', 'Bank 20'],
    'Year_2005': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.38],
    'Year_2006': [0, 0.2, 0, 0.06, 0, 0, 0, 0, 0, 0, 0, 0.4, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    'Year_2007': [0.5, 0, 0.04, 0, 0.29, 0, 0, 0.05, 0, 0, 0.01, 0, 0, 0, 0, 0, 0.9, 0.05, 0.01, 0.14, 0.8],
    'Distress': ['Yes', 'No', 'Yes', 'No', 'No', 'No', 'Yes', 'No', 'Yes', 'No', 'No',
                 'Yes', 'Yes', 'Yes', 'No', 'No', 'Yes', 'No', 'No', 'No', 'Yes'],
}

# Create DataFrame
df = pd.DataFrame(data)

# Split data into features (X) and labels (y)
X = df[['Year_2005', 'Year_2006', 'Year_2007']]  # Features
y = df['Distress']  # Labels

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train logistic regression model (library defaults)
model = LogisticRegression()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Pin the label order so the matrix is always 2x2 with index 0 = 'No' and
# index 1 = 'Yes', regardless of which classes appear in the test split.
confusion = confusion_matrix(y_test, y_pred, labels=['No', 'Yes'])
tn, fp, fn, tp = confusion.ravel()

# Sensitivity (True Positive Rate); reported as 0 if the split has no 'Yes' rows
sensitivity = tp / (fn + tp) if (fn + tp) > 0 else 0

# Specificity (True Negative Rate); reported as 0 if the split has no 'No' rows
specificity = tn / (tn + fp) if (tn + fp) > 0 else 0

# Probability of the positive class: look the column up in model.classes_
# instead of assuming it is column 1.
positive_col = list(model.classes_).index('Yes')
y_pred_proba = model.predict_proba(X_test)[:, positive_col]

# Use one explicit 0/1 truth vector ('Yes' -> 1) for both probabilistic scores
# so AUC and Brier score agree on what the positive class is.
y_test_binary = (y_test == 'Yes').astype(int)

# Calculate AUC
auc = roc_auc_score(y_test_binary, y_pred_proba)

# Calculate Brier score
brier_score = brier_score_loss(y_test_binary, y_pred_proba)

# Print results (as percentages)
print("Sensitivity (True Positive Rate):", sensitivity*100)
print("Specificity (True Negative Rate):", specificity*100)
print("AUC (Area Under the ROC Curve):", auc*100)
print("Brier Score:", brier_score*100)


Sensitivity (True Positive Rate): 0.0
Specificity (True Negative Rate): 100.0
AUC (Area Under the ROC Curve): 58.33333333333333
Brier Score: 22.743794499361154


## During Crisis

In [27]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix

# Data: one row per bank, holding the 2008-2009 yearly feature values and a
# Yes/No distress label. The lists are aligned by position (21 entries each).
data = {
    'Bank': ['RBS', 'Credit Suisse', 'Deutsche Bank', 'Banco', 'Barclays', 'BBVA', 'BNP',
             'CBK', 'Danske', 'Ereste', 'HSBC', 'Intensa', 'KBC', 'Lloyds',
             'Nordea', 'Skanden', 'Societe', 'Handelsbanken', 'Swedbank', 'UBS', 'Unicredit'],
    'Year_2008': [0.6, 0.08, 0.17, 0.15, 0.31, 0.15, 0.15, 0.37, 0.17, 0.35, 0.16,
                  0.06, 0.26, 0.11, 0.12, 0.11, 0.14, 0, 0.2, 0.10, 0.15],
    'Year_2009': [0.21, 0.01, 0.09, 0.18, 0.03, 0.02, 0, 0.29, 0.02, 0.02, 0,
                  0, 0, 0.91, 0.09, 0.05, 0.02, 0.01, 0.07, 0.05, 0.02],
    'Distress': ['Yes', 'No', 'Yes', 'No', 'No', 'No', 'Yes', 'No', 'Yes', 'No', 'No',
                 'Yes', 'Yes', 'Yes', 'No', 'No', 'Yes', 'No', 'No', 'No', 'Yes'],
}

# Create DataFrame
df = pd.DataFrame(data)

# Split data into features (X) and labels (y); 'Bank' is an identifier only
# and is deliberately not used as a feature.
X = df[['Year_2008', 'Year_2009']]  # Features
y = df['Distress']  # Labels

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train logistic regression model (library defaults)
model = LogisticRegression()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)

# Pin the label order so the matrix is always 2x2 with index 0 = 'No' and
# index 1 = 'Yes'. Without `labels`, the shape and row order depend on which
# classes happen to appear in the test split, and the indexing below could
# raise or silently read the wrong row.
confusion = confusion_matrix(y_test, y_pred, labels=['No', 'Yes'])
tn, fp, fn, tp = confusion.ravel()

# Guard against an empty class in the test split (rate is reported as 0 then).
type1_error = fp / (tn + fp) if (tn + fp) > 0 else 0  # False positive rate
type2_error = fn / (fn + tp) if (fn + tp) > 0 else 0  # False negative rate

# Print results (as percentages).
# NOTE(review): in the recorded run the false positive rate is 0 and the false
# negative rate is 100, i.e. no held-out bank was predicted 'Yes'.
print("Accuracy:", accuracy*100)
print("Type 1 Error (False Positive Rate):", type1_error*100)
print("Type 2 Error (False Negative Rate):", type2_error*100)


Accuracy: 60.0
Type 1 Error (False Positive Rate): 0.0
Type 2 Error (False Negative Rate): 100.0


In [29]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, roc_auc_score, roc_curve, brier_score_loss


# Data: 2008-2009 yearly feature values and a Yes/No distress label per bank.
# The lists are aligned by position (21 entries each).
# NOTE(review): the placeholder name 'Bank I' appears twice in 'Bank'. The
# column is not used as a feature below, so the model is unaffected.
data = {
    'Bank': ['Bank A', 'Bank B', 'Bank C', 'Bank D', 'Bank E', 'Bank F', 'Bank G',
             'Bank H', 'Bank I', 'Bank J', 'Bank I', 'Bank K', 'Bank 12', 'Bank 13',
             'Bank 14', 'Bank 15', 'Bank 16', 'Bank 17', 'Bank 18', 'Bank 19', 'Bank 20'],
    'Year_2008': [0.6, 0.08, 0.17, 0.15, 0.31, 0.15, 0.15, 0.37, 0.17, 0.35, 0.16,
                  0.06, 0.26, 0.11, 0.12, 0.11, 0.14, 0, 0.2, 0.10, 0.15],
    'Year_2009': [0.21, 0.01, 0.09, 0.18, 0.03, 0.02, 0, 0.29, 0.02, 0.02, 0,
                  0, 0, 0.91, 0.09, 0.05, 0.02, 0.01, 0.07, 0.05, 0.02],
    'Distress': ['Yes', 'No', 'Yes', 'No', 'No', 'No', 'Yes', 'No', 'Yes', 'No', 'No',
                 'Yes', 'Yes', 'Yes', 'No', 'No', 'Yes', 'No', 'No', 'No', 'Yes'],
}

# Create DataFrame
df = pd.DataFrame(data)

# Split data into features (X) and labels (y)
X = df[['Year_2008', 'Year_2009']]  # Features
y = df['Distress']  # Labels

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train logistic regression model (library defaults)
model = LogisticRegression()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Pin the label order so the matrix is always 2x2 with index 0 = 'No' and
# index 1 = 'Yes', regardless of which classes appear in the test split.
confusion = confusion_matrix(y_test, y_pred, labels=['No', 'Yes'])
tn, fp, fn, tp = confusion.ravel()

# Sensitivity (True Positive Rate); reported as 0 if the split has no 'Yes' rows
sensitivity = tp / (fn + tp) if (fn + tp) > 0 else 0

# Specificity (True Negative Rate); reported as 0 if the split has no 'No' rows
specificity = tn / (tn + fp) if (tn + fp) > 0 else 0

# Probability of the positive class: look the column up in model.classes_
# instead of assuming it is column 1.
positive_col = list(model.classes_).index('Yes')
y_pred_proba = model.predict_proba(X_test)[:, positive_col]

# Use one explicit 0/1 truth vector ('Yes' -> 1) for both probabilistic scores
# so AUC and Brier score agree on what the positive class is.
y_test_binary = (y_test == 'Yes').astype(int)

# Calculate AUC
# NOTE(review): the recorded run prints an AUC of 0.0, meaning every distressed
# test bank was scored below every non-distressed one on this split.
auc = roc_auc_score(y_test_binary, y_pred_proba)

# Calculate Brier score
brier_score = brier_score_loss(y_test_binary, y_pred_proba)

# Print results (as percentages)
print("Sensitivity (True Positive Rate):", sensitivity*100)
print("Specificity (True Negative Rate):", specificity*100)
print("AUC (Area Under the ROC Curve):", auc*100)
print("Brier Score:", brier_score*100)

Sensitivity (True Positive Rate): 0.0
Specificity (True Negative Rate): 100.0
AUC (Area Under the ROC Curve): 0.0
Brier Score: 24.809722919736878


## After Crisis

In [30]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix

# Data: one row per bank, holding the 2010-2014 yearly feature values and a
# Yes/No distress label. The lists are aligned by position (21 entries each).
data = {
    'Bank': ['RBS', 'Credit Suisse', 'Deutsche Bank', 'Banco', 'Barclays', 'BBVA', 'BNP',
             'CBK', 'Danske', 'Ereste', 'HSBC', 'Intensa', 'KBC', 'Lloyds',
             'Nordea', 'Skanden', 'Societe', 'Handelsbanken', 'Swedbank', 'UBS', 'Unicredit'],
    'Year_2010': [0.02, 0.02, 0.05, 0, 0.02, 0.05, 0, 1, 0, 0.08, 0,
                  0.02, 0.16, 0.015, 0.017, 0.03, 0.22, 0.09, 0.01, 0.06, 0.03],
    'Year_2011': [0.08, 0.1, 0.03, 0.25, 0.04, 0.03, 0.03, 0.54, 0.01, 0.09, 0,
                  0.05, 0.39, 0.01, 0.03, 0, 0.19, 0, 0.01, 0.02, 0.1],
    'Year_2012': [0, 0, 0, 0.06, 0, 0, 0.02, 0.99, 0.04, 0.01, 0.01,
                  0.15, 0.07, 0.14, 0, 0.17, 0, 0.05, 0.02, 0.08, 0.08],
    'Year_2013': [0, 0, 0, 0, 0.01, 0, 0.05, 0.06, 0, 0.03, 0,
                  0.01, 0, 0, 0, 0, 0, 0.08, 0.09, 0.01, 0.05],
    'Year_2014': [0, 0, 0, 0, 0.05, 1, 1, 1, 0, 0.02, 0,
                  0, 0, 0, 0, 0, 0.01, 0, 0.5, 0.03, 0.03],
    'Distress': ['Yes', 'No', 'Yes', 'No', 'No', 'No', 'Yes', 'No', 'Yes', 'No', 'No',
                 'Yes', 'Yes', 'Yes', 'No', 'No', 'Yes', 'No', 'No', 'No', 'Yes'],
}

# Create DataFrame
df = pd.DataFrame(data)

# Split data into features (X) and labels (y); 'Bank' is an identifier only
# and is deliberately not used as a feature.
X = df[['Year_2010', 'Year_2011', 'Year_2012', 'Year_2013', 'Year_2014']]  # Features
y = df['Distress']  # Labels

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train logistic regression model (library defaults)
model = LogisticRegression()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)

# Pin the label order so the matrix is always 2x2 with index 0 = 'No' and
# index 1 = 'Yes'. Without `labels`, the shape and row order depend on which
# classes happen to appear in the test split, and the indexing below could
# raise or silently read the wrong row.
confusion = confusion_matrix(y_test, y_pred, labels=['No', 'Yes'])
tn, fp, fn, tp = confusion.ravel()

# Guard against an empty class in the test split (rate is reported as 0 then).
type1_error = fp / (tn + fp) if (tn + fp) > 0 else 0  # False positive rate
type2_error = fn / (fn + tp) if (fn + tp) > 0 else 0  # False negative rate

# Print results (as percentages).
# NOTE(review): in the recorded run the false positive rate is 0 and the false
# negative rate is 100, i.e. no held-out bank was predicted 'Yes'.
print("Accuracy:", accuracy*100)
print("Type 1 Error (False Positive Rate):", type1_error*100)
print("Type 2 Error (False Negative Rate):", type2_error*100)


Accuracy: 60.0
Type 1 Error (False Positive Rate): 0.0
Type 2 Error (False Negative Rate): 100.0


In [31]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, roc_auc_score, roc_curve, brier_score_loss


# Data: 2010-2014 yearly feature values and a Yes/No distress label per bank.
# The lists are aligned by position (21 entries each).
# NOTE(review): the placeholder name 'Bank I' appears twice in 'Bank'. The
# column is not used as a feature below, so the model is unaffected.
data = {
    'Bank': ['Bank A', 'Bank B', 'Bank C', 'Bank D', 'Bank E', 'Bank F', 'Bank G',
             'Bank H', 'Bank I', 'Bank J', 'Bank I', 'Bank K', 'Bank 12', 'Bank 13',
             'Bank 14', 'Bank 15', 'Bank 16', 'Bank 17', 'Bank 18', 'Bank 19', 'Bank 20'],
    'Year_2010': [0.02, 0.02, 0.05, 0, 0.02, 0.05, 0, 1, 0, 0.08, 0,
                  0.02, 0.16, 0.015, 0.017, 0.03, 0.22, 0.09, 0.01, 0.06, 0.03],
    'Year_2011': [0.08, 0.1, 0.03, 0.25, 0.04, 0.03, 0.03, 0.54, 0.01, 0.09, 0,
                  0.05, 0.39, 0.01, 0.03, 0, 0.19, 0, 0.01, 0.02, 0.1],
    'Year_2012': [0, 0, 0, 0.06, 0, 0, 0.02, 0.99, 0.04, 0.01, 0.01,
                  0.15, 0.07, 0.14, 0, 0.17, 0, 0.05, 0.02, 0.08, 0.08],
    'Year_2013': [0, 0, 0, 0, 0.01, 0, 0.05, 0.06, 0, 0.03, 0,
                  0.01, 0, 0, 0, 0, 0, 0.08, 0.09, 0.01, 0.05],
    'Year_2014': [0, 0, 0, 0, 0.05, 1, 1, 1, 0, 0.02, 0,
                  0, 0, 0, 0, 0, 0.01, 0, 0.5, 0.03, 0.03],
    'Distress': ['Yes', 'No', 'Yes', 'No', 'No', 'No', 'Yes', 'No', 'Yes', 'No', 'No',
                 'Yes', 'Yes', 'Yes', 'No', 'No', 'Yes', 'No', 'No', 'No', 'Yes'],
}

# Create DataFrame
df = pd.DataFrame(data)

# Split data into features (X) and labels (y)
X = df[['Year_2010', 'Year_2011', 'Year_2012', 'Year_2013', 'Year_2014']]  # Features
y = df['Distress']  # Labels

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train logistic regression model (library defaults)
model = LogisticRegression()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Pin the label order so the matrix is always 2x2 with index 0 = 'No' and
# index 1 = 'Yes', regardless of which classes appear in the test split.
confusion = confusion_matrix(y_test, y_pred, labels=['No', 'Yes'])
tn, fp, fn, tp = confusion.ravel()

# Sensitivity (True Positive Rate); reported as 0 if the split has no 'Yes' rows
sensitivity = tp / (fn + tp) if (fn + tp) > 0 else 0

# Specificity (True Negative Rate); reported as 0 if the split has no 'No' rows
specificity = tn / (tn + fp) if (tn + fp) > 0 else 0

# Probability of the positive class: look the column up in model.classes_
# instead of assuming it is column 1.
positive_col = list(model.classes_).index('Yes')
y_pred_proba = model.predict_proba(X_test)[:, positive_col]

# Use one explicit 0/1 truth vector ('Yes' -> 1) for both probabilistic scores
# so AUC and Brier score agree on what the positive class is.
y_test_binary = (y_test == 'Yes').astype(int)

# Calculate AUC
auc = roc_auc_score(y_test_binary, y_pred_proba)

# Calculate Brier score
brier_score = brier_score_loss(y_test_binary, y_pred_proba)

# Print results (as percentages)
print("Sensitivity (True Positive Rate):", sensitivity*100)
print("Specificity (True Negative Rate):", specificity*100)
print("AUC (Area Under the ROC Curve):", auc*100)
print("Brier Score:", brier_score*100)

Sensitivity (True Positive Rate): 0.0
Specificity (True Negative Rate): 100.0
AUC (Area Under the ROC Curve): 66.66666666666667
Brier Score: 24.172315582320188


## Final Output Metrics

In [32]:
import pandas as pd

# Summary of the metrics printed by the cells above, one row per period.
# NOTE(review): the figures are transcribed by hand rather than computed, and
# are entered at mixed precision (e.g. the After-Crisis AUC printed as
# 66.666... is entered as 66.6). The column key 'Senstivity' is misspelled but
# kept as-is so the table's column names do not change.
summary_columns = ['Time', 'Avg Accuracy', 'Type I Error', 'Type II Error',
                   'Senstivity', 'Specificity', 'AUC', 'Brier Score']

# One tuple per period, in the same order as `summary_columns`.
summary_rows = [
    ('Before Crisis', 60, 0.0, 100, 0.0, 100, 58.3, 22.7),
    ('During Crisis', 60, 0, 100, 0.0, 100, 0, 24.8),
    ('After Crisis', 60, 0, 100, 0.0, 100, 66.6, 24.17),
]

# Transpose the rows into the column-oriented mapping that pandas consumes.
data = {
    column: list(values)
    for column, values in zip(summary_columns, zip(*summary_rows))
}

df = pd.DataFrame(data, columns=summary_columns)

# Print the summary table
print(df)

            Time  Avg Accuracy  Type I Error  Type II Error  Senstivity   
0  Before Crisis            60           0.0            100         0.0  \
1  During Crisis            60           0.0            100         0.0   
2   After Crisis            60           0.0            100         0.0   

   Specificity   AUC  Brier Score  
0          100  58.3        22.70  
1          100   0.0        24.80  
2          100  66.6        24.17  
