In [2]:
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Download the credit dataset as CSV straight from mldata.io (needs network access).
DATA_URL = 'https://www.mldata.io/download-csv-weka/german_credit_data'
data = pd.read_csv(DATA_URL)

In [5]:
# Preview the first five rows of the loaded frame.
data.head()

Unnamed: 0,checking_account_status,duration,credit_history,purpose,credit_amount,savings,present_employment,installment_rate,personal,other_debtors,...,property,age,other_installment_plans,housing,existing_credits,job,dependents,telephone,foreign_worker,customer_type
0,A11,6,A34,A43,1169.0,A65,A75,4.0,A93,A101,...,A121,67.0,A143,A152,2.0,A173,1,A192,A201,1
1,A12,48,A32,A43,5951.0,A61,A73,2.0,A92,A101,...,A121,22.0,A143,A152,1.0,A173,1,A191,A201,2
2,A14,12,A34,A46,2096.0,A61,A74,2.0,A93,A101,...,A121,49.0,A143,A152,1.0,A172,2,A191,A201,1
3,A11,42,A32,A42,7882.0,A61,A74,2.0,A93,A103,...,A122,45.0,A143,A153,1.0,A173,2,A191,A201,1
4,A11,24,A33,A40,4870.0,A61,A73,3.0,A93,A101,...,A124,53.0,A143,A153,2.0,A173,2,A191,A201,2


### Function calculating potential earnings and losses for a bank depending on the prediction model's outcome

##### Sources

According to 'Overseas Business Reports', U.S. Department of Commerce, Bureau of International Commerce, 1991 (chapter: 'banking and credit'), the average mortgage interest rate was around 10%, and the average rate for other types of loans was around 13%.

According to 'Banking Systems Simulation: Theory, Practice, and Application of Modeling Shocks, Losses, and Contagion', Stefano Zedda (chapter 2.10.1), the average LGD (loss given default) between 1990 and 2008 was 38%.

In [133]:
# Loss given default: fraction of the credit amount booked as a loss when the
# customer turns out to be a bad one (average for 1990-2008, see Sources).
lgd = 0.38

# Average interest rates taken from the Sources section.
ir_loan = 0.13  # non-mortgage loans
ir_mort = 0.10  # mortgage credits don't exist in this dataset

##### Data for test

In [144]:
# Demo inputs for the earnings/losses function: the whole dataset plays the role
# of the "test" set and the predictions are random placeholders, not model output.
X_test = data.drop(['customer_type'], axis = 1)
# Recode the target so that 1 stays 1 and 2 becomes 0. The result is assigned back
# to the column instead of calling replace(..., inplace=True) on the selected
# column, because that chained in-place call does not update `data` when pandas
# copy-on-write is active.
data['customer_type'] = data['customer_type'].replace([1, 2], [1, 0])
y_test = data['customer_type']
# Local, fixed-seed generator so the placeholder predictions are identical on
# every "Restart & Run All".
rng = np.random.RandomState(42)
y_pred = pd.Series(rng.randint(low=0, high=2, size=len(y_test)))

In [145]:
# Element-wise comparison of the true and predicted labels (True where they agree).
y_test == y_pred

0       True
1       True
2      False
3      False
4       True
5       True
6       True
7      False
8      False
9      False
10     False
11     False
12     False
13      True
14      True
15      True
16     False
17      True
18      True
19     False
20     False
21     False
22     False
23     False
24      True
25      True
26     False
27      True
28      True
29     False
       ...  
970     True
971    False
972    False
973     True
974    False
975     True
976    False
977    False
978    False
979     True
980    False
981    False
982     True
983    False
984    False
985     True
986    False
987    False
988    False
989    False
990    False
991    False
992     True
993     True
994     True
995     True
996     True
997    False
998     True
999    False
Length: 1000, dtype: bool

### Function

In [146]:
def calculateEarningsLosses(X_test, y_pred, y_test, interest_rate=None, loss_given_default=None):
    '''
    Estimate the bank's earnings and losses from following a model's predictions.

    Each credit is valued at ``interest_rate * credit_amount`` when its true class
    is 1 and at ``-loss_given_default * credit_amount`` otherwise. These per-credit
    balances are then summed in four groups:
    - earnings made:    true class 1, predicted 1
    - earnings omitted: true class 1, predicted 0
    - losses made:      true class 0, predicted 1
    - losses omitted:   true class 0, predicted 0

    X_test, y_pred and y_test are matched row by row (by position), so y_pred may
    be a list, a numpy array or a Series whose index differs from y_test's.

    Parameters:
    - X_test: DataFrame with a 'credit_amount' column
    - y_pred: predicted classes (1 / 0), one per row of X_test
    - y_test: true classes (1 / 0), one per row of X_test
    - interest_rate: rate earned on a class-1 credit; defaults to the
      notebook-level ``ir_loan``
    - loss_given_default: fraction of the amount lost otherwise; defaults to the
      notebook-level ``lgd``

    Returns a tuple (results, final_balance, perc_of_max_income):
    - results: one-row DataFrame with the four sums listed above
    - final_balance: earnings made + losses made
    - perc_of_max_income: final_balance divided by the sum of all earnings on
      true class-1 credits; NaN when that sum is zero

    Raises ValueError when the three inputs do not have the same length.
    '''
    # Fall back to the notebook-level constants so existing three-argument calls
    # keep working unchanged.
    if interest_rate is None:
        interest_rate = ir_loan
    if loss_given_default is None:
        loss_given_default = lgd

    # Plain arrays make every comparison and product positional; combining Series
    # with different indexes would align on labels and corrupt the masks.
    amounts = np.asarray(X_test['credit_amount'])
    actual = np.asarray(y_test)
    predicted = np.asarray(y_pred)
    if not (len(amounts) == len(actual) == len(predicted)):
        raise ValueError(
            'X_test, y_pred and y_test must have the same length, got '
            '{}, {} and {}'.format(len(amounts), len(predicted), len(actual))
        )

    is_good = actual == 1
    is_bad = actual == 0
    pred_good = predicted == 1
    pred_bad = predicted == 0

    # Kept as a Series so that .sum() skips missing credit amounts.
    balance_all = pd.Series(np.where(is_good, interest_rate, -loss_given_default) * amounts)

    earnings_made = balance_all[is_good & pred_good].sum()
    earnings_omitted = balance_all[is_good & pred_bad].sum()
    losses_made = balance_all[is_bad & pred_good].sum()
    losses_omitted = balance_all[is_bad & pred_bad].sum()

    results = pd.DataFrame(
        [[earnings_made, earnings_omitted, losses_made, losses_omitted]],
        columns=['Earnings made', 'Earnings omitted', 'Losses made', 'Losses omitted'],
    )

    final_balance = earnings_made + losses_made
    max_income = balance_all[is_good].sum()
    # Without any income to compare against the ratio is undefined.
    perc_of_max_income = final_balance / max_income if max_income != 0 else float('nan')
    return (results, final_balance, perc_of_max_income)

In [147]:
# Run the calculation on the demo inputs; unpacks the summary table, the final
# balance and that balance as a fraction of the maximum attainable income.
results, balance, perc = calculateEarningsLosses(X_test, y_pred, y_test)