## Importing and cleaning the dataset

In [1]:
# Load the German credit dataset from CSV and inspect its structure
credit <- read.csv(file = "german_credit.csv")
str(credit)

'data.frame':	1000 obs. of  21 variables:
 $ Creditability                    : int  1 1 1 1 1 1 1 1 1 1 ...
 $ Account.Balance                  : int  1 1 2 1 1 1 1 1 4 2 ...
 $ Duration.of.Credit..month.       : int  18 9 12 12 12 10 8 6 18 24 ...
 $ Payment.Status.of.Previous.Credit: int  4 4 2 4 4 4 4 4 4 2 ...
 $ Purpose                          : int  2 0 9 0 0 0 0 0 3 3 ...
 $ Credit.Amount                    : int  1049 2799 841 2122 2171 2241 3398 1361 1098 3758 ...
 $ Value.Savings.Stocks             : int  1 1 2 1 1 1 1 1 1 3 ...
 $ Length.of.current.employment     : int  2 3 4 3 3 2 4 2 1 1 ...
 $ Instalment.per.cent              : int  4 2 2 3 4 1 1 2 4 1 ...
 $ Sex...Marital.Status             : int  2 3 2 3 3 3 3 3 2 2 ...
 $ Guarantors                       : int  1 1 1 1 1 1 1 1 1 1 ...
 $ Duration.in.Current.address      : int  4 2 4 2 4 3 4 4 4 4 ...
 $ Most.valuable.available.asset    : int  2 1 1 1 2 1 1 1 3 4 ...
 $ Age..years.                      : int  21 36 23

In [2]:
# Column positions to keep: all 21 columns except 3, 6 and 14
# (duration of credit in months, credit amount and age), which this
# analysis treats as not affecting one's creditability.
s <- setdiff(1:21, c(3, 6, 14))


In [4]:
# Turn every retained column into a factor (modifying `credit` in place),
# then keep only those columns in `credit_new` and show the result.
credit[s] <- lapply(credit[s], as.factor)
credit_new <- credit[, s]
str(credit_new)

'data.frame':	1000 obs. of  18 variables:
 $ Creditability                    : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
 $ Account.Balance                  : Factor w/ 4 levels "1","2","3","4": 1 1 2 1 1 1 1 1 4 2 ...
 $ Payment.Status.of.Previous.Credit: Factor w/ 5 levels "0","1","2","3",..: 5 5 3 5 5 5 5 5 5 3 ...
 $ Purpose                          : Factor w/ 10 levels "0","1","2","3",..: 3 1 9 1 1 1 1 1 4 4 ...
 $ Value.Savings.Stocks             : Factor w/ 5 levels "1","2","3","4",..: 1 1 2 1 1 1 1 1 1 3 ...
 $ Length.of.current.employment     : Factor w/ 5 levels "1","2","3","4",..: 2 3 4 3 3 2 4 2 1 1 ...
 $ Instalment.per.cent              : Factor w/ 4 levels "1","2","3","4": 4 2 2 3 4 1 1 2 4 1 ...
 $ Sex...Marital.Status             : Factor w/ 4 levels "1","2","3","4": 2 3 2 3 3 3 3 3 2 2 ...
 $ Guarantors                       : Factor w/ 3 levels "1","2","3": 1 1 1 1 1 1 1 1 1 1 ...
 $ Duration.in.Current.address      : Factor w/ 4 levels "1","2","3","4": 4

## Training and Test datasets

In [5]:
# Draw the row indices of the test set: a random 30% of the rows of
# `credit`, sampled without replacement (the default for sample()).
# The seed is fixed first so the train/test split, and everything fitted
# on it, is reproducible from run to run.
set.seed(1)
indexes <- sample(seq_len(nrow(credit)), size = 0.3 * nrow(credit))

In [6]:
# Rows whose positions were sampled into `indexes` form the test set;
# all remaining rows form the training set.
credit_test <- credit_new[indexes, ]
credit_train <- credit_new[-indexes, ]

# Show the (rows, columns) size of each partition
dim(credit_test)
dim(credit_train)

## Building the predictive model using Logistic Regression

Logistic regression is a classification algorithm used to predict a binary outcome given a set of independent variables. 

In [7]:
# Fit a binomial (logistic) GLM on the training set, using five of the
# categorical predictors to explain Creditability.
set.seed(1)
logisticModel <- glm(
  Creditability ~ Account.Balance +
    Payment.Status.of.Previous.Credit +
    Purpose +
    Length.of.current.employment +
    Sex...Marital.Status,
  family = binomial,
  data = credit_train
)

In [8]:
# Auto-print the fitted model (its call and the estimated coefficients)
logisticModel


Call:  glm(formula = Creditability ~ Account.Balance + Payment.Status.of.Previous.Credit + 
    Purpose + Length.of.current.employment + Sex...Marital.Status, 
    family = binomial, data = credit_train)

Coefficients:
                       (Intercept)                    Account.Balance2  
                           -2.0584                              0.5355  
                  Account.Balance3                    Account.Balance4  
                            1.3079                              1.7225  
Payment.Status.of.Previous.Credit1  Payment.Status.of.Previous.Credit2  
                            0.2853                              1.0601  
Payment.Status.of.Previous.Credit3  Payment.Status.of.Previous.Credit4  
                            0.9094                              1.9439  
                          Purpose1                            Purpose2  
                            1.1341                              0.4923  
                          Purpose3                