# Supress warnings

In [1]:
import warnings
warnings.filterwarnings('ignore')

# Install Dependencies 

In [2]:
import pandas as pd 
import numpy as np
# import keras 
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

pd.set_option('display.max_columns', None)

%matplotlib inline

# Create a helper function

In [3]:
# missing data info 

def show_missing_data(df):
    """
    This function takes only paramaeter that is Data Frame 
    and it shows the missing data with percentage and type 
    
    df < -  pandas data frame 
    """
    
    total = df.isnull().sum().sort_values(ascending=False)
    percent = (df.isnull().sum()/df.isnull().count()  * 100).sort_values(ascending=False)
    data_type = df.dtypes
    missing_data = pd.concat([total,percent,data_type],axis=1,keys=['Total','Percent','data_type']).sort_values("Total", axis = 0, ascending = False)
    return missing_data.head(df.shape[1])




# loading training and test data

In [4]:
train_data = pd.read_csv('risk_analytics_train.csv', index_col=0, header=0, low_memory=False) 
test_data = pd.read_csv('risk_analytics_test.csv', index_col=0, header=0)

## Preprocessing the training dataset**

In [5]:
train_data.shape 

(614, 12)

In [6]:
train_data.columns

Index(['Gender', 'Married', 'Dependents', 'Education', 'Self_Employed',
       'ApplicantIncome', 'CoapplicantIncome', 'LoanAmount',
       'Loan_Amount_Term', 'Credit_History', 'Property_Area', 'Loan_Status'],
      dtype='object')

In [7]:
test_data.shape

(367, 11)

In [8]:
test_data.columns

Index(['Gender', 'Married', 'Dependents', 'Education', 'Self_Employed',
       'ApplicantIncome', 'CoapplicantIncome', 'LoanAmount',
       'Loan_Amount_Term', 'Credit_History', 'Property_Area'],
      dtype='object')

In [9]:
train_data.head(10)

Unnamed: 0_level_0,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
Loan_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
LP001002,Male,No,0.0,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y
LP001003,Male,Yes,1.0,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
LP001005,Male,Yes,0.0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
LP001006,Male,Yes,0.0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
LP001008,Male,No,0.0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y
LP001011,Male,Yes,2.0,Graduate,Yes,5417,4196.0,267.0,360.0,1.0,Urban,Y
LP001013,Male,Yes,0.0,Not Graduate,No,2333,1516.0,95.0,360.0,1.0,Urban,Y
LP001014,Male,Yes,3.0,Graduate,No,3036,2504.0,158.0,360.0,0.0,Semiurban,N
LP001018,Male,Yes,2.0,Graduate,No,4006,1526.0,168.0,360.0,1.0,Urban,Y
LP001020,Male,Yes,1.0,Graduate,No,12841,10968.0,349.0,360.0,1.0,Semiurban,N


In [10]:
#finding the missing values
print(train_data.isnull().sum())


Gender               13
Married               3
Dependents           15
Education             0
Self_Employed        32
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount           22
Loan_Amount_Term     14
Credit_History       50
Property_Area         0
Loan_Status           0
dtype: int64


In [11]:
show_missing_data(train_data)  # using helper function

Unnamed: 0,Total,Percent,data_type
Credit_History,50,8.143322,float64
Self_Employed,32,5.211726,object
LoanAmount,22,3.583062,float64
Dependents,15,2.442997,float64
Loan_Amount_Term,14,2.28013,float64
Gender,13,2.117264,object
Married,3,0.488599,object
Education,0,0.0,object
ApplicantIncome,0,0.0,int64
CoapplicantIncome,0,0.0,float64


In [12]:
show_missing_data(test_data) # using helper function

Unnamed: 0,Total,Percent,data_type
Credit_History,29,7.901907,float64
Self_Employed,23,6.26703,object
Gender,11,2.997275,object
Dependents,10,2.724796,float64
Loan_Amount_Term,6,1.634877,float64
LoanAmount,5,1.362398,float64
Married,0,0.0,object
Education,0,0.0,object
ApplicantIncome,0,0.0,int64
CoapplicantIncome,0,0.0,int64


In [13]:
train_data.dtypes

Gender                object
Married               object
Dependents           float64
Education             object
Self_Employed         object
ApplicantIncome        int64
CoapplicantIncome    float64
LoanAmount           float64
Loan_Amount_Term     float64
Credit_History       float64
Property_Area         object
Loan_Status           object
dtype: object

In [14]:
train_data["Dependents"].mode()

0    0.0
Name: Dependents, dtype: float64

In [15]:
train_data["Dependents"].mode()[0]

0.0

# Imputing categorical missing data with mode value

In [16]:
colname1=["Gender","Married","Dependents","Self_Employed", "Loan_Amount_Term"]

for x in colname1:
    train_data[x].fillna(train_data[x].mode()[0],inplace=True)

In [17]:
train_data["Gender"].mode()

0    Male
Name: Gender, dtype: object

In [18]:
print(train_data.isnull().sum())

Gender                0
Married               0
Dependents            0
Education             0
Self_Employed         0
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount           22
Loan_Amount_Term      0
Credit_History       50
Property_Area         0
Loan_Status           0
dtype: int64


# Imputing numerical missing data with mean value

In [19]:
train_data["LoanAmount"].fillna(round(train_data["LoanAmount"].mean(),0),
                                inplace=True)
print(train_data.isnull().sum())

Gender                0
Married               0
Dependents            0
Education             0
Self_Employed         0
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount            0
Loan_Amount_Term      0
Credit_History       50
Property_Area         0
Loan_Status           0
dtype: int64


# Imputing values for credit_history column differently

In [20]:
train_data['Credit_History'].fillna(value=0, inplace=True)
#train_data['Credit_History']=train_data['Credit_History'].fillna(value=0)
print(train_data.isnull().sum())

Gender               0
Married              0
Dependents           0
Education            0
Self_Employed        0
ApplicantIncome      0
CoapplicantIncome    0
LoanAmount           0
Loan_Amount_Term     0
Credit_History       0
Property_Area        0
Loan_Status          0
dtype: int64


# Chek the categories of every column 

In [76]:
for i in train_data.columns:
    print("######  " , i , "  ######")
    print(train_data[i].value_counts())
    print()

######   Gender   ######
Gender
1    502
0    112
Name: count, dtype: int64

######   Married   ######
Married
1    401
0    213
Name: count, dtype: int64

######   Dependents   ######
Dependents
0.0    360
1.0    102
2.0    101
3.0     51
Name: count, dtype: int64

######   Education   ######
Education
0    480
1    134
Name: count, dtype: int64

######   Self_Employed   ######
Self_Employed
0    532
1     82
Name: count, dtype: int64

######   ApplicantIncome   ######
ApplicantIncome
2500    9
4583    6
6000    6
2600    6
3333    5
       ..
3244    1
4408    1
3917    1
3992    1
7583    1
Name: count, Length: 505, dtype: int64

######   CoapplicantIncome   ######
CoapplicantIncome
0.0       273
2500.0      5
2083.0      5
1666.0      5
2250.0      3
         ... 
2791.0      1
1010.0      1
1695.0      1
2598.0      1
240.0       1
Name: count, Length: 287, dtype: int64

######   LoanAmount   ######
LoanAmount
146.0    23
120.0    20
110.0    17
100.0    15
160.0    12
         ..

data -> numbers 

encoding technique
1. dummy encoding
2. onehot encoder
2. label encoder
4. manual encoding 

# Transforming categorical data to numerical

In [77]:

from sklearn import preprocessing
colname=['Gender','Married','Education','Self_Employed','Property_Area',
         'Loan_Status']

le=preprocessing.LabelEncoder()

for x in colname:
     train_data[x]=le.fit_transform(train_data[x])


## converted Loan status as Y-->1 and N-->0

In [78]:
train_data.head()

Unnamed: 0_level_0,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
Loan_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
LP001002,1,0,0.0,0,0,5849,0.0,146.0,360.0,1.0,2,1
LP001003,1,1,1.0,0,0,4583,1508.0,128.0,360.0,1.0,0,0
LP001005,1,1,0.0,0,1,3000,0.0,66.0,360.0,1.0,2,1
LP001006,1,1,0.0,1,0,2583,2358.0,120.0,360.0,1.0,2,1
LP001008,1,0,0.0,0,0,6000,0.0,141.0,360.0,1.0,2,1


## Preprocessing the testing dataset

In [79]:
test_data.head()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Y_predictions
0,LP001015,Male,Yes,0.0,Graduate,No,5720,0,110.0,360.0,1.0,Urban,Eligible
1,LP001022,Male,Yes,1.0,Graduate,No,3076,1500,126.0,360.0,1.0,Urban,Eligible
2,LP001031,Male,Yes,2.0,Graduate,No,5000,1800,208.0,360.0,1.0,Urban,Eligible
3,LP001035,Male,Yes,2.0,Graduate,No,2340,2546,100.0,360.0,,Urban,Not Eligible
4,LP001051,Male,No,0.0,Not Graduate,No,3276,0,78.0,360.0,1.0,Urban,Eligible


In [80]:
#finding the missing values

print(test_data.isnull().sum())
print(test_data.shape)

Loan_ID               0
Gender               11
Married               0
Dependents           10
Education             0
Self_Employed        23
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount            5
Loan_Amount_Term      6
Credit_History       29
Property_Area         0
Y_predictions         0
dtype: int64
(367, 13)


In [81]:
#imputing missing data with mode value 

colname1=["Gender","Dependents","Self_Employed", "Loan_Amount_Term"]


for x in colname1:
    test_data[x].fillna(test_data[x].mode()[0],inplace=True)


In [82]:
print(test_data.isnull().sum())

Loan_ID               0
Gender                0
Married               0
Dependents            0
Education             0
Self_Employed         0
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount            5
Loan_Amount_Term      0
Credit_History       29
Property_Area         0
Y_predictions         0
dtype: int64


In [83]:
#imputing numerical missing data with mean value

test_data["LoanAmount"].fillna(round(test_data["LoanAmount"].mean(),0),inplace=True)
print(test_data.isnull().sum())

Loan_ID               0
Gender                0
Married               0
Dependents            0
Education             0
Self_Employed         0
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount            0
Loan_Amount_Term      0
Credit_History       29
Property_Area         0
Y_predictions         0
dtype: int64


In [84]:
#imputing values for credit_history column differently
test_data['Credit_History'].fillna(value=0, inplace=True)
print(test_data.isnull().sum())

Loan_ID              0
Gender               0
Married              0
Dependents           0
Education            0
Self_Employed        0
ApplicantIncome      0
CoapplicantIncome    0
LoanAmount           0
Loan_Amount_Term     0
Credit_History       0
Property_Area        0
Y_predictions        0
dtype: int64


In [85]:
#transforming categorical data to numerical

from sklearn import preprocessing

colname=['Gender','Married','Education','Self_Employed','Property_Area']

le=preprocessing.LabelEncoder()

for x in colname:
     test_data[x]=le.fit_transform(test_data[x])

In [86]:
test_data.head()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Y_predictions
0,LP001015,1,1,0.0,0,0,5720,0,110.0,360.0,1.0,2,Eligible
1,LP001022,1,1,1.0,0,0,3076,1500,126.0,360.0,1.0,2,Eligible
2,LP001031,1,1,2.0,0,0,5000,1800,208.0,360.0,1.0,2,Eligible
3,LP001035,1,1,2.0,0,0,2340,2546,100.0,360.0,0.0,2,Not Eligible
4,LP001051,1,0,0.0,1,0,3276,0,78.0,360.0,1.0,2,Eligible


# Creating training and testing datasets and running the model 

In [87]:
X_train=train_data.values[  :  ,  :-1]
Y_train=train_data.values[:,-1]

In [88]:
# convert the y train data type to int
Y_train=Y_train.astype(int)

In [89]:
Y_train

array([1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1,
       0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1,
       1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0,
       0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1,
       1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1,
       1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1,
       1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1,
       0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0,
       1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1,
       1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1,
       0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0,

In [90]:
#test_data.head()
X_test=test_data.values[:,:]

In [91]:
print(X_train.shape)
print(X_test.shape)
print(Y_train.shape)

(614, 11)
(367, 13)
(614,)


# Scaling the train data and test data

In [92]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

ValueError: could not convert string to float: 'LP001015'

In [39]:
X_train

array([[ 0.47234264, -1.37208932, -0.73780632, ...,  0.2732313 ,
         0.54095432,  1.22329839],
       [ 0.47234264,  0.72881553,  0.25346957, ...,  0.2732313 ,
         0.54095432, -1.31851281],
       [ 0.47234264,  0.72881553, -0.73780632, ...,  0.2732313 ,
         0.54095432,  1.22329839],
       ...,
       [ 0.47234264,  0.72881553,  0.25346957, ...,  0.2732313 ,
         0.54095432,  1.22329839],
       [ 0.47234264,  0.72881553,  1.24474546, ...,  0.2732313 ,
         0.54095432,  1.22329839],
       [-2.11710719, -1.37208932, -0.73780632, ...,  0.2732313 ,
        -1.84858491, -0.04760721]])

# Creating the model

In [40]:
from sklearn import svm # cntrl + shift + -

In [41]:
from sklearn.tree import DecisionTreeClassifier

In [42]:
svc_model=svm.SVC(kernel='rbf',
                  C=1.0,
                  gamma=0.1)

svc_model.fit(X_train, Y_train)

In [43]:
dt_model = DecisionTreeClassifier()

In [44]:
dt_model.fit(X_train, Y_train)

# Prediction on test data

In [45]:
Y_pred = svc_model.predict(X_test)

In [46]:
print(list(Y_pred))


[1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 

# create the final df with predicted output for test data

In [47]:
test_data=pd.read_csv('risk_analytics_test.csv',header=0)
test_data["Y_predictions"]=Y_pred
test_data.head()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Y_predictions
0,LP001015,Male,Yes,0.0,Graduate,No,5720,0,110.0,360.0,1.0,Urban,1
1,LP001022,Male,Yes,1.0,Graduate,No,3076,1500,126.0,360.0,1.0,Urban,1
2,LP001031,Male,Yes,2.0,Graduate,No,5000,1800,208.0,360.0,1.0,Urban,1
3,LP001035,Male,Yes,2.0,Graduate,No,2340,2546,100.0,360.0,,Urban,0
4,LP001051,Male,No,0.0,Not Graduate,No,3276,0,78.0,360.0,1.0,Urban,1


In [48]:
test_data["Y_predictions"]=test_data["Y_predictions"].map({1:"Eligible",
                                                           0:"Not Eligible"})

In [49]:
test_data.head()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Y_predictions
0,LP001015,Male,Yes,0.0,Graduate,No,5720,0,110.0,360.0,1.0,Urban,Eligible
1,LP001022,Male,Yes,1.0,Graduate,No,3076,1500,126.0,360.0,1.0,Urban,Eligible
2,LP001031,Male,Yes,2.0,Graduate,No,5000,1800,208.0,360.0,1.0,Urban,Eligible
3,LP001035,Male,Yes,2.0,Graduate,No,2340,2546,100.0,360.0,,Urban,Not Eligible
4,LP001051,Male,No,0.0,Not Graduate,No,3276,0,78.0,360.0,1.0,Urban,Eligible


In [50]:
test_data.to_csv('test_data.csv')

In [51]:
test_data.Y_predictions.value_counts()

Y_predictions
Eligible        285
Not Eligible     82
Name: count, dtype: int64

# Using cross validation (Optional)

In [52]:

from sklearn.linear_model import LogisticRegression
from sklearn import svm
from sklearn.neighbors import KNeighborsClassifier

#classifier=svm.SVC(kernel='rbf',C=1.0,gamma=0.1) #75.89%
#classifier=KNeighborsClassifier(n_neighbors=11, metric='euclidean') #75.07%
#classifier=svm.SVC(kernel='rbf',C=10.0,gamma=0.001) #77.03%
classifier=LogisticRegression() #77.20%

#performing kfold_cross_validation
from sklearn.model_selection import KFold
kfold_cv=KFold(n_splits=10)
print(kfold_cv)

from sklearn.model_selection import cross_val_score
#running the model using scoring metric as accuracy
kfold_cv_result=cross_val_score(estimator=classifier,X=X_train,
                                                 y=Y_train, cv=kfold_cv)
print(kfold_cv_result)
#finding the mean
print(kfold_cv_result.mean())


KFold(n_splits=10, random_state=None, shuffle=False)
[0.77419355 0.82258065 0.74193548 0.72580645 0.7704918  0.68852459
 0.80327869 0.7704918  0.78688525 0.83606557]
0.772025383395029


In [53]:
svc_model.score(X_train,Y_train)

0.7947882736156352

In [None]:
# for x in range(0,len(Y_pred_col)):

#     if Y_pred_col[x]==0:
#         Y_pred_col[x]= "N"
#     else:
#         Y_pred_col[x]="Y"
    
# print(Y_pred_col)

# <a> Predicting for single person </a>

In [54]:
print("#######  Actual data  #######")
print(train_data.head(1)) 
print("#######  Scaler data  #######")
print(X_train[0])
print("#######  Label  #######")
print(Y_train[0])

#######  Actual data  #######
          Gender  Married  Dependents  Education  Self_Employed  \
Loan_ID                                                           
LP001002       1        0         0.0          0              0   

          ApplicantIncome  CoapplicantIncome  LoanAmount  Loan_Amount_Term  \
Loan_ID                                                                      
LP001002             5849                0.0       146.0             360.0   

          Credit_History  Property_Area  Loan_Status  
Loan_ID                                               
LP001002             1.0              2            1  
#######  Scaler data  #######
[ 0.47234264 -1.37208932 -0.73780632 -0.52836225 -0.39260074  0.07299082
 -0.55448733 -0.00473263  0.2732313   0.54095432  1.22329839]
#######  Label  #######
1


## Create a json object from row

In [55]:
from pprint import pprint

In [56]:
a = train_data.iloc[0]
a

Gender                  1.0
Married                 0.0
Dependents              0.0
Education               0.0
Self_Employed           0.0
ApplicantIncome      5849.0
CoapplicantIncome       0.0
LoanAmount            146.0
Loan_Amount_Term      360.0
Credit_History          1.0
Property_Area           2.0
Loan_Status             1.0
Name: LP001002, dtype: float64

In [57]:
c = a.to_dict()
pprint(c)

{'ApplicantIncome': 5849.0,
 'CoapplicantIncome': 0.0,
 'Credit_History': 1.0,
 'Dependents': 0.0,
 'Education': 0.0,
 'Gender': 1.0,
 'LoanAmount': 146.0,
 'Loan_Amount_Term': 360.0,
 'Loan_Status': 1.0,
 'Married': 0.0,
 'Property_Area': 2.0,
 'Self_Employed': 0.0}


In [58]:
sample_json = c
pprint(sample_json)

{'ApplicantIncome': 5849.0,
 'CoapplicantIncome': 0.0,
 'Credit_History': 1.0,
 'Dependents': 0.0,
 'Education': 0.0,
 'Gender': 1.0,
 'LoanAmount': 146.0,
 'Loan_Amount_Term': 360.0,
 'Loan_Status': 1.0,
 'Married': 0.0,
 'Property_Area': 2.0,
 'Self_Employed': 0.0}


In [59]:
gen = sample_json['Gender']
mar = sample_json['Married']
dep = sample_json['Dependents']
edu = sample_json['Education']
sle = sample_json['Self_Employed']
api = sample_json['ApplicantIncome']
cpi = sample_json['CoapplicantIncome']
lam = sample_json['LoanAmount']
lat = sample_json['Loan_Amount_Term']
crh = sample_json['Credit_History']
pra = sample_json['Property_Area']

In [60]:
person = [[gen,mar,dep,edu,sle,api,cpi,lam,lat,crh,pra]]
print(person)

[[1.0, 0.0, 0.0, 0.0, 0.0, 5849.0, 0.0, 146.0, 360.0, 1.0, 2.0]]


In [61]:
person = scaler.transform(person)
print(person)

[[ 0.47234264 -1.37208932 -0.73780632 -0.52836225 -0.39260074  0.07299082
  -0.55448733 -0.00473263  0.2732313   0.54095432  1.22329839]]


## Another method  (optional)

In [62]:
X_train[0]

array([ 0.47234264, -1.37208932, -0.73780632, -0.52836225, -0.39260074,
        0.07299082, -0.55448733, -0.00473263,  0.2732313 ,  0.54095432,
        1.22329839])

In [63]:
# test1  = [[ 0.47234264, -1.37208932, -0.73780632, -0.52836225, -0.39260074,
#         0.07299055448733, -0.00473263,  0.2732313 ,  0.54095432,
#         1.22329839]]82, -0.

# print(test1)

----

In [64]:
classes = np.array(['Not-Eligible:- 0', 'Eligible:- 1'])
print(classes)

['Not-Eligible:- 0' 'Eligible:- 1']


In [65]:
class_ind =svc_model.predict(person)
print(class_ind)
type(class_ind)

[1]


numpy.ndarray

In [66]:
classes[class_ind]

array(['Eligible:- 1'], dtype='<U16')

In [67]:
print("Actual class : {}\n Predicted Class: {}".format(train_data["Loan_Status"][0],classes[class_ind]))

Actual class : 1
 Predicted Class: ['Eligible:- 1']


# <a> CODE FOR DEPLOYMENT: </a>

## 1. Save the scaler object

In [68]:
import joblib   #

In [69]:
# dump the sclaer in pwd

joblib.dump(scaler,'svm_scaler.pkl')

['svm_scaler.pkl']

In [70]:
# load the sclaer from pwd

person_scaler = joblib.load('svm_scaler.pkl')


eg.

## 2. save the model

In [71]:
import pickle

In [72]:
# Save to file in the current working directory

pkl_filename = "svc_pickle_model.pkl"
with open(pkl_filename, 'wb') as file1:
    pickle.dump(svc_model, file1)

In [73]:
# Load from file
pkl_filename = "svc_pickle_model.pkl"
with open(pkl_filename, 'rb') as file2:
    svc_pickle_model = pickle.load(file2)

In [74]:
joblib.dump(dt_model, "dt_pickle_model.pkl")

['dt_pickle_model.pkl']

## 3. Create a function for prediction 

In [97]:
def return_prediction(model,scaler,sample_json):

    """
    Objective: This function is used to predict on unseen 
    
    Pramas:
    -------
        model: trained model on training data
        scaler: object for normalizing 
        sampl_json:  input json data for prediction
        
    Return:
    ------
        prediction : eligble or else 

    """
    # For larger data features, you should probably write a for loop
    # That builds out this array for you
    
    gen = sample_json['Gender']
    mar = sample_json['Married']
    dep = sample_json['Dependents']
    edu = sample_json['Education']
    sle = sample_json['Self_Employed']
    api = sample_json['ApplicantIncome']
    cpi = sample_json['CoapplicantIncome']
    lam = sample_json['LoanAmount']
    lat = sample_json['Loan_Amount_Term']
    crh = sample_json['Credit_History']
    pra = sample_json['Property_Area']
    
    person = [[gen,mar,dep,edu,sle,api,cpi,lam,lat,crh,pra]]
    
    person = scaler.transform(person)
    
    classes = np.array(['Not-Eligible:- 0', 'Eligible:- 1'])
    
    class_ind = model.predict(person)
    
    return classes[class_ind]
                    

# Test The Function

### create a function for user input

In [94]:
def ask_user():
    
    user_data = {}
    
    a = float(input("Enter Your Gender:\n 1 -> Male | 0 -> Female  "))
    user_data["Gender"] = a
    
    a = float(input("Married Or Unmarried:\n 1 -> Yes | 0 -> No  "))
    user_data["Married"] = a
    
    a = float(input("Dependent People : "))
    user_data["Dependents"] = a
    
    a = float(input("Education :\n 1 -> Not Graduate | 0 -> Graduate "))
    user_data["Education"] = a
    
    a = float(input("Self_Employed :\n 1 -> Yes | 0 -> No  "))
    user_data["Self_Employed"] = a
    
    a = float(input("Applicant Income In digits : "))
    user_data["ApplicantIncome"] = a
    
    a = float(input("Coapplicant Income In digits : "))
    user_data["CoapplicantIncome"] = a
    
    a = float(input("Loan Amount In digits : "))
    user_data["LoanAmount"] = a
    
    a = float(input("Loan Amount Term In digits : "))
    user_data["Loan_Amount_Term"] = a
    
    a = float(input("Credit History :\n 1 -> Yes | 0 -> No   "))
    user_data["Credit_History"] = a
    
    a = float(input("Property Area :\n 0 -> Rural | 1 - > Semiurban | 2 -> Urban "))
    user_data["Property_Area"] = a
    
    return user_data

    

In [96]:
user_info = ask_user()
user_info 

Enter Your Gender:
 1 -> Male | 0 -> Female  1
Married Or Unmarried:
 1 -> Yes | 0 -> No  0
Dependent People : 5
Education :
 1 -> Not Graduate | 0 -> Graduate 0
Self_Employed :
 1 -> Yes | 0 -> No  0
Applicant Income In digits : 0
Coapplicant Income In digits : 0
Loan Amount In digits : 50000
Loan Amount Term In digits : 566777
Credit History :
 1 -> Yes | 0 -> No   0
Property Area :
 0 -> Rural | 1 - > Semiurban | 2 -> Urban 2


{'Gender': 1.0,
 'Married': 0.0,
 'Dependents': 5.0,
 'Education': 0.0,
 'Self_Employed': 0.0,
 'ApplicantIncome': 0.0,
 'CoapplicantIncome': 0.0,
 'LoanAmount': 50000.0,
 'Loan_Amount_Term': 566777.0,
 'Credit_History': 0.0,
 'Property_Area': 2.0}

In [82]:
return_prediction(model = svc_pickle_model,
                 scaler = person_scaler,
                 sample_json = user_info)

array(['Eligible:- 1'], dtype='<U16')

In [83]:
return_prediction(model = dt_model,
                 scaler = person_scaler,
                 sample_json = user_info)

array(['Eligible:- 1'], dtype='<U16')

In [None]:
data structures 

1. list 4
 dict 
    

In [None]:
10 q 

# convert the user_info in double qoutes 

In [None]:
import json
print(json.dumps(user_info))

# <a> API Call with Postman </a>

## THIS IS WHAT WE DO IN POSTMAN 

# <a> API Call with Python </a>

## Step 1:

Make sure you are already running 01-Basic-API.py

## Step 2:

Instead of using PostMan, let us programmatically request with an API

In [None]:
import requests

test_example = {"Gender": 1.0, 
                  "Married": 0.0, 
                  "Dependents": 0.0, 
                  "Education": 0.0, 
                  "Self_Employed": 0.0, 
                  "ApplicantIncome": 5849.0, 
                  "CoapplicantIncome": 0.0, 
                  "LoanAmount": 146.0, 
                  "Loan_Amount_Term": 360.0, 
                  "Credit_History": 1.0, 
                  "Property_Area": 2.0}

In [None]:
r = requests.post("http://192.168.1.34:5000/prediction",
                  json=test_example)

In [None]:
if r.status_code == 200:
    print(f"Success: {r.text}")
else:
    print(f"Failure: {r.text}")

In [None]:
r.url

### <a style="color:#FF0000;"> IMP Note : Set localhost = '0.0.0.0' and port = 8080 in 01-Basic-API.py To accept the request from other client over a wifi Connection </a>

# <center> <a>THE END </a> </center>