# Suppress warnings

In [None]:
import warnings
warnings.filterwarnings('ignore')

# Import Dependencies

In [None]:
import pandas as pd 
import numpy as np
# import keras 
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

pd.set_option('display.max_columns', None)

%matplotlib inline

# Create a helper function

In [None]:
# missing data info 

def show_missing_data(df):
    """
    Summarise the missing values of a pandas DataFrame, column by column.

    df <- pandas data frame

    Returns a DataFrame indexed by the column names of ``df`` with three
    columns: 'Total' (number of null cells), 'Percent' (null cells as a
    percentage of the number of rows) and 'data_type' (the column dtype),
    ordered by 'Total' from highest to lowest.
    """
    null_mask = df.isnull()
    null_counts = null_mask.sum()

    total = null_counts.sort_values(ascending=False)
    percent = (null_counts / null_mask.count() * 100).sort_values(ascending=False)

    summary = pd.concat(
        [total, percent, df.dtypes],
        axis=1,
        keys=['Total', 'Percent', 'data_type'],
    )
    summary = summary.sort_values("Total", axis=0, ascending=False)

    # There is one row per column of df, so head() with the column count
    # returns every row of the summary.
    return summary.head(df.shape[1])




# loading training and test data

In [None]:
# Both files are read with their first column as the row index and the first
# line as the header.
# low_memory=False makes pandas parse the training file in one pass instead of
# in chunks, which avoids mixed-type inference within a column at the cost of
# using more memory while parsing.
train_data = pd.read_csv('risk_analytics_train.csv', index_col=0, header=0, low_memory=False)
test_data = pd.read_csv('risk_analytics_test.csv', index_col=0, header=0) 

## Preprocessing the training dataset

In [None]:
train_data.shape 

In [None]:
train_data.columns

In [None]:
test_data.shape

In [None]:
test_data.columns

In [None]:
train_data.head(10)

In [None]:
#finding the missing values
print(train_data.isnull().sum())


In [None]:
show_missing_data(train_data)  # using helper function

In [None]:
show_missing_data(test_data) # using helper function

In [None]:
train_data.dtypes

In [None]:
train_data["Dependents"].mode()

In [None]:
train_data["Dependents"].mode()[0]

# Imputing categorical missing data with mode value

In [None]:
# Columns whose gaps are filled with the most frequent value of the column.
colname1=["Gender","Married","Dependents","Self_Employed", "Loan_Amount_Term"]

for x in colname1:
    # Assign the result back instead of calling fillna(inplace=True) on the
    # selected column: the in-place call operates on an intermediate object,
    # which pandas deprecates and which leaves train_data unchanged when
    # Copy-on-Write is active.
    train_data[x] = train_data[x].fillna(train_data[x].mode()[0])

In [None]:
train_data["Gender"].mode()

In [None]:
print(train_data.isnull().sum())

# Imputing numerical missing data with mean value

In [None]:
# Fill the gaps in LoanAmount with the column mean rounded to a whole number.
# The filled column is assigned back rather than modified through a chained
# fillna(inplace=True), which is not guaranteed to update train_data.
train_data["LoanAmount"] = train_data["LoanAmount"].fillna(
    round(train_data["LoanAmount"].mean(), 0)
)
print(train_data.isnull().sum())

# Imputing values for credit_history column differently

In [None]:
# Missing Credit_History entries are filled with 0 (rather than with the mode
# or the mean). The result is assigned back because a chained
# fillna(inplace=True) is not guaranteed to update train_data.
train_data['Credit_History'] = train_data['Credit_History'].fillna(value=0)
print(train_data.isnull().sum())

# Check the categories of every column

In [None]:
# Print the frequency of every distinct value, one column at a time,
# under a banner carrying the column name.
for column_name in train_data.columns:
    banner_parts = ("######  ", column_name, "  ######")
    print(*banner_parts)
    print(train_data[column_name].value_counts())
    print()

In [None]:
# data -> numbers
#
# Encoding techniques:
# 1. dummy encoding
# 2. one-hot encoder
# 3. label encoder
# 4. manual encoding

# Transforming categorical data to numerical

In [None]:

from sklearn import preprocessing

# Text columns that are replaced by integer codes.
colname=['Gender','Married','Education','Self_Employed','Property_Area',
         'Loan_Status']

# One fitted encoder is kept per column so that each text -> integer mapping
# can be inspected (classes_) or reversed (inverse_transform) afterwards.
# A single shared encoder only remembers the last column it was fitted on.
label_encoders = {}

for x in colname:
    le = preprocessing.LabelEncoder()
    train_data[x] = le.fit_transform(train_data[x])
    label_encoders[x] = le


## converted Loan status as Y-->1 and N-->0

In [None]:
train_data.head()

## Preprocessing the testing dataset

In [None]:
test_data.head()

In [None]:
#finding the missing values

print(test_data.isnull().sum())
print(test_data.shape)

In [None]:
# Impute missing data with the mode value of each column.
# "Married" is listed as well, matching the column list used for the training
# data; filling a column that has no gaps leaves it unchanged.
colname1=["Gender","Married","Dependents","Self_Employed", "Loan_Amount_Term"]

for x in colname1:
    # Assign the result back: a chained fillna(inplace=True) works on an
    # intermediate object and is not guaranteed to update test_data.
    test_data[x] = test_data[x].fillna(test_data[x].mode()[0])


In [None]:
print(test_data.isnull().sum())

In [None]:
# Impute missing LoanAmount values with the column mean rounded to a whole
# number. The filled column is assigned back rather than modified through a
# chained fillna(inplace=True), which is not guaranteed to update test_data.
test_data["LoanAmount"] = test_data["LoanAmount"].fillna(
    round(test_data["LoanAmount"].mean(), 0)
)
print(test_data.isnull().sum())

In [None]:
# Missing Credit_History entries are filled with 0 (rather than with the mode
# or the mean). The result is assigned back because a chained
# fillna(inplace=True) is not guaranteed to update test_data.
test_data['Credit_History'] = test_data['Credit_History'].fillna(value=0)
print(test_data.isnull().sum())

In [None]:
# Replace the text categories of the test data with integer codes.

from sklearn import preprocessing

colname=['Gender','Married','Education','Self_Employed','Property_Area']

le=preprocessing.LabelEncoder()

# NOTE(review): the encoder is re-fitted on each test column, independently of
# the training data, so the codes agree with the training codes only if every
# column holds the same set of categories in both files - confirm.
for col in colname:
    test_data[col] = le.fit_transform(test_data[col])

In [None]:
test_data.head()

# Creating training and testing datasets and running the model 

In [None]:
# Every column except the last one is a feature; the last column is taken as
# the label (presumably Loan_Status - verify against the CSV column order).
train_values = train_data.values
X_train = train_values[:, :-1]
Y_train = train_values[:, -1]

In [None]:
# convert the y train data type to int
Y_train=Y_train.astype(int)

In [None]:
Y_train

In [None]:
#test_data.head()
X_test=test_data.values[:,:]

In [None]:
print(X_train.shape)
print(X_test.shape)
print(Y_train.shape)

# Scaling the train data and test data

In [None]:
from sklearn.preprocessing import StandardScaler

# The scaler is fitted on the training features only; the fitted
# transformation is then applied to the test features as well.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
X_train

# Creating the model

In [None]:
from sklearn import svm # cntrl + shift + -

In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
svc_model=svm.SVC(kernel='rbf',
                  C=1.0,
                  gamma=0.1)

svc_model.fit(X_train, Y_train)

In [None]:
dt_model = DecisionTreeClassifier()

In [None]:
dt_model.fit(X_train, Y_train)

# Prediction on test data

In [None]:
Y_pred = svc_model.predict(X_test)

In [None]:
print(list(Y_pred))


# create the final df with predicted output for test data

In [None]:
# Re-read the raw test file (this time without index_col, so the first column
# stays a regular column) and attach the predictions as a new column.
# Y_pred is assigned by position, so this relies on the file having the same
# row order as the data the predictions were computed from.
test_data=pd.read_csv('risk_analytics_test.csv',header=0)
test_data["Y_predictions"]=Y_pred
test_data.head()

In [None]:
test_data["Y_predictions"]=test_data["Y_predictions"].map({1:"Eligible",
                                                           0:"Not Eligible"})

In [None]:
test_data.head()

In [None]:
# index=False keeps the DataFrame's row index out of the file; otherwise it is
# written as an extra, unnamed first column.
test_data.to_csv('test_data.csv', index=False)

In [None]:
test_data.Y_predictions.value_counts()

# Using cross validation (Optional)

In [None]:

from sklearn import svm
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier

# Candidate estimators with the mean accuracy noted for each; only one is
# active at a time.
#classifier=svm.SVC(kernel='rbf',C=1.0,gamma=0.1) #75.89%
#classifier=KNeighborsClassifier(n_neighbors=11, metric='euclidean') #75.07%
#classifier=svm.SVC(kernel='rbf',C=10.0,gamma=0.001) #77.03%
classifier=LogisticRegression() #77.20%

# 10-fold cross-validation splitter
kfold_cv=KFold(n_splits=10)
print(kfold_cv)

# No scoring argument is given, so every fold is scored with the estimator's
# default scorer.
kfold_cv_result=cross_val_score(classifier, X_train, Y_train, cv=kfold_cv)
print(kfold_cv_result)

# Mean score over the folds
print(kfold_cv_result.mean())


In [None]:
svc_model.score(X_train,Y_train)

In [None]:
# for x in range(0,len(Y_pred_col)):

#     if Y_pred_col[x]==0:
#         Y_pred_col[x]= "N"
#     else:
#         Y_pred_col[x]="Y"
    
# print(Y_pred_col)

# <a> Predicting for single person </a>

In [None]:
print("#######  Actual data  #######")
print(train_data.head(1)) 
print("#######  Scaler data  #######")
print(X_train[0])
print("#######  Label  #######")
print(Y_train[0])

## Create a json object from row

In [None]:
from pprint import pprint

In [None]:
a = train_data.iloc[0]
a

In [None]:
c = a.to_dict()
pprint(c)

In [None]:
sample_json = c
pprint(sample_json)

In [None]:
gen = sample_json['Gender']
mar = sample_json['Married']
dep = sample_json['Dependents']
edu = sample_json['Education']
sle = sample_json['Self_Employed']
api = sample_json['ApplicantIncome']
cpi = sample_json['CoapplicantIncome']
lam = sample_json['LoanAmount']
lat = sample_json['Loan_Amount_Term']
crh = sample_json['Credit_History']
pra = sample_json['Property_Area']

In [None]:
person = [[gen,mar,dep,edu,sle,api,cpi,lam,lat,crh,pra]]
print(person)

In [None]:
person = scaler.transform(person)
print(person)

## Another method  (optional)

In [None]:
X_train[0]

In [None]:
# test1  = [[ 0.47234264, -1.37208932, -0.73780632, -0.52836225, -0.39260074,
#         0.07299055448733, -0.00473263,  0.2732313 ,  0.54095432,
#         1.22329839]]82, -0.

# print(test1)

----

In [None]:
classes = np.array(['Not-Eligible:- 0', 'Eligible:- 1'])
print(classes)

In [None]:
class_ind =svc_model.predict(person)
print(class_ind)
type(class_ind)

In [None]:
classes[class_ind]

In [None]:
# The first row is selected by position with .iloc: train_data is indexed by
# the first CSV column, so a plain [0] is a label lookup that only works
# through pandas' deprecated positional fallback.
print("Actual class : {}\n Predicted Class: {}".format(train_data["Loan_Status"].iloc[0],classes[class_ind]))

# <a> CODE FOR DEPLOYMENT: </a>

## 1. Save the scaler object

In [None]:
import joblib

In [None]:
# Dump the fitted scaler to a file in the current working directory

joblib.dump(scaler,'svm_scaler.pkl')

In [None]:
# Load the scaler back from the file in the current working directory

person_scaler = joblib.load('svm_scaler.pkl')


eg.

## 2. save the model

In [None]:
import pickle

In [None]:
# Save to file in the current working directory

pkl_filename = "svc_pickle_model.pkl"
with open(pkl_filename, 'wb') as file1:
    pickle.dump(svc_model, file1)

In [None]:
# Load from file
pkl_filename = "svc_pickle_model.pkl"
with open(pkl_filename, 'rb') as file2:
    svc_pickle_model = pickle.load(file2)

In [None]:
joblib.dump(dt_model, "dt_pickle_model.pkl")

## 3. Create a function for prediction 

In [None]:
def return_prediction(model,scaler,sample_json):
    """
    Objective: Predict loan eligibility for a single applicant.

    Params:
    -------
        model: trained model; its predict() is called on the scaled features
        scaler: fitted scaler; its transform() is applied to the raw features
        sample_json: dict holding one value for each feature name listed
            in feature_order below

    Return:
    ------
        prediction: array with the class label ('Not-Eligible:- 0' or
            'Eligible:- 1') selected by the model output

    """
    # Features are read in this fixed order to build the single input row.
    feature_order = (
        'Gender',
        'Married',
        'Dependents',
        'Education',
        'Self_Employed',
        'ApplicantIncome',
        'CoapplicantIncome',
        'LoanAmount',
        'Loan_Amount_Term',
        'Credit_History',
        'Property_Area',
    )
    classes = np.array(['Not-Eligible:- 0', 'Eligible:- 1'])

    # One applicant -> one row of raw feature values
    raw_row = [[sample_json[feature] for feature in feature_order]]
    scaled_row = scaler.transform(raw_row)

    # The predicted class index picks the matching label
    return classes[model.predict(scaled_row)]
                    

# Test The Function

### create a function for user input

In [None]:
def ask_user(input_fn=None):
    """
    Objective: Collect the eleven loan-application features from the user.

    Every reply is converted to float. A reply that is not a number does not
    abort the questionnaire: a short message is printed and the same question
    is asked again until a numeric value is entered.

    Params:
    -------
        input_fn: optional callable that receives the prompt text and returns
            the reply as a string; the built-in input() is used when omitted

    Return:
    ------
        user_data: dict mapping each feature name to the float entered for it,
            filled in the order the questions are asked

    """
    if input_fn is None:
        input_fn = input

    # (feature name, prompt shown to the user), in the order they are asked
    questions = (
        ("Gender", "Enter Your Gender:\n 1 -> Male | 0 -> Female  "),
        ("Married", "Married Or Unmarried:\n 1 -> Yes | 0 -> No  "),
        ("Dependents", "Dependent People : "),
        ("Education", "Education :\n 1 -> Not Graduate | 0 -> Graduate "),
        ("Self_Employed", "Self_Employed :\n 1 -> Yes | 0 -> No  "),
        ("ApplicantIncome", "Applicant Income In digits : "),
        ("CoapplicantIncome", "Coapplicant Income In digits : "),
        ("LoanAmount", "Loan Amount In digits : "),
        ("Loan_Amount_Term", "Loan Amount Term In digits : "),
        ("Credit_History", "Credit History :\n 1 -> Yes | 0 -> No   "),
        ("Property_Area", "Property Area :\n 0 -> Rural | 1 - > Semiurban | 2 -> Urban "),
    )

    user_data = {}

    for feature, prompt in questions:
        while True:
            reply = input_fn(prompt)
            try:
                user_data[feature] = float(reply)
            except ValueError:
                # Ask the same question again instead of losing every answer
                # given so far.
                print("Invalid input: please enter a number.")
            else:
                break

    return user_data

    

In [None]:
user_info = ask_user()
user_info 

In [None]:
return_prediction(model = svc_pickle_model,
                 scaler = person_scaler,
                 sample_json = user_info)

In [None]:
return_prediction(model = dt_model,
                 scaler = person_scaler,
                 sample_json = user_info)

In [None]:
# data structures
#
# 1. list 4
#  dict
    

In [None]:
# 10 q

# Convert user_info to a JSON string (with double quotes)

In [None]:
import json
print(json.dumps(user_info))

# <a> API Call with Postman </a>

## THIS IS WHAT WE DO IN POSTMAN 

# <a> API Call with Python </a>

## Step 1:

Make sure you are already running 01-Basic-API.py

## Step 2:

Instead of using Postman, let us send the request to the API programmatically

In [None]:
import requests

test_example = {"Gender": 1.0, 
                  "Married": 0.0, 
                  "Dependents": 0.0, 
                  "Education": 0.0, 
                  "Self_Employed": 0.0, 
                  "ApplicantIncome": 5849.0, 
                  "CoapplicantIncome": 0.0, 
                  "LoanAmount": 146.0, 
                  "Loan_Amount_Term": 360.0, 
                  "Credit_History": 1.0, 
                  "Property_Area": 2.0}

In [None]:
# A timeout is passed so the call raises an exception instead of waiting
# indefinitely when the API server is unreachable or does not answer
# (requests applies no timeout by default).
r = requests.post("http://192.168.1.34:5000/prediction",
                  json=test_example,
                  timeout=10)

In [None]:
if r.status_code == 200:
    print(f"Success: {r.text}")
else:
    print(f"Failure: {r.text}")

In [None]:
r.url

### <a style="color:#FF0000;"> Important note: set localhost = '0.0.0.0' and port = 8080 in 01-Basic-API.py to accept requests from other clients over a Wi-Fi connection (the port in the request URL must match the port the server listens on) </a>

# <center> <a>THE END </a> </center>