# Loan Approval



**Pipeline**

**1. Loading Libraries**

In [None]:
import os
from collections import defaultdict
from itertools import combinations

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import sklearn.feature_selection
from sklearn import metrics
from sklearn import preprocessing, svm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures, LabelEncoder, StandardScaler


**2. Loading Our Dataset**
Dalla sezione Files scegli il file: customers_credit_status.csv
Apri Insert to code


In [None]:

# Insert the generated data-access code below (Files panel -> "Insert to code")


# From the generated lines of code, delete the two df.data... lines

# NOTE(review): `body` is not defined anywhere in this notebook; presumably the inserted code creates it — confirm
credit_status = pd.read_csv(body)
credit_status.head()



In [None]:
# Check that everything loaded correctly.
# Show every column instead of letting pandas elide the middle of a wide frame.
# NOTE(review): the original line was truncated after 'display.max_column'; the limit value is a guess — confirm
pd.set_option('display.max_columns', None)

credit_status.head(10)

**3. Get some info about our Dataset and whether we have missing values**

In [None]:
# Print column names, dtypes and non-null counts.
# After running this cell we will see that we have no missing values
credit_status.info()

In [None]:
# Check if we have any NaN values: evaluates to True when at least one cell of the frame is null
credit_status.isnull().values.any()

**4. Descriptive analytics for our data**

In [None]:
# Describe columns with numerical values, displayed with 3 decimal places.
# Use the full option key: the bare pattern 'precision' can match more than one
# option on recent pandas versions and is then rejected as ambiguous.
pd.set_option('display.precision', 3)
credit_status.describe()

In [None]:
# Find pairwise correlations between the numeric columns.
# Non-numeric (string) columns are excluded explicitly: recent pandas versions no
# longer drop them silently and raise when corr() meets a string column.
credit_status.select_dtypes(include='number').corr()

**5. Visualize our Data to understand it better**

**Plot Relationships**

In [None]:
# Create a grid of pairwise relationships, coloured by the 'class' label.
# `height` is the current name of the per-facet size argument (formerly `size`).
gr = sns.PairGrid(credit_status, height=3, hue='class')
# Histograms on the diagonal, scatter plots everywhere else
gr = gr.map_diag(plt.hist)
gr = gr.map_offdiag(plt.scatter)
gr = gr.add_legend()

**Understand Data Distribution**

In [None]:
# Create a wide figure so the box plot is easy to read
fig, ax = plt.subplots(figsize=(20,10))

# Distribution of the 'credit_amount' attribute, drawn on the axes created above
a = sns.boxplot(
    data=credit_status['credit_amount'],
    orient="v",
    palette="hls",
    fliersize=14,
    ax=ax,
)

**6. Encode string values in data into numerical values**

In [None]:
# Distribution of 'credit_amount': density histogram with a KDE curve overlaid.
# histplot replaces the deprecated distplot; stat='density' and kde=True reproduce
# distplot's default normalised histogram plus density estimate.
histogram = sns.histplot(credit_status['credit_amount'], kde=True, stat='density')
plt.show()

In [None]:
# Use pandas get_dummies to one-hot encode the dataset's categorical columns
credit_status_encoded = pd.get_dummies(credit_status)
credit_status_encoded.head(10)

**7. Create Training Set and Labels**

In [None]:
# Create the feature matrix that will undergo preprocessing: every encoded column
# except the last two.
# NOTE(review): assumes the last two columns are the one-hot encoded 'class' target — confirm
# .copy() gives X its own data, so the outlier and imputation steps further down
# modify X itself rather than a slice of credit_status_encoded.
X = credit_status_encoded.iloc[:, :-2].copy()
X.head()

In [None]:
# Extract labels: the 'class' column of the original dataset is the target
y = credit_status['class']

# Fit the encoder on the labels and turn the strings into integer codes in one step
le = LabelEncoder()
y_enc = le.fit_transform(y)

# Preview the first encoded labels
pd.DataFrame(y_enc).head(10)

**8. Detect outliers in numerical values**

In [None]:
# Detect outliers using the interquartile-range (IQR) rule and replace them with NaN
def find_outliers(df, whisker=1.5):
    """Replace IQR outliers of a numeric pandas Series with NaN.

    A value is an outlier when it lies below ``Q1 - whisker * IQR`` or above
    ``Q3 + whisker * IQR``, where Q1/Q3 are the 25th/75th percentiles and
    ``IQR = Q3 - Q1``.

    Parameters
    ----------
    df : pandas.Series
        Numeric values to inspect (despite the name, a single column).
        It is modified in place.
    whisker : float, default 1.5
        Multiple of the IQR that defines the accepted range.

    Returns
    -------
    pandas.Series
        The same object that was passed in, with outliers set to NaN.
    """
    # Series.quantile ignores NaN, so the bounds stay meaningful when the column
    # already contains missing values (np.percentile would return NaN and nothing
    # would ever be flagged).
    quartile_1, quartile_3 = df.quantile([0.25, 0.75])
    iqr = quartile_3 - quartile_1
    lower_bound = quartile_1 - (iqr * whisker)
    upper_bound = quartile_3 + (iqr * whisker)

    # Positional boolean mask: selecting by index label would also blank
    # non-outlier rows that happen to share a (duplicated) label with an outlier.
    is_outlier = ((df < lower_bound) | (df > upper_bound)).values

    # Only assign when there is something to blank out, so an outlier-free
    # integer column is not converted to float for nothing.
    if is_outlier.any():
        df[is_outlier] = np.nan

    return df

In [None]:
# Replace outliers in 'duration' with NaN and write the result back into X explicitly.
# A column selection may be a copy (e.g. with pandas Copy-on-Write), in which case an
# in-place change made inside find_outliers would never reach X.
# The cast to float lets the column hold NaN without an implicit dtype change.
X['duration'] = find_outliers(X['duration'].astype(float))
print(X['duration'])

In [None]:
# Replace outliers in 'credit_amount' with NaN and write the result back into X explicitly.
# A column selection may be a copy (e.g. with pandas Copy-on-Write), in which case an
# in-place change made inside find_outliers would never reach X.
# The cast to float lets the column hold NaN without an implicit dtype change.
X['credit_amount'] = find_outliers(X['credit_amount'].astype(float))
print(X['credit_amount'])

In [None]:
# Replace outliers in 'age' with NaN and write the result back into X explicitly.
# A column selection may be a copy (e.g. with pandas Copy-on-Write), in which case an
# in-place change made inside find_outliers would never reach X.
# The cast to float lets the column hold NaN without an implicit dtype change.
X['age'] = find_outliers(X['age'].astype(float))
print(X['age'])

In [None]:
# Check for null values in X: evaluates to True when at least one cell is null
X.isnull().values.any()

In [None]:
# Define the values to replace (NaN) and the strategy for choosing the replacement (column mean).
# SimpleImputer is the replacement for the removed sklearn.preprocessing.Imputer.
from sklearn.impute import SimpleImputer
suspected_cols = ['duration', 'credit_amount', 'age']
imp = SimpleImputer(missing_values=np.nan, strategy="mean")

# Assign on X itself: writing into a temporary pd.DataFrame(X) wrapper is not
# guaranteed to reach X, which would leave the NaN values in place.
# NOTE(review): the imputer is fit on the full data, before the train/test split below.
X[suspected_cols] = imp.fit_transform(X[suspected_cols])
X.head(10)

In [None]:
# Confirm that no null values remain in X (X is already a DataFrame, no wrapper needed)
X.isnull().values.any()

**10. Split our dataset into train and test datasets**

**Split non-preprocessed data**


In [None]:
# Hold out 30% of the rows for testing; the fixed seed makes the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_enc,
    test_size=0.3,
    random_state=42,
)
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

**11. Scale our data**

In [None]:
# Standardise the features; the scaler is fit on the training split only
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)

# Preview the scaled training features with their original column names
pd.DataFrame(X_train_scaled, columns=X_train.columns).head()

In [None]:
# Preview the first encoded training labels
pd.DataFrame(y_train).head()

**12. Start building a classifier**

In [None]:
from sklearn.linear_model import LogisticRegression

# Alternative configuration, kept for experimentation:
#clf_lr = LogisticRegression(C=0.01, solver='liblinear')
# Logistic regression with the library defaults, trained on the scaled training data
clf_lr = LogisticRegression()
# fit() returns the estimator, so `model` and `clf_lr` refer to the same fitted classifier
model = clf_lr.fit(X_train_scaled, y_train)
model

**13. Evaluate our model**

In [None]:
# Use the scaler that was fit on the training data to scale the test data
# (transform only, no re-fit on the test split)
X_test_scaled = scaler.transform(X_test)
pd.DataFrame(X_test_scaled, columns=X_train.columns).head()

In [None]:
from sklearn.metrics import accuracy_score

# Predict the class of each test row and compare against the true labels
y_pred_lr = clf_lr.predict(X_test_scaled)
acc_lr = accuracy_score(y_test, y_pred_lr)
print(acc_lr)



In [None]:
# Decision scores for the test rows; reused below for average precision and the ROC curve
y_score_lr = clf_lr.decision_function(X_test_scaled)
print(y_score_lr)



In [None]:
from sklearn.metrics import average_precision_score

# Average precision computed from the decision scores (not from the hard predictions)
average_precision_lr = average_precision_score(y_test, y_score_lr)

print('Average precision-recall score: {0:0.2f}'.format(
      average_precision_lr))

**14. ROC Curve and models comparisons**

In [None]:
# Plot the ROC curve of the logistic regression model
plt.figure(0, figsize=(15,10)).clf()

# False/true positive rates over the decision thresholds, plus the area under the curve
fpr_lr, tpr_lr, thresh_lr = metrics.roc_curve(y_test, y_score_lr)
auc_lr = metrics.roc_auc_score(y_test, y_score_lr)
plt.plot(fpr_lr, tpr_lr, label="Logistic Regression on Preprocessed Data, auc=" + str(auc_lr))

plt.legend(loc=0)
# roc_curve returns rates, not counts, so label the axes accordingly
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.show()


**Bonus: Deploy model on the cloud using IBM Watson Machine Learning**

We have our model, but we want to use it from multiple apps. One solution is to deploy it on the cloud as an endpoint (URL): a web or mobile app then sends the data it collects to that endpoint as a JSON payload in a REST API call.


Nel servizio Watson Machine Learning crea un nuovo set di credenziali che chiami loanapproval.
Prendi nota dello username, password e URL.

headers = urllib3.util.make_headers(basic_auth='{}:{}'.format ( 'username','password'))

url = '{}/v3/identity/token'.format('URL')

In [None]:
# To work with the Watson Machine Learning REST API you must generate a Bearer access token
import urllib3, requests, json

# Replace the 'USERNAME', 'PASSWORD' and 'URL' placeholders with your service credentials
headers = urllib3.util.make_headers(basic_auth='{}:{}'.format('USERNAME', 'PASSWORD'))
url = '{}/v3/identity/token'.format('URL')
# The timeout keeps the cell from hanging forever on an unreachable URL
response = requests.get(url, headers=headers, timeout=30)
# Stop here with the HTTP error (e.g. wrong credentials) instead of failing later
# while concatenating 'Bearer ' with a missing token
response.raise_for_status()
ml_token = 'Bearer ' + json.loads(response.text)['token']
# Print only a prefix: the full token is a credential and would be saved in the notebook output
print(ml_token[:16] + '...')

nell'endpoint_instance inserisci i parametri delle credenziali

endpoint_instance = 'URL' + "/v3/wml_instances/" + 'instance_id'


In [None]:
# Query the Watson Machine Learning instance (HTTP GET on the instance endpoint)

# Replace the 'URL' and 'instance_id' placeholders with the values from your credentials
endpoint_instance = 'URL' + "/v3/wml_instances/" + 'instance_id'
# Authenticate with the Bearer token generated in the previous cell
header = {'Content-Type': 'application/json', 'Authorization': ml_token}

response_get_instance = requests.get(endpoint_instance, headers=header)
# Show the response object and the response body
print(response_get_instance)
print(response_get_instance.text)

inseriamo le credenziali nella chiamata per generare l'API client
wml_credentials = { "url"         : "URL",
                    "username"    : "Username",
                    "password"    : "password",
                    "instance_id" : "instanceid"
                   }

In [None]:
# Create API client

from watson_machine_learning_client import WatsonMachineLearningAPIClient
# Replace the placeholder values below with your own service credentials
wml_credentials = { "url"         : "URL",
                    "username"    : "USERNAME",
                    "password"    : "PASSWORD",
                    "instance_id" : "INSTANCEID"
                   }

client = WatsonMachineLearningAPIClient(wml_credentials)

In [None]:
# Publish model in Watson Machine Learning repository on Cloud

# Metadata stored with the model: author name and model name
model_props = {client.repository.ModelMetaNames.AUTHOR_NAME: "DaniZu", 
               client.repository.ModelMetaNames.NAME: "Loan Approval Model"}

In [None]:
# Store the fitted model in the repository together with its metadata and training data
published_model = client.repository.store_model(
    model=model,
    meta_props=model_props,
    training_data=X_train_scaled,
    training_target=y_train,
)

In [None]:
# Create model deployment

# Look up the uid of the model stored above, then create a deployment for it
published_model_uid = client.repository.get_model_uid(published_model)
created_deployment = client.deployments.create(published_model_uid, "Deployment of Loan Approval Model")

In [None]:
# Get Scoring URL of the deployment created above
scoring_endpoint = client.deployments.get_scoring_url(created_deployment)

print(scoring_endpoint)

In [None]:
# Get model details and expected input, pretty-printed as indented JSON
model_details = client.repository.get_details(published_model_uid)
print(json.dumps(model_details, indent=2))