In [1]:
import pandas as pd
import numpy as np

In [17]:
# Load the loan applications CSV into a DataFrame.
# NOTE(review): '/content/...' looks like a Google Colab upload path; the
# notebook will only run as-is where the file exists at this absolute location.
data = pd.read_csv('/content/loans.csv')
# Preview the first rows to sanity-check the columns
data.head()

Unnamed: 0,Applicant_ID,Age,Annual_Income,Credit_Score,Employment_Years,Loan_Amount_Requested,Default
0,10,32,82133,689,1,10789,No
1,38,30,53172,588,3,5442,Yes
2,6,31,90000,573,4,5000,Yes
3,15,29,74634,621,7,16074,Yes
4,35,36,78232,701,5,17742,No


In [11]:
def current_stat(df):
    """Summarise every column of ``df`` in a small overview table.

    Args:
        df: The DataFrame to inspect.

    Returns:
        A DataFrame indexed by the column names of ``df`` with four columns:
        ``types`` (dtype), ``count`` (non-null values, from ``df.count()``),
        ``nunique`` (distinct values, from ``df.nunique()``) and ``null``
        (missing values, from ``df.isnull().sum()``).
    """
    # Each statistic is a Series keyed by column name; dict order fixes the
    # order of the resulting summary columns.
    column_stats = {
        'types': df.dtypes,
        'count': df.count(),
        'nunique': df.nunique(),
        'null': df.isnull().sum(),
    }

    # Start from an empty frame whose rows are the input's columns, then
    # attach the statistics one by one (aligned on that index).
    summary = pd.DataFrame(index=df.columns.to_list())
    for stat_name, stat_values in column_stats.items():
        summary[stat_name] = stat_values
    return summary

In [18]:
# Show dtype, non-null count, distinct-value count and null count for every
# column of the loans frame
current_stat(data)

Unnamed: 0,types,count,nunique,null
Applicant_ID,int64,5000,46,0
Age,int64,5000,28,0
Annual_Income,int64,5000,3206,0
Credit_Score,int64,5000,181,0
Employment_Years,int64,5000,16,0
Loan_Amount_Requested,int64,5000,3686,0
Default,object,5000,2,0


In [19]:
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import KBinsDiscretizer
from sklearn.model_selection import train_test_split

# Features and target variable: the label and the applicant identifier are
# dropped from the feature matrix
X = data.drop(columns=['Default', 'Applicant_ID'])
y = data['Default']

# Columns to be scaled
numeric_features = ['Annual_Income', 'Credit_Score', 'Employment_Years', 'Loan_Amount_Requested']

# Column to be binned and one-hot encoded
categorical_features = ['Age']

# Numeric branch: fill missing values with the column median, then min-max scale
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', MinMaxScaler())
])

# Age branch: discretise into 6 quantile bins (ordinal codes), then one-hot
# encode the bin index.
# KBinsDiscretizer does not accept NaN, so Age is imputed first, mirroring the
# numeric branch; without this a single missing Age would make fit/predict
# raise. On data without missing Age values the imputer is a no-op.
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('bin', KBinsDiscretizer(n_bins=6, encode='ordinal', strategy='quantile')),
    ('encoder', OneHotEncoder(handle_unknown='ignore'))
])

# Combine all transformers into a preprocessor using ColumnTransformer;
# each branch is applied only to the columns listed for it
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ])

# Full model: preprocessing followed by a logistic-regression classifier
pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                           ('classifier', LogisticRegression())])
# Display the (still unfitted) pipeline
pipeline

In [22]:
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed random_state keeps the
# split identical between runs
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Report the shape of each partition, one per line
print(
    f"X_train: {X_train.shape}",
    f"X_test: {X_test.shape}",
    f"y_train: {y_train.shape}",
    f"y_test: {y_test.shape}",
    sep="\n",
)

# Train the preprocessing + classifier pipeline on the training partition only
pipeline.fit(X_train, y_train)

# Hard class predictions, plus the probability from the second predict_proba
# column (i.e. the class stored at pipeline.classes_[1])
y_pred = pipeline.predict(X_test)
y_pred_prob = pipeline.predict_proba(X_test)[:, 1]

# Accuracy on the held-out test partition
print(accuracy_score(y_test, y_pred))

X_train: (3500, 5)
X_test: (1500, 5)
y_train: (3500,)
y_test: (1500,)
0.9606666666666667


In [30]:
!pip install modelbit



In [33]:
# Run this near the top of the notebook, before any cell that uses `mb`.
# modelbit.login() returns the client object used later for mb.deploy(...);
# presumably it triggers an interactive authentication step — confirm in the
# Modelbit documentation.
import modelbit
mb = modelbit.login()

In [34]:
import pandas as pd
import numpy as np

# first define function
def predict_loan_default(Age: int, Annual_Income: float, Credit_Score: int, Employment_Years: float, Loan_Amount_Requested: float) -> np.ndarray:
    """
    Predict the probability of loan default using the fitted module-level ``pipeline``.

    The arguments are packed into a one-row DataFrame whose column names match
    the feature names the pipeline's ColumnTransformer selects by.

    Args:
        Age (int): Applicant's age.
        Annual_Income (float): Applicant's annual income.
        Credit_Score (int): Applicant's credit score.
        Employment_Years (float): Number of years employed.
        Loan_Amount_Requested (float): Requested loan amount.

    Returns:
        np.ndarray: One-element array (shape ``(1,)``) holding the probability
        taken from the second ``predict_proba`` column, i.e. the class stored
        at ``pipeline.classes_[1]`` (presumably 'Yes' = default, given the
        'No'/'Yes' labels in the training data). Note this is an array, not a
        bare float; index ``[0]`` to obtain the scalar.
    """
    feature_names = ['Age', 'Annual_Income', 'Credit_Score', 'Employment_Years', 'Loan_Amount_Requested']
    feature_values = [Age, Annual_Income, Credit_Score, Employment_Years, Loan_Amount_Requested]

    # Single-row frame: the pipeline looks columns up by name, so the labels
    # must match those used at training time.
    applicant = pd.DataFrame([feature_values], columns=feature_names)

    # Keep only the second probability column for the single row
    return pipeline.predict_proba(applicant)[:, 1]

In [35]:
# Local smoke test of the prediction function before deploying it.
# NOTE(review): every argument matches row 0 of the dataset except
# Annual_Income (821233 here vs 82133 in the data) — likely a typo; the value
# is far larger than the row it mirrors, which may explain the near-zero result.
predict_loan_default(32, 821233, 689, 1, 10789)

array([4.14205638e-27])

In [36]:
# Deploy predict_loan_default through the logged-in Modelbit client `mb`.
# The function reads the module-level `pipeline`, so that cell must have been
# run (and the pipeline fitted) first. Presumably Modelbit packages the
# function together with the objects it references — confirm in its docs.
mb.deploy(predict_loan_default)


In [39]:
import requests
import json

# REST endpoint of the deployed predict_loan_default function ("latest" version)
url = "https://induwararathnayake.us-east-2.aws.modelbit.com/v1/predict_loan_default/latest"
headers = {
    'Content-Type': 'application/json'
}

# Request body; the list is presumably passed positionally as
# (Age, Annual_Income, Credit_Score, Employment_Years, Loan_Amount_Requested).
# Named `payload` rather than `data` so the loans DataFrame stored in `data`
# is not overwritten, which would break re-running the earlier cells.
# NOTE(review): the first four values match row 1 of the dataset, whose
# Loan_Amount_Requested is 5442 — confirm that 42 is intended.
payload = {
    "data": [30, 53172, 588, 3, 42]
}

# A timeout stops the cell from hanging forever if the endpoint is unreachable;
# raise_for_status surfaces HTTP errors before the body is parsed as JSON.
response = requests.post(url, headers=headers, json=payload, timeout=30)
response.raise_for_status()
response_json = response.json()

print(json.dumps(response_json, indent=4))

{
    "data": [
        0.9988351752836807
    ]
}
