## Import libraries

In [23]:
import sys 
import pickle
import pandas as pd
from snowflake.snowpark import Session 
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier 
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler



## Check Python use 3.8

In [2]:
print("Python Current Version:-", sys.version)

Python Current Version:- 3.8.13 (default, Mar 28 2022, 06:59:08) [MSC v.1916 64 bit (AMD64)]


## Connecting To Snowflake Database

In [3]:
# Snowflake connection
conn = {
  "account": "",
  "user": "",
  "password": "",
  "role": "",
  "warehouse": "",
  "database": "",
  "schema": ""
 }

session = Session.builder.configs(conn).create()
print(session.sql("select current_warehouse(), current_database(), current_schema()").collect())  

[Row(CURRENT_WAREHOUSE()='COMPUTE_WH', CURRENT_DATABASE()='CREDIT_CARD_APPROVAL_PREDICTION', CURRENT_SCHEMA()='PUBLIC')]


##  Call Require Tables

In [4]:
APPLICATION_RECORD = session.sql("""
SELECT ID,
       FLAG_OWN_REALTY AS OWN_A_PROPERTY , 
       AMT_INCOME_TOTAL AS INCOME, 
       NAME_FAMILY_STATUS AS MARITAL_STATUS, 
       NAME_HOUSING_TYPE AS TYPE_OF_HOUSING
FROM APPLICATION_RECORD
""")
X_AR = APPLICATION_RECORD.toPandas().set_index('ID')


CREDIT_RECORD =session.table('CREDIT_RECORD')
Y_CR = CREDIT_RECORD.toPandas().set_index('ID').head(438557)

## Checking Tables

In [6]:
X_AR

Unnamed: 0_level_0,OWN_A_PROPERTY,INCOME,MARITAL_STATUS,TYPE_OF_HOUSING
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
5008804,Y,427500,Civil marriage,Rented apartment
5008805,Y,427500,Civil marriage,Rented apartment
5008806,Y,112500,Married,House / apartment
5008808,Y,270000,Single / not married,House / apartment
5008809,Y,270000,Single / not married,House / apartment
...,...,...,...,...
6840104,Y,135000,Separated,House / apartment
6840222,N,103500,Single / not married,House / apartment
6841878,N,54000,Single / not married,With parents
6842765,Y,72000,Married,House / apartment


In [5]:
Y_CR

Unnamed: 0_level_0,MONTHS_BALANCE,STATUS
ID,Unnamed: 1_level_1,Unnamed: 2_level_1
5001711,0,X
5001711,-1,0
5001711,-2,0
5001711,-3,0
5001712,0,C
...,...,...
5053255,-33,0
5053255,-34,0
5053255,-35,0
5053255,-36,0


### Coverting String Into a Integers 

In [7]:
X_AR = X_AR.replace({ 'OWN_A_PROPERTY': {'Y':1, 'N':0}, 
                    'MARITAL_STATUS': {'Civil marriage':0, 'Married':1, 'Single / not married':2, 'Separated':3, 'Widow':4},
                    'TYPE_OF_HOUSING': {'Rented apartment':0, 'House / apartment':1, 'With parents':2, 'Municipal apartment':3, 'Co-op apartment':4,'Office apartment':5}})

Y_CR = Y_CR.replace({'STATUS': {'C':6, 'X':7}})

## Reviewing the Data to Confirm Conversion 

In [8]:
X_AR

Unnamed: 0_level_0,OWN_A_PROPERTY,INCOME,MARITAL_STATUS,TYPE_OF_HOUSING
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
5008804,1,427500,0,0
5008805,1,427500,0,0
5008806,1,112500,1,1
5008808,1,270000,2,1
5008809,1,270000,2,1
...,...,...,...,...
6840104,1,135000,3,1
6840222,0,103500,2,1
6841878,0,54000,2,2
6842765,1,72000,1,1


In [9]:
Y_CR

Unnamed: 0_level_0,MONTHS_BALANCE,STATUS
ID,Unnamed: 1_level_1,Unnamed: 2_level_1
5001711,0,7
5001711,-1,0
5001711,-2,0
5001711,-3,0
5001712,0,6
...,...,...
5053255,-33,0
5053255,-34,0
5053255,-35,0
5053255,-36,0


In [53]:
Y_CR.STATUS.unique()

array([7, 0, 6, 1, 2, 3, 4, 5], dtype=int8)

### Checking and Changing Data types

In [10]:
X_AR.dtypes

OWN_A_PROPERTY     int64
INCOME             int32
MARITAL_STATUS     int64
TYPE_OF_HOUSING    int64
dtype: object

In [11]:
Y_CR.dtypes

MONTHS_BALANCE      int8
STATUS            object
dtype: object

In [12]:
Y_CR = Y_CR.astype({'STATUS': 'int8'})
Y_CR.dtypes

MONTHS_BALANCE    int8
STATUS            int8
dtype: object

### Spitting the Data and Counting Arrays

In [13]:
X = X_AR[['OWN_A_PROPERTY', 'INCOME', 'MARITAL_STATUS', 'TYPE_OF_HOUSING']]
Y = Y_CR.STATUS
X.shape, Y.shape

((438557, 4), (438557,))

In [14]:
X_train, X_test, Y_train, Y_test  = train_test_split(X, Y)

## Counting Dimensions

In [15]:
print (X_train.ndim, X_test.ndim)
print (Y_train.ndim, Y_test.ndim)

2 2
1 1


### StandardScaler Preprocessor

In [16]:
# Standardize the features
scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

### Train the Model with RandomForestClassifier

In [17]:
model = RandomForestClassifier(n_estimators=250, max_depth=3) 
model.fit(X_train, Y_train)
##max_depth=2, random_state = 0 n_estimators=250

## Make predictions on the sets

In [None]:
pred_test = model.predict(X_test)
pred_train = model.predict(X_train)

print("Test accuracy:", accuracy_score(Y_test, pred_test))
print("Train accuracy:", accuracy_score(Y_train, pred_train))


Test accuracy: 0.41732944180955855
Train accuracy: 0.41915741661270167


## Predicting array 

In [19]:
pred_test[:20]

array([6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6],
      dtype=int8)

## Classification Report 

In [63]:
print(classification_report(Y_test, pred_test))

              precision    recall  f1-score   support

           0       0.76      0.00      0.00     39836
           1       0.00      0.00      0.00      1085
           2       0.00      0.00      0.00        95
           3       0.00      0.00      0.00        23
           4       0.00      0.00      0.00        12
           5       0.00      0.00      0.00       126
           6       0.42      1.00      0.59     45938
           7       1.00      0.00      0.00     22525

    accuracy                           0.42    109640
   macro avg       0.27      0.13      0.07    109640
weighted avg       0.66      0.42      0.25    109640



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## Saving Model to pickle 

In [24]:
# Save the trained model and scaler to files
with open('model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

with open('scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)