### Model scoring function tested with X_test

The original dataset is split into train and test sets, and `X_test` is then passed to the scoring function to test it.

In [1]:
import pandas as pd
# Load the loans CSV; the path is relative, so the file must be in the working directory.
data = pd.read_csv('SBA_loans_project_2.csv')

In [2]:
# MIS_Status is the target; every remaining column is kept as a feature.
Y = data['MIS_Status']
X = data.drop(columns='MIS_Status')

In [3]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; the fixed random_state makes the
# shuffled split reproducible across runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    shuffle=True,
    random_state=1301,
)

In [4]:
def project_2_scoring(data):
    """Score loan records with the persisted encoders, scalers and model.

    The input frame is copied first, so the caller's DataFrame is never
    modified and the function can be called repeatedly on the same frame.

    Steps:
      1. Convert the currency-formatted columns (``DisbursementGross``,
         ``SBA_Appv``, ``BalanceGross``, ``GrAppv``) to integers.
      2. Keep only rows with ``NAICS != 0``, ``NewExist`` in {1, 2} and
         ``RevLineCr`` / ``LowDoc`` in {'Y', 'N'}; cast ``Zip`` to ``str``.
      3. Unpickle ``scalers.sav``, ``categorical_encoders.sav`` and
         ``lgbm_model.sav`` from the current working directory.
      4. Add a ``<col>_trg`` column for every text column and a ``<col>_sc``
         column for every numeric column, then drop all original columns.
      5. Call ``predict`` on the resulting feature frame.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw feature frame. Must contain the columns named above; every text
        column needs an entry in the encoders mapping and every numeric
        column an entry in the scalers mapping.

    Returns
    -------
    list
        The model's predictions, one per row that survives the filters in
        step 2. NOTE: when rows are filtered out the list is shorter than
        the input, so it cannot be aligned positionally with the caller's
        original rows.

    Raises
    ------
    KeyError
        If a required column, encoder or scaler is missing.
    FileNotFoundError
        If one of the ``.sav`` artifacts is not in the working directory.
    """
    import pickle

    def _load_artifact(path):
        # NOTE(security): pickle.load can execute arbitrary code; only load
        # artifacts produced by a trusted training run.
        with open(path, 'rb') as handle:  # closed even if unpickling fails
            return pickle.load(handle)

    def _is_text_dtype(dtype):
        # Plain object columns plus pandas' dedicated string dtypes.
        return (pd.api.types.is_object_dtype(dtype)
                or pd.api.types.is_string_dtype(dtype))

    # Private copy: without it the currency conversion below would rewrite
    # the caller's columns and a second call on the same frame would fail.
    data = data.copy()

    # "$1,234.00" -> 1234. Strip the symbols explicitly rather than slicing
    # off the first character, so already-clean values are not truncated.
    for col in ('DisbursementGross', 'SBA_Appv', 'BalanceGross', 'GrAppv'):
        data[col] = (
            data[col]
            .astype(str)
            .str.replace('$', '', regex=False)
            .str.replace(',', '', regex=False)
            .astype(float)
            .astype(int)
        )

    keep = (
        (data['NAICS'] != 0)
        & data['NewExist'].isin([1.0, 2.0])
        & data['RevLineCr'].isin(['Y', 'N'])
        & data['LowDoc'].isin(['Y', 'N'])
    )
    # .copy() so the column assignments below act on an independent frame.
    data = data.loc[keep].copy()
    data["Zip"] = data["Zip"].apply(str)

    loaded_scalers = _load_artifact('scalers.sav')
    loaded_encoders = _load_artifact('categorical_encoders.sav')
    loaded_model = _load_artifact('lgbm_model.sav')

    # Snapshot the raw column names: only these are transformed, and all of
    # them are dropped at the end so just the derived features remain.
    original_cols = list(data.columns)

    # Encoded columns are appended before scaled ones; this ordering defines
    # the column order of the frame handed to the model.
    for col in original_cols:
        if _is_text_dtype(data[col].dtype):
            print("Target Encode scaling of ", col)
            encoder = loaded_encoders[col][0]
            data[col + "_trg"] = encoder.transform(data[[col]])

    for col in original_cols:
        if pd.api.types.is_numeric_dtype(data[col].dtype):
            print("MinMax scale of ", col)
            scaler = loaded_scalers[col][0]
            data[col + "_sc"] = scaler.transform(data[[col]])

    X = data.drop(columns=original_cols)

    y_predicted = loaded_model.predict(X)

    return list(y_predicted)

In [5]:
# Run the scoring function on the held-out feature frame; the cell displays
# the returned list of predictions (rows removed by the function's filters
# produce no entry).
project_2_scoring(X_test)

Target Encode scaling of  City
Target Encode scaling of  State
Target Encode scaling of  Zip
Target Encode scaling of  Bank
Target Encode scaling of  BankState
Target Encode scaling of  RevLineCr
Target Encode scaling of  LowDoc
MinMax scale of  NAICS
MinMax scale of  NoEmp
MinMax scale of  NewExist
MinMax scale of  CreateJob
MinMax scale of  RetainedJob
MinMax scale of  FranchiseCode
MinMax scale of  UrbanRural
MinMax scale of  DisbursementGross
MinMax scale of  BalanceGross
MinMax scale of  GrAppv
MinMax scale of  SBA_Appv


[1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
