<a href="https://colab.research.google.com/github/Tavares2020/Tavares2020/blob/main/10_producing_scores_without_tags_on_blindholdout_(1).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **FICO Analytic Challenge © Fair Isaac 2025**

# Week 10: Generating Features and Scores on Blind Holdout set (without Tags)

## Mount the Google Drive

In [None]:
import os
import sys
from google.colab import drive

# Mount Google Drive (always re-mount, even if it is already mounted)
drive.mount('/content/drive/', force_remount=True)

# Project root on Drive
path = '/content/drive/MyDrive/FICO Analytic Challenge'

# Folder names for the dataset and the model, plus the model prefix
# (the prefix is used to distinguish this model's output files)
data = 'Data'
modelFolder = 'Model'
model = 'NNet'

# Full paths to the "Data" and "Model" folders
data_path, model_path = (os.path.join(path, folder) for folder in (data, modelFolder))

# Make sure the model folder exists before anything reads from or writes to it
os.makedirs(model_path, exist_ok=True)

# Path of the scaler file: <model_path>/<scaleFilePrefix>.<model>.<data>.pkl
scaleFilePrefix = 'scaler2'
scaleFile = os.path.join(model_path, f"{scaleFilePrefix}.{model}.{data}.pkl")

# Folders whose modules should be importable:
# Data, DNU_Functions, Model and Week_00 ... Week_11
subdirectories = [data, 'DNU_Functions', modelFolder] + [f'Week_{i:02d}' for i in range(12)]

# Put every one of those folders on sys.path
for subdir in subdirectories:
    sys.path.append(os.path.join(path, subdir))

# Work from the project root and show where we are
os.chdir(path)
print(os.getcwd())

Mounted at /content/drive/
/content/drive/MyDrive/FICO Analytic Challenge


### Import the required libraries

In [None]:
# Load IPython's autoreload extension; mode 2 re-imports all modules before each cell runs,
# so edits to imported .py files are picked up without restarting the runtime
%load_ext autoreload
%autoreload 2

In [None]:
# import the necessary libraries
import numpy as np
import pandas as pd
from pickle import dump, load
import math
from fico_functions import *
from IPython.display import display, Math, Latex, Image

# Sci-kit learn libraries
from sklearn.preprocessing import MinMaxScaler

# Plotting library
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

# Pytorch libraries
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

# Render matplotlib figures inline in the notebook output, at 'retina' (high-DPI) resolution
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import warnings

# Suppress all warnings for the rest of the session
warnings.filterwarnings(action='ignore')

# Lift pandas' display limits so every column and every row of a frame is shown
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [None]:
# Choose the compute device: the first CUDA GPU when one is present, otherwise the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Report which device was selected
if device.type != "cuda":
    print("No GPU available. Training will run on CPU.")
else:
    print(f"GPU: {torch.cuda.get_device_name(0)} is available.")

No GPU available. Training will run on CPU.


### Blind Holdout Dataset
- **test_C_**<font color='CornflowerBlue'>notags</font>**.csv** is the blind holdout dataset
    - you should have already created the features for it and saved the result under one of the following names:
        - **test_C_**<font color='DeepSkyBlue'>notags_features</font>**.csv**
            - if only using features from week 4
        - **test_C_**<font color='lightgreen'>notags_advanced_features</font>**.csv**
            - if also using week 8
- **score.NNet.test_C_**<font color='DeepSkyBlue'>notags_features</font>**.csv** or **score.NNet.test_C_**<font color='lightgreen'>notags_advanced_features</font>**.csv**
    - this should have scores from your trained NNet model
    - this dataset doesn't have the following columns since it has "<font color='CornflowerBlue'>**notags**</font>"
        - mdlIsFraudTrx
        - mdlIsFraudAcct
- <font color='Cyan'>**score.NNet.test_C_features.csv**</font> or <font color='MediumPurple'>**score.NNet.test_C_advanced_features.csv**</font>
    - this is the name of the file we'll return to you, which includes the tags

In [None]:
# File prefix(es) of the blind holdout dataset
blindholdoutFile = ['test_C_notags']

# Feature-set switches; each one defaults to False.
# base: True when importing the base dataset
base = False

# creatingAdvancedFeatures: True when the advanced features still have to be created
creatingAdvancedFeatures = False

# usingAdvancedFeatures: True to use the advanced features,
# False to use the regular features (and not the base dataset)
usingAdvancedFeatures = True

In [None]:
# Alternative (currently unused): remove only 'DistanceFlagkm'. The active list is set in the next cell.
# columnsToRemove = ['DistanceFlagkm']  #, '1m', 'DistanceFlagkm', 'ewm_1D', 'ratio_14D_to_60D', 'repeat_hi_amt_1H', 'rolling_mean_14D', 'rolling_mean_60D']


In [None]:
# Feature columns to leave out of the model inputs; add more names to the list as needed.
# Other candidates: '1m', 'ewm_1D', 'ratio_14D_to_60D', 'repeat_hi_amt_1H', 'rolling_mean_14D', 'rolling_mean_60D'
# NOTE(review): the recorded run lists 41 features and the loaded model takes 39 inputs, so this
# list presumably has to match the columns removed when the model was trained - confirm.
columnsToRemove = [
    'transactionHour',
    'DistanceFlagkm',
]


In [None]:
# Hyper-parameters
# Hidden-layer size of the saved network to load: an int here, though the next cell also
# accepts a list. The value is embedded in the model file name built further down.
num_hidden_units = 15

In [None]:
# Text form of the hidden-layer setting: a list such as [15, 10] becomes "15-10",
# any other value is simply passed through str()
num_hidden_units_str = (
    "-".join(str(units) for units in num_hidden_units)
    if isinstance(num_hidden_units, list)
    else str(num_hidden_units)
)

In [None]:
# Path of the saved best-validation-LAUC model; the file name encodes the
# hidden-layer setting and the scaler prefix
laucModel = os.path.join(model_path, f"model_best_valid_lauc.{num_hidden_units_str}nodes.{scaleFilePrefix}.pt")

# Load the entire pickled model object (not just a state_dict).
# - weights_only=False unpickles arbitrary Python objects, which can execute code:
#   only load .pt files that you created yourself or otherwise trust.
# - map_location=device remaps the saved tensors onto the device selected earlier, so a
#   model that was saved on a GPU still loads on a CPU-only runtime.
model_l_NNet = torch.load(laucModel, map_location=device, weights_only=False)
model_l_NNet.to(device)

# Switch to evaluation mode (dropout off, batch norm uses its running statistics);
# as the last expression this also displays the model architecture
model_l_NNet.eval()

NNet(
  (layers): ModuleList(
    (0): Linear(in_features=39, out_features=15, bias=True)
    (1): Linear(in_features=15, out_features=1, bias=True)
  )
  (batch_norms): ModuleList(
    (0): BatchNorm1d(15, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (dropouts): ModuleList(
    (0): Dropout(p=0.4, inplace=False)
  )
  (tanh): Tanh()
  (sigmoid): Sigmoid()
)

In [None]:
# Score the blind holdout file(s) with the loaded model; the result is kept in df_blind_holdout
df_blind_holdout = blind_holdout_score_NNet(
    data_path,
    blindholdoutFile,
    scaleFile,
    device,
    columnsToRemove,
    model_l_NNet,
    creatingAdvancedFeatures,
    usingAdvancedFeatures,
    base=base,
    model=model,
    scaleFilePrefix=scaleFilePrefix,
)

Using [1;31mtest_C_notags_advanced_features.csv[0m to train model and/or produce scoreouts and perf metrics
[1;32mFile 1:[0m /content/drive/MyDrive/FICO Analytic Challenge/Data/test_C_notags_advanced_features.csv
[1;32mFile 2:[0m /content/drive/MyDrive/FICO Analytic Challenge/Data/score.NNet.test_C_notags_advanced_features.scaler2.csv

[1;33mColumns in:[0m test_C_notags
Number of Columns in Base: 25
Base Columns: ['cardholderState', 'pan', 'transactionDateTime', 'merch_lat', 'long', 'merchant', 'transactionAmount', 'zip', 'category', 'last', 'cardholderCountry', 'trans_num', 'merchState', 'job', 'first', 'merch_long', 'street', 'unix_time', 'lat', 'merchCountry', 'city_pop', 'dob', 'gender', 'is_train', 'deltaTime']
Number of Features: 41
Input Features: ['1m', 'DistanceFlagkm', 'IsHighValue', 'RelativeAmount', 'amount_diff', 'amt_trend_24h', 'amt_trend_5e', 'category_ratio', 'count_trend_1h', 'ewm_1D', 'is_CNP', 'is_CP', 'is_high_amount', 'is_high_inperson', 'is_high_merch_cou

In [None]:
# Preview the first rows of the scored holdout data (the recorded output ends with the
# y_preds and score columns)
df_blind_holdout.head()

Unnamed: 0,pan,merchant,category,transactionAmount,first,last,gender,street,zip,lat,long,city_pop,job,dob,trans_num,unix_time,merch_lat,merch_long,transactionDateTime,is_train,merchCountry,merchState,cardholderCountry,cardholderState,is_international,is_high_trans_freq_user_avg_vol,num_hi_amt_last_hour,is_high_repeat_amount,is_CNP,is_high_amount,user_avg_amount_week,amt_trend_24h,transactionHour,is_high_transaction_amount_user_avg,is_high_person,category_ratio,num_transactions,is_high_inperson,deltaTime,user_avg_amount_month,is_high_trans_freq,IsHighValue,amount_diff,num_last_24_hours,user_avg_amount,count_trend_1h,is_high_volume,user_avg_volume_week,transactionFrequency,is_high_transaction_amount,amt_trend_5e,user_avg_volume_month,user_avg_volume,is_late_night,DistanceFlagkm,is_high_merch_country,repeat_amt,is_CP,RelativeAmount,is_high_online,rolling_mean_60D,rolling_mean_14D,repeat_hi_amt_1H,ratio_14D_to_60D,ewm_1D,1m,y_preds,score
0,C002898353840886A,Brekke and Sons,gas_transport,68.220001,Charles,Copeland,M,92213 Lee Well,33404,26.783199,-80.063797,459921,"Administrator, arts",1969-09-08,3in256ozjxrdg9y2ptwih0wn5tgkqiw4,1325671108,25.784525,-79.915947,2020-05-04 09:58:28,0,US,Florida,US,Florida,5.010387e-10,0.0,1.497091e-08,-1.413044e-10,3.617157e-09,-1.373211e-09,68.220001,68.220001,9,-1.3179e-08,3.397931e-10,-2.564548e-09,363.0,1.496411e-10,0.0,68.220001,6.86297e-09,5.943761e-09,0.01,1.0,68.220001,1.0,1.0,363.0,1.0,-1.3179e-08,68.220001,363.0,363.0,3.01364e-09,0,5.010387e-10,-1.413044e-10,1.0,1.0,1.496411e-10,68.220001,68.220001,68.220001,1.0,68.220001,68.220001,0.001027,236
1,C002898353840886A,Douglas-White,entertainment,4.03,Charles,Copeland,M,92213 Lee Well,33404,26.783199,-80.063797,459921,"Administrator, arts",1969-09-08,29s7m7wnjqc7kbby2n7gl0wtlag2cwb4,1325753889,26.367189,-79.473663,2020-05-05 08:58:09,0,BS,West Grand Bahama,US,Florida,0.9999999,0.0,1.497091e-08,-1.413044e-10,3.617157e-09,-1.373211e-09,36.125,36.125004,8,-1.3179e-08,3.397931e-10,-2.564548e-09,363.0,1.496411e-10,82781.0,36.125004,6.86297e-09,5.943761e-09,-64.190002,2.0,68.220001,1.0,1.0,363.0,1.0,-1.3179e-08,36.125004,363.0,363.0,3.01364e-09,0,0.9999999,-1.413044e-10,1.0,0.059074,1.496411e-10,36.125,36.125,4.03,1.0,36.125,4.030009,0.006721,335
2,C002898353840886A,"Ritchie, Oberbrunner and Cremin",travel,3.43,Charles,Copeland,M,92213 Lee Well,33404,26.783199,-80.063797,459921,"Administrator, arts",1969-09-08,dwruac21w0n7g8prlapny8rnf29ddp25,1325784686,26.459335,-80.743965,2020-05-05 17:31:26,0,US,Florida,US,Florida,5.010387e-10,0.0,1.497091e-08,-1.413044e-10,3.617157e-09,-1.373211e-09,25.226665,3.730002,17,-1.3179e-08,3.397931e-10,-2.564548e-09,363.0,1.496411e-10,30797.0,25.226664,6.86297e-09,5.943761e-09,-0.6,2.0,36.125,1.0,1.0,363.0,1.0,-1.3179e-08,25.226669,363.0,363.0,3.01364e-09,0,5.010387e-10,-1.413044e-10,1.0,0.094948,1.496411e-10,25.226669,25.226667,3.430001,1.0,19.7775,3.430003,0.0012,245
3,C002898353840886A,"Schumm, Bauch and Ondricka",grocery_pos,59.77,Charles,Copeland,M,92213 Lee Well,33404,26.783199,-80.063797,459921,"Administrator, arts",1969-09-08,2o6fqlq35faapgeu18s51l4pat3rdlx8,1325903948,26.817312,-80.26786,2020-05-07 02:39:08,0,US,Florida,US,Florida,5.010387e-10,0.0,1.497091e-08,-1.413044e-10,3.617157e-09,-1.373211e-09,33.862499,59.77,2,1.0,3.397931e-10,-2.564548e-09,363.0,1.496411e-10,119262.0,33.862499,6.86297e-09,5.943761e-09,56.34,1.0,25.226665,1.0,1.0,363.0,1.0,1.0,33.862503,363.0,363.0,1.0,0,5.010387e-10,-1.413044e-10,1.0,2.369318,1.496411e-10,33.862499,33.862495,59.77,1.0,39.77375,59.77,0.009064,351
4,C002898353840886A,"Welch, Rath and Koepp",entertainment,73.059998,Charles,Copeland,M,92213 Lee Well,33404,26.783199,-80.063797,459921,"Administrator, arts",1969-09-08,obz3oahfcy12akikfwq716guk7ckufjc,1325937206,27.22069,-80.339584,2020-05-07 11:53:26,0,US,Florida,US,Florida,5.010387e-10,0.0,1.497091e-08,-1.413044e-10,3.617157e-09,-1.373211e-09,41.702,66.415001,11,1.0,3.397931e-10,-2.564548e-09,363.0,1.496411e-10,33258.0,41.702,6.86297e-09,5.943761e-09,13.29,2.0,33.862503,1.0,1.0,363.0,1.0,1.0,41.701996,363.0,363.0,3.01364e-09,0,5.010387e-10,-1.413044e-10,1.0,2.157549,1.496411e-10,41.702,41.702,73.059998,1.0,56.416874,73.059998,0.000498,199
