#### Repeating the Same Process - Ground Truth Climate & WNV Case Data 

We will see how actual climatic data compares with the embeddings. 

By: Angel Moreno

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import os
import pickle
import warnings
import logging

from pytorch_tabnet.tab_model import TabNetRegressor
import shap

from sklearn.model_selection import KFold, cross_val_score, train_test_split
from sklearn.metrics import root_mean_squared_error, mean_absolute_error, r2_score
from kneed import KneeLocator

import torch
import optuna 
from optuna.samplers import TPESampler

#### Data Cleaning - Compiling 8 Clean CSVs Ready for Analysis 	

The goal of this section is to have one csv with all climate, water, and ndvi data for each of 2017 to 2024.

In [168]:
df_climate = pd.read_csv("../input/anuj_true_data_csvs/Angel_ClimateData.csv")

In [169]:
# climate data
df_climate = pd.read_csv("../input/anuj_true_data_csvs/Angel_ClimateData.csv")
# water data
df_water = pd.read_csv("../input/anuj_true_data_csvs/Angel_WaterData.csv")
# mean ndvi data
df_ndvi = pd.read_csv("../input/anuj_true_data_csvs/Angel_meanNDVI.csv")

As seen below, the ordering of counties should be consistent across all, so we may merge horizontally in place. I will also check this with one of my master csvs with the normalized human case data column.

In [170]:
case_data_ex = pd.read_csv("../finished_csvs/master_2022.csv")
case_data_ex.columns

Index(['County', 'GEOID', 'A00', 'A01', 'A02', 'A03', 'A04', 'A05', 'A06',
       'A07', 'A08', 'A09', 'A10', 'A11', 'A12', 'A13', 'A14', 'A15', 'A16',
       'A17', 'A18', 'A19', 'A20', 'A21', 'A22', 'A23', 'A24', 'A25', 'A26',
       'A27', 'A28', 'A29', 'A30', 'A31', 'A32', 'A33', 'A34', 'A35', 'A36',
       'A37', 'A38', 'A39', 'A40', 'A41', 'A42', 'A43', 'A44', 'A45', 'A46',
       'A47', 'A48', 'A49', 'A50', 'A51', 'A52', 'A53', 'A54', 'A55', 'A56',
       'A57', 'A58', 'A59', 'A60', 'A61', 'A62', 'A63', 'Cases_2022',
       '2022_population', 'Cases_2022_normalized'],
      dtype='object')

In [171]:
print(df_climate.head()["County_1"].iloc[:5])
print(df_ndvi.head()["County_1"].iloc[:5])

# this should be 102 if all are equal
print(sum((df_climate["County_1"] == df_ndvi["County_1"])*1)==102)

0        Adams
1    Alexander
2         Bond
3        Boone
4        Brown
Name: County_1, dtype: object
0        Adams
1    Alexander
2         Bond
3        Boone
4        Brown
Name: County_1, dtype: object
True


In [172]:
# now I am checking for my data
print(sum((case_data_ex["County"] == df_climate["County_1"])*1)==102)

print(case_data_ex["County"].is_monotonic_increasing) # seems like my county names are only alphabetical
print(df_climate["County_1"].is_monotonic_increasing)
print(df_ndvi["County_1"].is_monotonic_increasing)

False
True
False
False


I will reorder my dataframe to fit ordering of df_climate / df_ndvi.

In [173]:
source_order = df_climate["County_1"].unique()
case_data_ex["County"] = pd.Categorical(case_data_ex["County"], categories=source_order, ordered=True)
case_data_ex

Unnamed: 0,County,GEOID,A00,A01,A02,A03,A04,A05,A06,A07,...,A57,A58,A59,A60,A61,A62,A63,Cases_2022,2022_population,Cases_2022_normalized
0,Adams,17001.0,-0.067239,-0.065779,0.061984,-0.170445,0.049984,-0.109669,-0.038933,0.111507,...,0.080336,0.056306,-0.019753,-0.151427,-0.078184,-0.160633,0.029502,0.0,64526,0.0
1,Alexander,17003.0,-0.100853,-0.048913,-0.008852,-0.076554,0.005164,-0.063323,0.025426,0.088139,...,0.095372,0.059891,-0.053308,-0.080305,-0.104070,-0.095462,0.045853,0.0,4859,0.0
2,Bond,17005.0,-0.108974,-0.064307,0.034625,-0.117393,0.029814,-0.072641,0.010907,0.111090,...,0.107193,0.020510,-0.093062,-0.128969,-0.062519,-0.157703,0.039234,0.0,16716,0.0
3,Boone,17007.0,-0.036726,-0.081128,0.083217,-0.217778,0.037748,-0.126451,-0.065230,0.112206,...,0.052234,0.042340,-0.109492,-0.138152,-0.037796,-0.157798,0.055174,0.0,53084,0.0
4,Brown,17009.0,-0.074731,-0.040546,0.053104,-0.123806,0.062233,-0.120984,-0.025486,0.138768,...,0.079358,0.054082,0.003168,-0.142994,-0.088903,-0.159073,0.031018,0.0,6321,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
97,Whiteside,17195.0,-0.050599,-0.101328,0.046094,-0.203955,0.018187,-0.152153,-0.070677,0.122247,...,0.056901,0.040895,-0.099173,-0.156041,-0.027092,-0.142284,0.059885,0.0,54602,0.0
98,Will,17197.0,-0.038191,-0.143632,0.087410,-0.183026,0.077733,-0.091773,-0.084821,0.025606,...,0.062069,0.073197,-0.041747,-0.089485,-0.062679,-0.148837,0.036766,0.0,699110,0.0
99,Williamson,17199.0,-0.145247,-0.065627,-0.006135,-0.022172,0.025129,-0.067894,-0.011457,0.082523,...,0.126538,0.035682,-0.020514,-0.103149,-0.110587,-0.128714,0.016842,0.0,66565,0.0
100,Winnebago,17201.0,-0.030881,-0.128900,0.081142,-0.183157,0.051611,-0.116952,-0.079821,0.111883,...,0.063265,0.038038,-0.039094,-0.120788,-0.049664,-0.163948,0.032393,0.0,282081,0.0


In [174]:
print(sum(case_data_ex["County"] == df_climate["County_1"])*1)

print(list(case_data_ex["County"].unique()))
print(list(df_climate["County_1"].unique()))

82
['Adams', 'Alexander', 'Bond', 'Boone', 'Brown', 'Bureau', 'Calhoun', 'Carroll', 'Cass', 'Champaign', 'Christian', 'Clark', 'Clay', 'Clinton', 'Coles', 'Cook', 'Crawford', 'Cumberland', 'De Witt', 'DeKalb', 'Douglas', 'DuPage', 'Edgar', 'Edwards', 'Effingham', 'Fayette', 'Ford', 'Franklin', 'Fulton', 'Gallatin', 'Greene', 'Grundy', 'Hamilton', 'Hancock', 'Hardin', 'Henderson', 'Henry', 'Iroquois', 'Jackson', 'Jasper', 'Jefferson', 'Jersey', 'Jo Daviess', 'Johnson', 'Kane', 'Kankakee', 'Kendall', 'Knox', 'LaSalle', 'Lake', 'Lawrence', 'Lee', 'Livingston', 'Logan', 'Macon', 'Macoupin', 'Madison', 'Marion', 'Marshall', 'Mason', 'Massac', 'McDonough', 'McHenry', 'McLean', 'Menard', 'Mercer', 'Monroe', 'Montgomery', 'Morgan', 'Moultrie', 'Ogle', 'Peoria', 'Perry', 'Piatt', 'Pike', 'Pope', 'Pulaski', 'Putnam', 'Randolph', 'Richland', 'Rock Island', 'Saline', 'Sangamon', 'Schuyler', 'Scott', 'Shelby', 'St. Clair', 'Stark', 'Stephenson', 'Tazewell', 'Union', 'Vermilion', 'Wabash', 'Warren',

In [175]:
case_data_ex = case_data_ex.sort_values("County")
print(list(case_data_ex["County"].unique()))
print(list(df_climate["County_1"].unique()))

# IMPORTANT: RESET INDEX TO COMPARE SINCE I SHIFTED STUFF AROUND
print(sum(case_data_ex["County"].reset_index(drop=True) == df_climate["County_1"].reset_index(drop=True))*1)

['Adams', 'Alexander', 'Bond', 'Boone', 'Brown', 'Bureau', 'Calhoun', 'Carroll', 'Cass', 'Champaign', 'Christian', 'Clark', 'Clay', 'Clinton', 'Coles', 'Cook', 'Crawford', 'Cumberland', 'DeKalb', 'De Witt', 'Douglas', 'DuPage', 'Edgar', 'Edwards', 'Effingham', 'Fayette', 'Ford', 'Franklin', 'Fulton', 'Gallatin', 'Greene', 'Grundy', 'Hamilton', 'Hancock', 'Hardin', 'Henderson', 'Henry', 'Iroquois', 'Jackson', 'Jasper', 'Jefferson', 'Jersey', 'Jo Daviess', 'Johnson', 'Kane', 'Kankakee', 'Kendall', 'Knox', 'Lake', 'LaSalle', 'Lawrence', 'Lee', 'Livingston', 'Logan', 'McDonough', 'McHenry', 'McLean', 'Macon', 'Macoupin', 'Madison', 'Marion', 'Marshall', 'Mason', 'Massac', 'Menard', 'Mercer', 'Monroe', 'Montgomery', 'Morgan', 'Moultrie', 'Ogle', 'Peoria', 'Perry', 'Piatt', 'Pike', 'Pope', 'Pulaski', 'Putnam', 'Randolph', 'Richland', 'Rock Island', 'St. Clair', 'Saline', 'Sangamon', 'Schuyler', 'Scott', 'Shelby', 'Stark', 'Stephenson', 'Tazewell', 'Union', 'Vermilion', 'Wabash', 'Warren', 'W

In [176]:
df_climate

Unnamed: 0,County_1,FIPS,State,Population,X,Y,Hum_17,Pre_17,Tmax_17,Tmin_17,...,Hum_23,Pre_23,Tmax_23,Tmin_23,WS_23,Hum_24,Pre_24,Tmax_24,Tmin_24,WS_24
0,Adams,17001,Illinois,65435,-91.1848,39.9877,83.489720,2.414575,28.878265,16.430389,...,80.156819,2.598919,28.812063,16.643852,3.509917,86.957233,2.887003,28.874651,17.100551,3.662825
1,Alexander,17003,Illinois,5761,-89.3379,37.1964,92.409081,1.849715,29.800494,18.060720,...,92.354838,4.627896,29.968267,18.333393,2.979814,94.356359,3.490334,30.109471,18.601800,3.123587
2,Bond,17005,Illinois,16426,-89.4356,38.8868,87.526342,2.671293,29.578461,16.664752,...,87.931722,3.354472,28.878770,16.415715,3.666769,90.279601,3.576298,29.501770,17.693506,3.822610
3,Boone,17007,Illinois,53544,-88.8234,42.3231,94.299354,3.828716,26.579441,13.915454,...,84.941611,3.024114,26.706373,14.315155,3.842082,92.348444,3.880356,27.161379,14.746334,3.891934
4,Brown,17009,Illinois,6578,-90.7503,39.9618,86.754199,2.218834,28.874032,15.834936,...,83.882595,2.443424,28.595510,16.055039,3.595764,89.503626,3.176027,28.790812,16.788935,3.769309
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
97,Whiteside,17195,Illinois,55175,-89.9123,41.7553,90.371500,3.071367,27.478812,14.891202,...,82.829832,2.374838,27.772224,15.295255,3.415740,89.602076,3.247248,27.904806,15.772279,3.454726
98,Will,17197,Illinois,690743,-87.9787,41.4450,91.886131,2.779096,27.584172,14.851429,...,87.019067,3.472279,27.124073,14.942312,3.475865,90.327146,3.290599,28.393179,15.664390,3.600911
99,Williamson,17199,Illinois,66597,-88.9299,37.7303,90.827954,2.335025,29.802950,17.611845,...,91.053690,4.186029,30.152616,17.835427,2.890200,92.386553,3.691179,30.178877,18.317548,2.992142
100,Winnebago,17201,Illinois,282572,-89.1608,42.3363,93.717256,3.636186,26.617381,14.118006,...,82.816904,2.648312,27.056700,14.645746,3.844946,91.906219,3.870281,27.436860,14.906896,3.896780


In [177]:
print(df_climate.columns)
print(df_climate.shape)

Index(['County_1', 'FIPS', 'State', 'Population', 'X', 'Y', 'Hum_17', 'Pre_17',
       'Tmax_17', 'Tmin_17', 'WS_17', 'Hum_18', 'Pre_18', 'Tmax_18', 'Tmin_18',
       'WS_18', 'Hum_19', 'Pre_19', 'Tmax_19', 'Tmin_19', 'WS_19', 'Hum_20',
       'Pre_20', 'Tmax_20', 'Tmin_20', 'WS_20', 'Hum_21', 'Pre_21', 'Tmax_21',
       'Tmin_21', 'WS_21', 'Hum_22', 'Pre_22', 'Tmax_22', 'Tmin_22', 'WS_22',
       'Hum_23', 'Pre_23', 'Tmax_23', 'Tmin_23', 'WS_23', 'Hum_24', 'Pre_24',
       'Tmax_24', 'Tmin_24', 'WS_24'],
      dtype='object')
(102, 46)


In [178]:
print(df_water.columns)
print(df_water.shape)

Index(['FID', 'LSTNight_2017', 'LSTDay_2017', 'Chlorophyll_2017',
       'LSTNight_2018', 'LSTDay_2018', 'Chlorophyll_2018', 'LSTNight_2019',
       'LSTDay_2019', 'Chlorophyll_2019', 'LSTNight_2020', 'LSTDay_2020',
       'Chlorophyll_2020', 'LSTNight_2021', 'LSTDay_2021', 'Chlorophyll_2021',
       'LSTNight_2022', 'LSTDay_2022', 'Chlorophyll_2022', 'LSTNight_2023',
       'LSTDay_2023', 'Chlorophyll_2023', 'LSTNight_2024', 'LSTDay_2024',
       'Chlorophyll_2024'],
      dtype='object')
(102, 25)


In [179]:
print(df_ndvi.columns)
print(df_ndvi.shape)

Index(['County_1', 'FIPS', 'State', 'Population', 'X', 'Y', 'Ag_MEAN_2017',
       'Ag_MEAN_2018', 'Ag_MEAN_2019', 'Ag_MEAN_2020', 'Ag_MEAN_2021',
       'Ag_MEAN_2022', 'Ag_MEAN_2023', 'Ag_MEAN_2024', 'Fs_MEAN_2017',
       'Fs_MEAN_2018', 'Fs_MEAN_2019', 'Fs_MEAN_2020', 'Fs_MEAN_2021',
       'Fs_MEAN_2022', 'Fs_MEAN_2023', 'Fs_MEAN_2024', 'Rn_MEAN_2017',
       'Rn_MEAN_2018', 'Rn_MEAN_2019', 'Rn_MEAN_2020', 'Rn_MEAN_2021',
       'Rn_MEAN_2022', 'Rn_MEAN_2023', 'Rn_MEAN_2024'],
      dtype='object')
(102, 30)


In [180]:
df_master = pd.concat([df_climate, df_water, df_ndvi], axis=1)
df_master

Unnamed: 0,County_1,FIPS,State,Population,X,Y,Hum_17,Pre_17,Tmax_17,Tmin_17,...,Fs_MEAN_2023,Fs_MEAN_2024,Rn_MEAN_2017,Rn_MEAN_2018,Rn_MEAN_2019,Rn_MEAN_2020,Rn_MEAN_2021,Rn_MEAN_2022,Rn_MEAN_2023,Rn_MEAN_2024
0,Adams,17001,Illinois,65435,-91.1848,39.9877,83.489720,2.414575,28.878265,16.430389,...,0.80,0.81,0.76,0.76,0.77,0.74,0.79,0.78,0.77,0.7783
1,Alexander,17003,Illinois,5761,-89.3379,37.1964,92.409081,1.849715,29.800494,18.060720,...,0.84,0.84,0.67,0.65,0.65,0.68,0.68,0.60,0.59,0.6248
2,Bond,17005,Illinois,16426,-89.4356,38.8868,87.526342,2.671293,29.578461,16.664752,...,0.82,0.83,0.80,0.79,0.82,0.75,0.81,0.82,0.80,0.8093
3,Boone,17007,Illinois,53544,-88.8234,42.3231,94.299354,3.828716,26.579441,13.915454,...,0.81,0.80,0.76,0.73,0.79,0.74,0.79,0.80,0.75,0.7795
4,Brown,17009,Illinois,6578,-90.7503,39.9618,86.754199,2.218834,28.874032,15.834936,...,0.83,0.83,0.78,0.78,0.79,0.75,0.78,0.79,0.79,0.7871
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
97,Whiteside,17195,Illinois,55175,-89.9123,41.7553,90.371500,3.071367,27.478812,14.891202,...,0.80,0.80,0.74,0.69,0.77,0.74,0.73,0.75,,0.7413
98,Will,17197,Illinois,690743,-87.9787,41.4450,91.886131,2.779096,27.584172,14.851429,...,0.75,0.75,0.75,0.77,0.78,0.70,0.78,0.78,,0.7786
99,Williamson,17199,Illinois,66597,-88.9299,37.7303,90.827954,2.335025,29.802950,17.611845,...,0.82,0.81,0.77,0.72,,0.75,,0.78,,0.7805
100,Winnebago,17201,Illinois,282572,-89.1608,42.3363,93.717256,3.636186,26.617381,14.118006,...,0.78,0.79,,0.76,,,,,,#DIV/0!


In [181]:
# storing the three consistent informative columns (county name, fips, and population as well if needed)
df_info = df_master.iloc[:, :4].drop(columns=["State"]).copy()
df_info

Unnamed: 0,County_1,FIPS,Population
0,Adams,17001,65435
1,Alexander,17003,5761
2,Bond,17005,16426
3,Boone,17007,53544
4,Brown,17009,6578
...,...,...,...
97,Whiteside,17195,55175
98,Will,17197,690743
99,Williamson,17199,66597
100,Winnebago,17201,282572


In [182]:
source_order = df_climate["County_1"].unique()

# test run for 2017 only
year_short = 2017-2000
df_2017 = pd.read_csv(f"../finished_csvs/master_20{year_short}.csv")

df_2017["County"] = pd.Categorical(df_2017["County"], categories=source_order, ordered=True)
df_2017 = df_2017.reset_index(drop=True)

# df for the curr year
# this uses regex to keep anything with 17

df_curr_2017_master = df_master.filter(regex=rf'{year_short}')

df_curr_2017_master = pd.concat([df_info, df_2017[f"Cases_20{year_short}_normalized"]], axis=1)
df_curr_2017_master

Unnamed: 0,County_1,FIPS,Population,Cases_2017_normalized
0,Adams,17001,65435,1.513844
1,Alexander,17003,5761,0.000000
2,Bond,17005,16426,0.000000
3,Boone,17007,53544,1.868705
4,Brown,17009,6578,0.000000
...,...,...,...,...
97,Whiteside,17195,55175,0.000000
98,Will,17197,690743,0.144827
99,Williamson,17199,66597,0.000000
100,Winnebago,17201,282572,1.053811


In [183]:
dfs = []
for year in np.arange(2017,2025):
	# use the cols identified earlier
	# so for df_climate, the year is only the last two digits
	year_short = year - 2000

	source_order = df_climate["County_1"].unique()

	df_cases = pd.read_csv(f"../finished_csvs/master_20{year_short}.csv")

	df_cases["County"] = pd.Categorical(df_cases["County"], categories=source_order, ordered=True)
	df_cases = df_cases.sort_values("County").reset_index(drop=True)

	df_master[f"Cases_normalized_20{year_short}"] = df_cases[f"Cases_20{year_short}_normalized"]

	# df for the curr year
	df = df_master.filter(regex=rf'{year_short}')

	df = pd.concat([df_info, df], axis=1)

	dfs.append(df)
	df.to_csv(f"../finished_grnd_trth_csvs/df_{year}.csv")

In [184]:
dfs[5] # for 2022

Unnamed: 0,County_1,FIPS,Population,Hum_22,Pre_22,Tmax_22,Tmin_22,WS_22,LSTNight_2022,LSTDay_2022,Chlorophyll_2022,Ag_MEAN_2022,Fs_MEAN_2022,Rn_MEAN_2022,Cases_normalized_2022
0,Adams,17001,65435,85.812794,3.216718,28.540558,17.386294,3.610066,20.40,27.09,1.648361,0.80,0.81,0.78,0.0
1,Alexander,17003,5761,92.118736,2.064861,30.895596,18.904651,2.979364,20.95,28.71,1.830788,0.76,0.84,0.60,0.0
2,Bond,17005,16426,91.182062,4.352683,28.980863,17.687444,3.610158,21.11,27.47,2.121371,0.84,0.84,0.82,0.0
3,Boone,17007,53544,91.884123,4.545900,26.413354,14.532638,4.166481,17.34,26.63,1.588516,0.82,0.82,0.80,0.0
4,Brown,17009,6578,89.681143,3.064877,28.337429,16.792730,3.706307,20.01,26.64,1.768407,0.82,0.83,0.79,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
97,Whiteside,17195,55175,89.170262,3.935786,27.160878,15.795835,3.601012,18.06,26.92,1.495601,0.83,0.83,0.75,0.0
98,Will,17197,690743,89.648536,2.768558,27.543656,15.634714,3.745801,20.47,29.04,1.218561,0.79,0.75,0.78,0.0
99,Williamson,17199,66597,92.863593,3.322471,30.158086,18.154403,2.804807,22.31,27.82,1.767567,0.78,0.81,0.78,0.0
100,Winnebago,17201,282572,90.665365,4.889522,26.652604,14.937242,4.139305,17.48,27.51,1.419374,0.83,0.81,,0.0


Small edit: instead of individual LSTDay and LSTNight columns, get LST_mean_``year``

Completing some additional quality checks: 
- some cells have value of ``#DIV/0!`` from xlsx
- some cells are NaN due to incomplete data

In [16]:
for year in np.arange(2017, 2025):
  df_curr = pd.read_csv(f"../finished_grnd_trth_csvs/df_{year}.csv").drop(columns=["Unnamed: 0"])
  df_curr[f"LSTMean_{year}"] = ((df_curr[f"LSTNight_{year}"] + df_curr[f"LSTDay_{year}"]) / 2)
  col = df_curr.pop(f"LSTMean_{year}")
  df_curr.insert(8, f"LSTMean_{year}", col)
  df_curr = df_curr.drop(columns=[f"LSTNight_{year}", f"LSTDay_{year}"])
  
	#additional quality checks I found while manually checking csvs: 
  df_curr = df_curr.fillna(0)
  df_curr = df_curr.replace(["#DIV/0!"], 0)
  df_curr.to_csv(f"../finished_grnd_trth_csvs_fix/df_{year}.csv", index=False)
  
df_curr

Unnamed: 0,County_1,FIPS,Population,Hum_24,Pre_24,Tmax_24,Tmin_24,WS_24,LSTMean_2024,Chlorophyll_2024,Ag_MEAN_2024,Fs_MEAN_2024,Rn_MEAN_2024,Cases_normalized_2024
0,Adams,17001,65435,86.957233,2.887003,28.874651,17.100551,3.662825,22.630,1.626381,0.80,0.81,0.7783,0.000000
1,Alexander,17003,5761,94.356359,3.490334,30.109471,18.601800,3.123587,23.345,1.590579,0.76,0.84,0.6248,0.000000
2,Bond,17005,16426,90.279601,3.576298,29.501770,17.693506,3.822610,23.070,1.548130,0.82,0.83,0.8093,0.000000
3,Boone,17007,53544,92.348444,3.880356,27.161379,14.746334,3.891934,20.740,1.502652,0.81,0.80,0.7795,0.000000
4,Brown,17009,6578,89.503626,3.176027,28.790812,16.788935,3.769309,22.375,1.826333,0.81,0.83,0.7871,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
97,Whiteside,17195,55175,89.602076,3.247248,27.904806,15.772279,3.454726,21.440,1.716051,0.81,0.80,0.7413,0.000000
98,Will,17197,690743,90.327146,3.290599,28.393179,15.664390,3.600911,23.335,1.185811,0.78,0.75,0.7786,0.282253
99,Williamson,17199,66597,92.386553,3.691179,30.178877,18.317548,2.992142,23.705,1.384585,0.79,0.81,0.7805,0.000000
100,Winnebago,17201,282572,91.906219,3.870281,27.436860,14.906896,3.896780,21.115,1.356500,0.79,0.79,0,1.409493


#### SHAP TabNet Ranking

In [22]:
#### use finished_grnd_trth_csvs ####