# Tree-based Models, with Clusters

## Library Imports

In [1]:
# Necessary code to import our helper functions
import sys
sys.path.append("../..")

In [2]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Library imports
import numpy as np
import pandas as pd
from category_encoders import TargetEncoder
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import KNNImputer
from matplotlib import pyplot as plt
import xgboost as xgb
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.preprocessing import MinMaxScaler
from Common_Functions import data_split, add_unique_identifier, data_cleaning, hospital_data_agg, optimal_k

  from pandas import MultiIndex, Int64Index


In [3]:
# Method from Shruti's code
def standardize_data(train_data, val_data):
    train_temp = train_data.drop(columns = ['site','cluster','lat','lon'])
    val_temp = val_data.drop(columns = ['site','cluster','lat','lon'])
    
    scaler = MinMaxScaler()
    
    train_data_scaled = scaler.fit_transform(train_temp)
    train_data_scaled = pd.DataFrame(train_data_scaled, columns = train_temp.columns)
    train_data_scaled['cluster'] = train_data['cluster'].to_list()
    train_data_scaled['site'] = train_data['site'].to_list()
    train_data_scaled['lat'] = train_data['lat'].to_list()
    train_data_scaled['lon'] = train_data['lon'].to_list()
    
    val_data_scaled = scaler.transform(val_temp)
    val_data_scaled = pd.DataFrame(val_data_scaled, columns = val_temp.columns)
    val_data_scaled['cluster'] = val_data['cluster'].to_list()
    val_data_scaled['site'] = val_data['site'].to_list()
    val_data_scaled['lat'] = val_data['lat'].to_list()
    val_data_scaled['lon'] = val_data['lon'].to_list()
    
    return train_data_scaled, val_data_scaled

In [4]:
# Method slightly modified from Shruti's code
def impute_knn(train_data, val_data, optimal_k):
    train_data_scaled, val_data_scaled = standardize_data(train_data, val_data)

    knn = KNNImputer(n_neighbors = optimal_k)

    # imputing values
    train_data_scaled[list(train_data_scaled.columns)] = knn.fit_transform(train_data_scaled)
    val_data_scaled[list(val_data_scaled.columns)] = knn.transform(val_data_scaled)
    
    return train_data_scaled, val_data_scaled

## Data Import

In [5]:
data = pd.read_csv("../../Feature Matrix/processed_data.csv")
data.dropna(subset = ['mcare_count'], inplace = True)
data.drop(columns = ['year'], inplace = True)
data.replace([np.inf, -np.inf], 0, inplace=True)

## Model Parameters

In [6]:
COUNT_THRESH = 34
RDM_SEED = 123
TRAIN_TEST_PROPORTION = 0.8
MONOTONE_MODEL = True

## Data Transformation

### One-Hot Categorical Encoding and Dropping NAs

In [7]:
data = data_cleaning(data, dropna = False, one_hot = False)

### Data Split

In [8]:
working_set, predict_set = data_split(data, count_thresh = COUNT_THRESH)

In [9]:
model_data = working_set

In [10]:
display(model_data)

Unnamed: 0,site,group,priv_count,priv_pay_median,mcare_los,mcare_pay_median,CBSA_NAME,lon,lat,Hospitals,...,annual_births,frac_veteran,frac_disability,non_citizen,employment_rate,frac_priv_insurance,frac_mcare_insurance,frac_no_insurance,cluster,mcare_count
40,1,breast reconstruction,63,24289.900,2.549296,8794.190,"Dallas-Fort Worth-Arlington, TX",-96.920913,32.707875,114.0,...,1974825.0,0.06,0.10,0.59,0.69,0.66,0.25,0.17,0,71.0
70,1,breast reconstruction,51,21408.000,3.543210,10395.160,"Houston-The Woodlands-Sugar Land, TX",-95.622552,29.598443,181.0,...,1808878.0,0.05,0.10,0.57,0.66,0.60,0.28,0.19,0,81.0
112,1,breast reconstruction,64,29757.100,3.918699,14174.100,"New York-Newark-Jersey City, NY-NJ-PA",-74.005954,40.712776,143.0,...,4668590.0,0.03,0.11,0.41,0.65,0.66,0.39,0.07,0,123.0
219,1,breast reconstruction,66,25240.905,3.241935,10144.445,"Dallas-Fort Worth-Arlington, TX",-96.920913,32.707875,114.0,...,1974825.0,0.06,0.10,0.59,0.69,0.66,0.25,0.17,0,62.0
486,0,breast reconstruction,60,6998.350,0.000000,3487.540,"Akron, OH",-81.519005,41.081445,11.0,...,160665.0,0.06,0.14,0.40,0.63,0.68,0.40,0.06,0,39.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
44337,0,tka,74,18179.090,,8037.267,"Memphis, TN-MS-AR",-89.850500,35.038720,28.0,...,327202.0,0.07,0.13,0.64,0.64,0.64,0.37,0.11,2,0.0
44419,0,tka,108,17582.420,,8491.004,"Detroit-Warren-Dearborn, MI",-83.079090,42.810540,30.0,...,992227.0,0.05,0.13,0.38,0.62,0.70,0.41,0.05,2,0.0
44537,0,pka,50,15788.130,,7272.170,"Detroit-Warren-Dearborn, MI",-83.079090,42.810540,30.0,...,992227.0,0.05,0.13,0.38,0.62,0.70,0.41,0.05,0,0.0
44974,0,ant_cerv_fusion,54,17137.490,,6934.483,"Atlanta-Sandy Springs-Alpharetta, GA",-84.294090,34.075380,80.0,...,1573561.0,0.07,0.11,0.48,0.67,0.70,0.28,0.12,2,0.0


## Split Model Data by Cluster

In [11]:
X_input = model_data.drop(columns=["priv_pay_median"])
y_input = model_data["priv_pay_median"]

X_list = []
y_list = []

X_dev_list = []
y_dev_list = []
X_test_list = []
y_test_list = []
optimal_k_list = []

for cluster_label in model_data["cluster"].unique():
    
    X_clu = X_input[X_input["cluster"] == cluster_label]
    y_clu = y_input[X_input["cluster"] == cluster_label]
    
    X_dev_clu, X_test_clu, y_dev_clu, y_test_clu = train_test_split(X_clu,
                                                                    y_clu,
                                                                    train_size = TRAIN_TEST_PROPORTION,
                                                                    random_state = RDM_SEED)
    
    # Target encoding group variable
    te_group = TargetEncoder(min_samples_leaf=1)
    X_dev_clu['group_encoded'] = te_group.fit_transform(X_dev_clu['group'],y_dev_clu)
    X_dev_clu.drop(columns = 'group', inplace = True)
    X_test_clu['group_encoded'] = te_group.transform(X_test_clu['group'])
    X_test_clu.drop(columns = 'group', inplace = True)

    # Target encoding CBSA Name
    te_CBSA_NAME = TargetEncoder(min_samples_leaf=1)
    X_dev_clu['CBSA_NAME_encoded'] = te_CBSA_NAME.fit_transform(X_dev_clu['CBSA_NAME'],y_dev_clu)
    X_dev_clu.drop(columns = 'CBSA_NAME', inplace = True)
    X_test_clu['CBSA_NAME_encoded'] = te_CBSA_NAME.transform(X_test_clu['CBSA_NAME'])
    X_test_clu.drop(columns = 'CBSA_NAME', inplace = True)
    
    # KNN Imputation
    knn_data = X_dev_clu.copy()
    knn_data['priv_pay_median'] = y_dev_clu

    optimal_k_list.append(optimal_k(knn_data))
    print(optimal_k_list)
    
    display(X_dev_clu)
    display(y_dev_clu)
    
    X_dev_clu, X_test_clu = impute_knn(X_dev_clu, X_test_clu, optimal_k_list[-1])
    display(X_dev_clu)
    display(y_dev_clu)
    
    X_dev_list.append(X_dev_clu)
    y_dev_list.append(y_dev_clu)
    X_test_list.append(X_test_clu)
    y_test_list.append(y_test_clu)
    
    print(X_dev_clu.shape[0] / X_test_clu.shape[0])



[12]


Unnamed: 0,site,priv_count,mcare_los,mcare_pay_median,lon,lat,Hospitals,PctTeaching,PctLargeHospital,PctPrivate,...,frac_disability,non_citizen,employment_rate,frac_priv_insurance,frac_mcare_insurance,frac_no_insurance,cluster,mcare_count,group_encoded,CBSA_NAME_encoded
8040,0,74,0.000000,4022.34,-78.928824,33.919657,7.0,0.142857,0.000000,0.571429,...,0.18,0.44,0.52,0.60,0.48,0.12,0,324.0,11169.997680,18923.578603
33415,0,83,0.000000,6250.08,-96.731260,43.546020,7.0,0.571429,0.285714,0.714286,...,,,,,,,0,203.0,13389.274637,10902.294958
7887,0,74,0.000000,4291.79,-81.872308,26.640628,6.0,0.000000,0.166667,0.500000,...,0.14,0.39,0.53,0.61,0.45,0.11,0,537.0,11169.997680,10522.583663
32982,0,748,0.000000,5838.67,-80.133610,25.806050,39.0,0.410256,0.153846,0.871795,...,,,,,,,0,143.0,13389.274637,16117.525385
7678,0,445,0.000000,5128.06,-118.255362,34.140662,204.0,0.166667,0.058824,0.750000,...,,,,,,,0,1441.0,11169.997680,11536.585000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28573,0,128,0.000000,4543.35,-118.255400,34.140660,204.0,0.166667,0.058824,0.750000,...,,,,,,,0,137.0,9116.400036,11536.585000
22698,0,56,0.000000,3986.80,-81.732857,28.022243,7.0,0.000000,0.285714,0.714286,...,0.14,0.54,0.56,0.58,0.40,0.14,0,56.0,9563.861695,17273.776259
32865,0,1163,0.000000,6021.34,-96.920910,32.707880,114.0,0.105263,0.052632,0.807018,...,0.10,0.59,0.69,0.66,0.25,0.17,0,420.0,13389.274637,13962.877823
22577,0,52,0.000000,3966.32,-80.413939,37.229573,4.0,0.250000,0.000000,0.750000,...,,,,,,,0,54.0,9563.861695,13875.817651


8040     26758.965
33415    10391.310
7887     13188.780
32982    18330.000
7678     10871.280
           ...    
28573     7972.500
22698    18046.000
32865    10714.470
22577    13181.730
25194    22332.970
Name: priv_pay_median, Length: 1722, dtype: float64

Unnamed: 0,priv_count,mcare_los,mcare_pay_median,Hospitals,PctTeaching,PctLargeHospital,PctPrivate,total_population,median_age,sex_ratio,...,frac_priv_insurance,frac_mcare_insurance,frac_no_insurance,mcare_count,group_encoded,CBSA_NAME_encoded,cluster,site,lat,lon
0,0.012841,0.000000,0.263026,0.029557,0.142857,0.000000,0.571429,0.020752,1.000000,0.16875,...,0.266667,0.939394,0.473684,0.075157,0.315835,0.808371,0.0,0.0,33.919657,-78.928824
1,0.017657,0.000000,0.408701,0.029557,0.571429,0.857143,0.714286,0.009198,0.407258,0.65000,...,0.733333,0.393939,0.210526,0.047089,0.404049,0.341756,0.0,0.0,43.546020,-96.731260
2,0.012841,0.000000,0.280646,0.024631,0.000000,0.500000,0.500000,0.034897,0.951613,0.42500,...,0.300000,0.848485,0.421053,0.124565,0.315835,0.319667,0.0,0.0,26.640628,-81.872308
3,0.373462,0.000000,0.381798,0.187192,0.410256,0.461539,0.871795,0.034897,0.951613,0.42500,...,0.300000,0.848485,0.421053,0.033171,0.404049,0.645137,0.0,0.0,25.806050,-80.133610
4,0.211343,0.000000,0.335330,1.000000,0.166667,0.176471,0.750000,0.182953,0.419355,0.61250,...,0.266667,0.719697,0.315789,0.334261,0.315835,0.378654,0.0,0.0,34.140662,-118.255362
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1717,0.041734,0.000000,0.297095,1.000000,0.166667,0.176471,0.750000,0.182953,0.419355,0.61250,...,0.266667,0.719697,0.315789,0.031779,0.234207,0.378654,0.0,0.0,34.140660,-118.255400
1718,0.003210,0.000000,0.260702,0.029557,0.000000,0.857143,0.714286,0.033145,0.564516,0.42500,...,0.200000,0.696970,0.578947,0.012990,0.251993,0.712399,0.0,0.0,28.022243,-81.732857
1719,0.595506,0.000000,0.393743,0.556650,0.105263,0.157895,0.807018,0.389385,0.391129,0.46875,...,0.466667,0.242424,0.736842,0.097425,0.404049,0.519797,0.0,0.0,32.707880,-96.920910
1720,0.001070,0.000000,0.259363,0.014778,0.250000,0.000000,0.750000,0.003393,0.322581,0.63125,...,0.508333,0.550505,0.368421,0.012526,0.251993,0.514732,0.0,0.0,37.229573,-80.413939


8040     26758.965
33415    10391.310
7887     13188.780
32982    18330.000
7678     10871.280
           ...    
28573     7972.500
22698    18046.000
32865    10714.470
22577    13181.730
25194    22332.970
Name: priv_pay_median, Length: 1722, dtype: float64

3.9953596287703017




[12, 7]


Unnamed: 0,site,priv_count,mcare_los,mcare_pay_median,lon,lat,Hospitals,PctTeaching,PctLargeHospital,PctPrivate,...,frac_disability,non_citizen,employment_rate,frac_priv_insurance,frac_mcare_insurance,frac_no_insurance,cluster,mcare_count,group_encoded,CBSA_NAME_encoded
26790,1,76,6.738390,12988.310,-86.580447,36.214401,38.0,0.210526,0.105263,0.710526,...,0.11,0.58,0.68,0.74,0.29,0.09,2,646.0,28481.206648,26853.873683
31104,0,537,0.000000,2743.560,-74.005950,40.712780,143.0,0.552448,0.230769,0.643357,...,0.11,0.41,0.65,0.66,0.39,0.07,2,1164.0,8606.777092,34961.451645
16047,1,309,1.623141,11053.620,-80.721442,35.122317,26.0,0.115385,0.076923,0.576923,...,0.11,0.57,0.67,0.69,0.31,0.10,2,2152.0,26854.880413,31192.516874
3997,0,111,0.000000,8138.820,-80.721442,35.122317,26.0,0.115385,0.076923,0.576923,...,0.11,0.57,0.67,0.69,0.31,0.10,2,250.0,26155.095986,31192.516874
34939,0,64,0.000000,15273.880,-86.811380,33.405390,24.0,0.250000,0.166667,0.583333,...,0.15,0.58,0.61,0.69,0.35,0.08,2,896.0,33668.127594,14402.002957
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13102,1,78,2.918110,11674.990,-88.011847,41.743507,12.0,0.083333,0.000000,0.416667,...,0.11,0.46,0.66,0.70,0.33,0.08,2,635.0,29030.888931,26294.258057
12988,1,114,3.133022,12536.600,-75.165242,39.952632,11.0,0.636364,0.272727,0.909091,...,0.13,0.44,0.65,0.73,0.36,0.05,2,857.0,29030.888931,21725.518810
13049,1,135,2.934839,12109.270,-80.128575,26.307280,25.0,0.280000,0.160000,0.640000,...,,,,,,,2,1151.0,29030.888931,19252.156885
37126,0,58,0.000000,16834.090,-97.678900,30.508260,50.0,0.180000,0.040000,0.880000,...,0.10,0.58,0.71,0.74,0.23,0.13,2,990.0,36104.375246,28905.094486


26790    31065.590
31104     7346.390
16047    32035.780
3997     25894.820
34939    15958.690
           ...    
13102    26352.685
12988    27081.200
13049    22757.530
37126    28180.190
44974    17137.490
Name: priv_pay_median, Length: 899, dtype: float64

Unnamed: 0,priv_count,mcare_los,mcare_pay_median,Hospitals,PctTeaching,PctLargeHospital,PctPrivate,total_population,median_age,sex_ratio,...,frac_priv_insurance,frac_mcare_insurance,frac_no_insurance,mcare_count,group_encoded,CBSA_NAME_encoded,cluster,site,lat,lon
0,0.038348,0.690017,0.397455,0.161616,0.315789,0.315789,0.629274,0.084752,0.210526,0.431193,...,0.727273,0.285714,0.375000,0.072373,0.589353,0.355552,2.0,1.0,36.214401,-86.580447
1,0.718289,0.000000,0.083956,0.691919,0.828671,0.692308,0.526313,1.000000,0.345029,0.302752,...,0.363636,0.642857,0.250000,0.130406,0.000000,0.578647,2.0,0.0,40.712780,-74.005950
2,0.382006,0.166211,0.338252,0.101010,0.173077,0.230769,0.424481,0.120194,0.263158,0.275229,...,0.500000,0.357143,0.437500,0.241093,0.541126,0.474937,2.0,1.0,35.122317,-80.721442
3,0.089971,0.000000,0.249056,0.101010,0.173077,0.230769,0.424481,0.120194,0.263158,0.275229,...,0.500000,0.357143,0.437500,0.028008,0.520375,0.474937,2.0,0.0,35.122317,-80.721442
4,0.020649,0.000000,0.467396,0.090909,0.375000,0.500000,0.434306,0.038397,0.339181,0.165138,...,0.500000,0.500000,0.312500,0.100381,0.743165,0.012916,2.0,0.0,33.405390,-86.811380
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
894,0.041298,0.298817,0.357266,0.030303,0.125000,0.000000,0.178832,0.471208,0.286550,0.495413,...,0.545455,0.428571,0.312500,0.071140,0.605653,0.340153,2.0,1.0,41.743507,-88.011847
895,0.094395,0.320824,0.383632,0.025253,0.954545,0.818182,0.933643,0.302036,0.333333,0.275229,...,0.681818,0.535714,0.125000,0.096012,0.605653,0.214436,2.0,1.0,39.952632,-75.165242
896,0.125369,0.300530,0.370555,0.095960,0.420000,0.480000,0.521168,0.091428,0.390142,0.482307,...,0.279221,0.500000,0.571429,0.128949,0.605653,0.146377,2.0,1.0,26.307280,-80.128575
897,0.011799,0.000000,0.515140,0.222222,0.270000,0.120000,0.889051,0.102223,0.134503,0.908257,...,0.727273,0.071429,0.625000,0.110912,0.815409,0.411995,2.0,0.0,30.508260,-97.678900


26790    31065.590
31104     7346.390
16047    32035.780
3997     25894.820
34939    15958.690
           ...    
13102    26352.685
12988    27081.200
13049    22757.530
37126    28180.190
44974    17137.490
Name: priv_pay_median, Length: 899, dtype: float64

3.9955555555555557




[12, 7, 5]


Unnamed: 0,site,priv_count,mcare_los,mcare_pay_median,lon,lat,Hospitals,PctTeaching,PctLargeHospital,PctPrivate,...,frac_disability,non_citizen,employment_rate,frac_priv_insurance,frac_mcare_insurance,frac_no_insurance,cluster,mcare_count,group_encoded,CBSA_NAME_encoded
7113,1,68,4.232210,28678.36,-82.388578,28.274347,49.0,0.285714,0.081633,0.836735,...,0.14,0.40,0.61,0.64,0.37,0.11,1,534.0,71979.981897,84241.530937
6389,1,108,3.581216,26354.81,-86.580447,36.214401,38.0,0.210526,0.105263,0.710526,...,0.11,0.58,0.68,0.74,0.29,0.09,1,1299.0,71979.981897,65580.383448
6458,1,107,4.670782,36389.98,-122.195948,47.607680,38.0,0.368421,0.026316,0.421053,...,,,,,,,1,729.0,71979.981897,68499.161524
6273,1,126,4.506932,31686.59,-104.845462,39.514285,37.0,0.405405,0.135135,0.756757,...,0.10,0.52,0.72,0.71,0.30,0.08,1,1154.0,71979.981897,84825.981201
6401,1,66,3.200772,26767.84,-97.516428,35.467560,45.0,0.244444,0.088889,0.466667,...,0.15,0.61,0.65,0.67,0.35,0.12,1,777.0,71979.981897,56666.853035
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7019,1,70,3.355685,27582.49,-75.165242,39.952632,31.0,0.290323,0.032258,0.774194,...,0.13,0.44,0.65,0.73,0.36,0.05,1,343.0,71979.981897,68246.864961
6969,1,50,3.878846,27615.93,-81.655651,30.332184,22.0,0.227273,0.136364,0.909091,...,0.13,0.39,0.63,0.70,0.34,0.10,1,520.0,71979.981897,68499.161524
6678,1,81,4.435897,36471.80,-118.255362,34.140662,204.0,0.166667,0.058824,0.750000,...,,,,,,,1,1326.0,71979.981897,66170.896418
29469,1,105,9.177408,20369.52,-74.005950,40.712780,143.0,0.552448,0.230769,0.643357,...,0.11,0.41,0.65,0.66,0.39,0.07,1,1381.0,35227.033662,92398.468591


7113     97049.600
6389     58350.020
6458     74509.090
6273     90158.015
6401     50237.980
           ...    
7019     69300.165
6969     70849.085
6678     71157.810
29469    41394.000
7077     47677.000
Name: priv_pay_median, Length: 105, dtype: float64

Unnamed: 0,priv_count,mcare_los,mcare_pay_median,Hospitals,PctTeaching,PctLargeHospital,PctPrivate,total_population,median_age,sex_ratio,...,frac_priv_insurance,frac_mcare_insurance,frac_no_insurance,mcare_count,group_encoded,CBSA_NAME_encoded,cluster,site,lat,lon
0,0.062069,0.222213,0.436972,0.209184,0.341219,0.270560,0.845714,0.135918,1.000000,0.265823,...,0.235294,0.777778,0.428571,0.194624,0.625611,0.824493,1.0,1.0,28.274347,-82.388578
1,0.200000,0.135375,0.373622,0.153061,0.211197,0.386139,0.633684,0.072948,0.271429,0.341772,...,0.823529,0.333333,0.285714,0.575411,0.625611,0.422975,1.0,1.0,36.214401,-86.580447
2,0.196552,0.280716,0.647223,0.153061,0.484244,0.000000,0.147368,0.120545,0.282857,0.784810,...,0.423529,0.577778,0.385714,0.291687,0.625611,0.485776,1.0,1.0,47.607680,-122.195948
3,0.262069,0.258860,0.518989,0.147959,0.548201,0.532245,0.711351,0.123024,0.285714,0.962025,...,0.647059,0.388889,0.214286,0.503235,0.625611,0.837068,1.0,1.0,39.514285,-104.845462
4,0.055172,0.084625,0.384883,0.188776,0.269851,0.306051,0.224000,0.043089,0.114286,0.455696,...,0.411765,0.666667,0.500000,0.315580,0.625611,0.231189,1.0,1.0,35.467560,-97.516428
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
100,0.068966,0.105290,0.407094,0.117347,0.349188,0.029064,0.740645,0.293034,0.571429,0.126582,...,0.764706,0.722222,0.000000,0.099552,0.625611,0.480348,1.0,1.0,39.952632,-75.165242
101,0.000000,0.175077,0.408005,0.071429,0.240156,0.538254,0.967273,0.053324,0.600000,0.227848,...,0.588235,0.611111,0.357143,0.187656,0.625611,0.485776,1.0,1.0,30.332184,-81.655651
102,0.106897,0.249384,0.649454,1.000000,0.135350,0.158998,0.700000,0.132660,0.271429,0.822785,...,0.376471,0.555556,0.442857,0.588850,0.625611,0.435681,1.0,1.0,34.140662,-118.255362
103,0.189655,0.881875,0.210438,0.688776,0.802481,1.000000,0.520839,1.000000,0.600000,0.164557,...,0.352941,0.888889,0.142857,0.616227,0.000000,1.000000,1.0,1.0,40.712780,-74.005950


7113     97049.600
6389     58350.020
6458     74509.090
6273     90158.015
6401     50237.980
           ...    
7019     69300.165
6969     70849.085
6678     71157.810
29469    41394.000
7077     47677.000
Name: priv_pay_median, Length: 105, dtype: float64

3.888888888888889


## Run XGBoost model

In [12]:
train_mapes = []
train_sizes = []
test_mapes = []
test_sizes = []

# Train test split
for idx in range(0,len(X_dev_list)):
    print(f"Index is: {idx}")
    
    # Parameterization
    mono = {'site': 1}

    param_grid = {
        'booster':['gbtree','dart'],
        'colsample_bylevel':[1],
        'colsample_bytree':[1],
        'enable_categorical':[False],
        'gamma':[0],
        'gpu_id':[-1],
        'interaction_constraints':[''],
        'max_delta_step':[0],
        'min_child_weight':[1],
        'missing':[np.nan],
#         'n_estimators':[175,200,225,250],
        'n_estimators':[175,225,250],
        'n_jobs':[8],
        'predictor':['auto'],
        'reg_alpha':[0],
        'scale_pos_weight':[1],
        'tree_method':['exact'],
        'validate_parameters':[1],
        'learning_rate':[1],
#         'max_depth':[10,15,20,25],
        'max_depth':[10,20,25],
        'num_parallel_tree':[250],
        'objective':['reg:squarederror'],
        'subsample':[0.8],
        'random_state':[RDM_SEED],
#         'reg_lambda':[0,0.1,0.15,0.2,0.25,0.3]
        'reg_lambda':[0,0.1,0.2],
        'tree_method':['approx','hist']

    }
    if MONOTONE_MODEL:
        param_grid['monotone_constraints'] = [mono]

    # Create, run, and tune (if applicable) model
    xgb_param_tuning_model = xgb.XGBRFRegressor(
#                                                 n_estimators = 250,
#                                                 max_depth=25,
#                                                 reg_lambda=0,
#                                                 booster = 'gbtree'
                                               )
    
    xgb_mono_model = GridSearchCV(xgb_param_tuning_model, param_grid, scoring='neg_mean_absolute_percentage_error')
    xgb_mono_model.fit(X_dev_list[idx], y_dev_list[idx])
    
    # Output optimal params (if applicable)
    print(f"Best parameters (if grid search was applied): {xgb_mono_model.best_params_}")
    
    # Predict on train and test data
    y_train_pred_xgb = xgb_mono_model.predict(X_dev_list[idx])
    y_test_pred_xgb = xgb_mono_model.predict(X_test_list[idx])

    # Store results
    train_sizes.append(len(X_dev_list[idx]))
    test_sizes.append(len(X_test_list[idx]))
    train_mapes.append(mean_absolute_percentage_error(y_true=y_dev_list[idx], y_pred=y_train_pred_xgb))
    test_mapes.append(mean_absolute_percentage_error(y_true=y_test_list[idx], y_pred=y_test_pred_xgb))
    

train_mapes = np.array(train_mapes)
train_sizes = np.array(train_sizes)
test_mapes = np.array(test_mapes)
test_sizes = np.array(test_sizes)

# Output results?
print(f"Random Forest with Threshold >{COUNT_THRESH} claims for training set:")
print(f"Train MAPEs: {train_mapes}")
print(f"Train sizes: {train_sizes}")
print(f"Test MAPEs: {test_mapes}")
print(f"Test sizes: {test_sizes}")
print(f"Total train MAPE: {((train_mapes * train_sizes) / (train_sizes.sum())).sum()}")
print(f"Total test MAPE: {((test_mapes * test_sizes) / (test_sizes.sum())).sum()}")

Index is: 0


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.co

  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.co

  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.co

  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.co

  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.co

  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.co

  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.co

  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.co

  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.co

  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.co

  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.co

  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.co

  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.co

  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.co

  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.co

  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.co

  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.co

  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.co

  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


Best parameters (if grid search was applied): {'booster': 'dart', 'colsample_bylevel': 1, 'colsample_bytree': 1, 'enable_categorical': False, 'gamma': 0, 'gpu_id': -1, 'interaction_constraints': '', 'learning_rate': 1, 'max_delta_step': 0, 'max_depth': 20, 'min_child_weight': 1, 'missing': nan, 'monotone_constraints': {'site': 1}, 'n_estimators': 250, 'n_jobs': 8, 'num_parallel_tree': 250, 'objective': 'reg:squarederror', 'predictor': 'auto', 'random_state': 123, 'reg_alpha': 0, 'reg_lambda': 0.1, 'scale_pos_weight': 1, 'subsample': 0.8, 'tree_method': 'hist', 'validate_parameters': 1}
Index is: 1


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.co

  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.co

  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.co

  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.co

  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.co

KeyboardInterrupt: 