# 202108090846_XGBoostRegressor_tree_sweep
Another sweep -- this time with some different hyperparameters, based on the [first sweep's results](https://wandb.ai/hushifang/202108_Kaggle_tabular_playground/sweeps/4abje3y9?workspace=user-hushifang). This one will be for tree models only (plus DART).

In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

# general ML tooling
from sklearn.model_selection import train_test_split, StratifiedKFold#, KFold
from sklearn.metrics import mean_squared_error
import wandb
from wandb.xgboost import wandb_callback
# import timm
from pathlib import Path
import os
import math

In [10]:
# import torch
# import torch.nn as nn
# import torch.nn.functional as F
# from torchinfo import summary
# # from fastai.callback.wandb import *
# from torch.cuda.amp import autocast, GradScaler

In [11]:
# from fastai.vision.all import *
# from fastai.tabular.all import *
# from fastai.callback.wandb import WandbCallback

In [12]:
# import xgboost as xgb
from xgboost import XGBRegressor, XGBRFRegressor
# from xgboost import XGBClassifier
# from sklearn.ensemble import RandomForestClassifier
# from sklearn.tree import DecisionTreeClassifier

In [13]:
# %config Completer.use_jedi = False

In [14]:
# set_seed(42, reproducible=True)

In [15]:
datapath = Path('/media/sf/easystore/kaggle_data/tabular_playgrounds/202108_august/')

In [16]:
df = pd.read_csv(datapath/'train.csv', index_col='id', low_memory=False)

In [17]:
# df.isnull().sum().any() # False

In [18]:
# df.info(verbose=True, null_counts=True)

So no NaNs.

In [19]:
# df.describe()

In [20]:
y = df.loss

In [21]:
features = [x for x in df.columns if x != 'loss']

In [22]:
X = df[features]

In [23]:
len(X)

250000

In [24]:
df.head()

Unnamed: 0_level_0,f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,...,f91,f92,f93,f94,f95,f96,f97,f98,f99,loss
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,-0.00235,59,0.766739,-1.35046,42.2727,16.6857,30.3599,1.2673,0.392007,1.09101,...,-42.4399,26.854,1.45751,0.696161,0.941764,1.82847,0.92409,2.29658,10.4898,15
1,0.784462,145,-0.463845,-0.530421,27324.9,3.47545,160.498,0.828007,3.73586,1.28138,...,-184.132,7.90137,1.70644,-0.494699,-2.0583,0.819184,0.439152,2.3647,1.14383,3
2,0.317816,19,-0.432571,-0.382644,1383.26,19.7129,31.1026,-0.515354,34.4308,1.2421,...,7.43721,37.2181,3.25339,0.337934,0.615037,2.21676,0.745268,1.69679,12.3055,6
3,0.210753,17,-0.616454,0.946362,-119.253,4.08235,185.257,1.38331,-47.5214,1.0913,...,9.66778,0.626942,1.49425,0.517513,-10.2221,2.62731,0.61727,1.45645,10.0288,2
4,0.439671,20,0.968126,-0.092546,74.302,12.3065,72.186,-0.233964,24.3991,1.10151,...,290.657,15.6043,1.73557,-0.476668,1.39019,2.19574,0.826987,1.78485,7.07197,1


In [25]:
len(y), len(y.value_counts())

(250000, 43)

So, it seems that we're actually looking at a classification problem here -- not a regression one.

# XGBRegressor

For the Sweep, you have to encapsulate your configs, your `wandb.init`, and everything else into a single function that will be called on each sweep iteration.

In [26]:
sweep_config = {
    "method": "grid", # try grid or random
    "metric": {
      "name": "rmse",
      "goal": "minimize"   
    },
    "parameters": {
        "booster": {
            "values": ["gbtree", "dart"]
        },
        "max_depth": {
            "values": [3, 6, 9, 12]
        },
        "learning_rate": {
            "values": [0.1, 0.2, 0.3]
        },
        "subsample": {
            "values": [1, 0.5]
        },
        "n_estimators": {
            "values": [50, 125, 200]
        }
    }
}

In [27]:
# %env "WANDB_NOTEBOOK_NAME" "202108090846_XGBoostRegressor_tree_sweep"
os.environ['WANDB_NOTEBOOK_NAME'] = '202108090846_XGBoostRegressor_tree_sweep.ipynb'

In [28]:
sweep_id = wandb.sweep(sweep_config, project="202108_Kaggle_tabular_playground")

Create sweep with ID: 81jyype5
Sweep URL: https://wandb.ai/hushifang/202108_Kaggle_tabular_playground/sweeps/81jyype5


In [29]:
def train():
    config_defaults = {
        "library": "xgboost",
        "tree_method": "auto", # set to 'gpu_hist' to try GPU if available
        "booster": 'gbtree',
        "n_estimators": 100,
        "max_depth": 6,
        "learning_rate": 0.1,
        "subsample": 1,
        "seed": 42,
        "test_size": 0.2,
    }

    wandb.init(
#         project="202108_Kaggle_tabular_playground",
        save_code=True,
        tags=['XGBoost'],
        name='202108080949_XGBRegressor_tree_sweep',
        notes='XGBRegressor sweep using tree boosting (gbtree and dart) and the GPU',
        config=config_defaults)
    
    config = wandb.config
        
    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=config.test_size, random_state=config.seed)
    
    model = XGBRegressor(
        tree_method=config.tree_method,
        booster=config.booster,
        n_estimators=config.n_estimators, 
        max_depth=config.max_depth,
        learning_rate=config.learning_rate, 
        subsample=config.subsample,
        random_state=config.seed,
        test_size=config.test_size,
        n_jobs=-1, 
        verbosity=1, 
    )
    wandb.log({'params': model.get_params()})
    model.fit(X_train, y_train, callbacks=[wandb.xgboost.wandb_callback()])
    y_preds = model.predict(X_valid)
    mse = mean_squared_error(y_valid, y_preds)
    rmse = math.sqrt(abs(mse))
    wandb.log({'mse':mse, 'rmse':rmse})
    print(f"MSE is {mse}\nRMSE is {rmse}")   
    

In [30]:
?wandb.agent

In [None]:
wandb.agent('mwlm6rsr', function=train)  

[34m[1mwandb[0m: Agent Starting Run: ahcbblr0 with config:
[34m[1mwandb[0m: 	booster: dart
[34m[1mwandb[0m: 	learning_rate: 0.1
[34m[1mwandb[0m: 	max_depth: 12
[34m[1mwandb[0m: 	n_estimators: 50
[34m[1mwandb[0m: 	subsample: 1
[34m[1mwandb[0m: Currently logged in as: [33mhushifang[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.11.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade




MSE is 62.221654832952375
RMSE is 7.888070412525003


VBox(children=(Label(value=' 0.05MB of 0.05MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
_runtime,56.0
_timestamp,1628618243.0
_step,51.0
mse,62.22165
rmse,7.88807


0,1
_runtime,▁█
_timestamp,▁█
_step,▁█
mse,▁
rmse,▁


[34m[1mwandb[0m: Agent Starting Run: 3ni3oc1j with config:
[34m[1mwandb[0m: 	booster: dart
[34m[1mwandb[0m: 	learning_rate: 0.1
[34m[1mwandb[0m: 	max_depth: 12
[34m[1mwandb[0m: 	n_estimators: 50
[34m[1mwandb[0m: 	subsample: 0.5
[34m[1mwandb[0m: wandb version 0.11.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade




MSE is 62.85211582424859
RMSE is 7.927932632423701


VBox(children=(Label(value=' 0.06MB of 0.06MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
_runtime,46.0
_timestamp,1628618332.0
_step,51.0
mse,62.85212
rmse,7.92793


0,1
_runtime,▁█
_timestamp,▁█
_step,▁█
mse,▁
rmse,▁


[34m[1mwandb[0m: Agent Starting Run: i9xs7nyr with config:
[34m[1mwandb[0m: 	booster: dart
[34m[1mwandb[0m: 	learning_rate: 0.1
[34m[1mwandb[0m: 	max_depth: 12
[34m[1mwandb[0m: 	n_estimators: 125
[34m[1mwandb[0m: 	subsample: 1
[34m[1mwandb[0m: wandb version 0.11.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade




MSE is 62.55799323216011
RMSE is 7.909361114031911


VBox(children=(Label(value=' 0.07MB of 0.07MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
_runtime,145.0
_timestamp,1628618496.0
_step,126.0
mse,62.55799
rmse,7.90936


0,1
_runtime,▁█
_timestamp,▁█
_step,▁█
mse,▁
rmse,▁


[34m[1mwandb[0m: Agent Starting Run: q9xq3etx with config:
[34m[1mwandb[0m: 	booster: dart
[34m[1mwandb[0m: 	learning_rate: 0.1
[34m[1mwandb[0m: 	max_depth: 12
[34m[1mwandb[0m: 	n_estimators: 125
[34m[1mwandb[0m: 	subsample: 0.5
[34m[1mwandb[0m: wandb version 0.11.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade




MSE is 63.931348974943134
RMSE is 7.9957081596906185


VBox(children=(Label(value=' 0.07MB of 0.07MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
_runtime,144.0
_timestamp,1628618652.0
_step,126.0
mse,63.93135
rmse,7.99571


0,1
_runtime,▁█
_timestamp,▁█
_step,▁█
mse,▁
rmse,▁


[34m[1mwandb[0m: Agent Starting Run: h8m94owt with config:
[34m[1mwandb[0m: 	booster: dart
[34m[1mwandb[0m: 	learning_rate: 0.1
[34m[1mwandb[0m: 	max_depth: 12
[34m[1mwandb[0m: 	n_estimators: 200
[34m[1mwandb[0m: 	subsample: 1
[34m[1mwandb[0m: wandb version 0.11.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade




MSE is 62.93615091318708
RMSE is 7.933230799188126


VBox(children=(Label(value=' 0.08MB of 0.08MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
_runtime,333.0
_timestamp,1628618995.0
_step,201.0
mse,62.93615
rmse,7.93323


0,1
_runtime,▁█
_timestamp,▁█
_step,▁█
mse,▁
rmse,▁


[34m[1mwandb[0m: Agent Starting Run: rmnpchsj with config:
[34m[1mwandb[0m: 	booster: dart
[34m[1mwandb[0m: 	learning_rate: 0.1
[34m[1mwandb[0m: 	max_depth: 12
[34m[1mwandb[0m: 	n_estimators: 200
[34m[1mwandb[0m: 	subsample: 0.5
[34m[1mwandb[0m: wandb version 0.11.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade




MSE is 64.74018687153294
RMSE is 8.04612868847702


VBox(children=(Label(value=' 0.08MB of 0.08MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
_runtime,284.0
_timestamp,1628619293.0
_step,201.0
mse,64.74019
rmse,8.04613


0,1
_runtime,▁█
_timestamp,▁█
_step,▁█
mse,▁
rmse,▁


[34m[1mwandb[0m: Agent Starting Run: lkc78zdy with config:
[34m[1mwandb[0m: 	booster: dart
[34m[1mwandb[0m: 	learning_rate: 0.2
[34m[1mwandb[0m: 	max_depth: 3
[34m[1mwandb[0m: 	n_estimators: 50
[34m[1mwandb[0m: 	subsample: 1
[34m[1mwandb[0m: wandb version 0.11.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade




MSE is 61.79343372989
RMSE is 7.860879959005225


VBox(children=(Label(value=' 0.08MB of 0.08MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
_runtime,26.0
_timestamp,1628619330.0
_step,51.0
mse,61.79343
rmse,7.86088


0,1
_runtime,▁█
_timestamp,▁█
_step,▁█
mse,▁
rmse,▁


[34m[1mwandb[0m: Agent Starting Run: 0uioaz95 with config:
[34m[1mwandb[0m: 	booster: dart
[34m[1mwandb[0m: 	learning_rate: 0.2
[34m[1mwandb[0m: 	max_depth: 3
[34m[1mwandb[0m: 	n_estimators: 50
[34m[1mwandb[0m: 	subsample: 0.5
[34m[1mwandb[0m: wandb version 0.11.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade




MSE is 61.76117830964399
RMSE is 7.858828049375046


VBox(children=(Label(value=' 0.08MB of 0.08MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
_runtime,20.0
_timestamp,1628619361.0
_step,51.0
mse,61.76118
rmse,7.85883


0,1
_runtime,▁█
_timestamp,▁█
_step,▁█
mse,▁
rmse,▁


[34m[1mwandb[0m: Agent Starting Run: o67igx05 with config:
[34m[1mwandb[0m: 	booster: dart
[34m[1mwandb[0m: 	learning_rate: 0.2
[34m[1mwandb[0m: 	max_depth: 3
[34m[1mwandb[0m: 	n_estimators: 125
[34m[1mwandb[0m: 	subsample: 1
[34m[1mwandb[0m: wandb version 0.11.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade




MSE is 61.45890760469879
RMSE is 7.839573177456716


VBox(children=(Label(value=' 0.10MB of 0.10MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
_runtime,87.0
_timestamp,1628619459.0
_step,126.0
mse,61.45891
rmse,7.83957


0,1
_runtime,▁█
_timestamp,▁█
_step,▁█
mse,▁
rmse,▁


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: w6ns2d0q with config:
[34m[1mwandb[0m: 	booster: dart
[34m[1mwandb[0m: 	learning_rate: 0.2
[34m[1mwandb[0m: 	max_depth: 3
[34m[1mwandb[0m: 	n_estimators: 125
[34m[1mwandb[0m: 	subsample: 0.5
[34m[1mwandb[0m: wandb version 0.11.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade




MSE is 61.509963874576634
RMSE is 7.842828818390507


VBox(children=(Label(value=' 0.11MB of 0.11MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
_runtime,94.0
_timestamp,1628619633.0
_step,126.0
mse,61.50996
rmse,7.84283


0,1
_runtime,▁█
_timestamp,▁█
_step,▁█
mse,▁
rmse,▁


[34m[1mwandb[0m: Agent Starting Run: 73rozdha with config:
[34m[1mwandb[0m: 	booster: dart
[34m[1mwandb[0m: 	learning_rate: 0.2
[34m[1mwandb[0m: 	max_depth: 3
[34m[1mwandb[0m: 	n_estimators: 200
[34m[1mwandb[0m: 	subsample: 1
[34m[1mwandb[0m: wandb version 0.11.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade




MSE is 61.37179241880554
RMSE is 7.83401508926333


VBox(children=(Label(value=' 0.12MB of 0.12MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
_runtime,214.0
_timestamp,1628619857.0
_step,201.0
mse,61.37179
rmse,7.83402


0,1
_runtime,▁█
_timestamp,▁█
_step,▁█
mse,▁
rmse,▁


[34m[1mwandb[0m: Agent Starting Run: 0owpiyy3 with config:
[34m[1mwandb[0m: 	booster: dart
[34m[1mwandb[0m: 	learning_rate: 0.2
[34m[1mwandb[0m: 	max_depth: 3
[34m[1mwandb[0m: 	n_estimators: 200
[34m[1mwandb[0m: 	subsample: 0.5
[34m[1mwandb[0m: wandb version 0.11.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade




MSE is 61.55942254977895
RMSE is 7.84598129935185


VBox(children=(Label(value=' 0.12MB of 0.12MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
_runtime,210.0
_timestamp,1628620078.0
_step,201.0
mse,61.55942
rmse,7.84598


0,1
_runtime,▁█
_timestamp,▁█
_step,▁█
mse,▁
rmse,▁


[34m[1mwandb[0m: Agent Starting Run: 4652jygd with config:
[34m[1mwandb[0m: 	booster: dart
[34m[1mwandb[0m: 	learning_rate: 0.2
[34m[1mwandb[0m: 	max_depth: 6
[34m[1mwandb[0m: 	n_estimators: 50
[34m[1mwandb[0m: 	subsample: 1
[34m[1mwandb[0m: wandb version 0.11.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade




MSE is 61.805495852354234
RMSE is 7.861647146263576


VBox(children=(Label(value=' 0.12MB of 0.12MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
_runtime,32.0
_timestamp,1628620122.0
_step,51.0
mse,61.8055
rmse,7.86165


0,1
_runtime,▁█
_timestamp,▁█
_step,▁█
mse,▁
rmse,▁


[34m[1mwandb[0m: Agent Starting Run: pvwi81py with config:
[34m[1mwandb[0m: 	booster: dart
[34m[1mwandb[0m: 	learning_rate: 0.2
[34m[1mwandb[0m: 	max_depth: 6
[34m[1mwandb[0m: 	n_estimators: 50
[34m[1mwandb[0m: 	subsample: 0.5
[34m[1mwandb[0m: wandb version 0.11.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade




MSE is 62.10754814241743
RMSE is 7.880834228837543


VBox(children=(Label(value=' 0.13MB of 0.13MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
_runtime,31.0
_timestamp,1628620163.0
_step,51.0
mse,62.10755
rmse,7.88083


0,1
_runtime,▁█
_timestamp,▁█
_step,▁█
mse,▁
rmse,▁


[34m[1mwandb[0m: Agent Starting Run: qnwfkgxz with config:
[34m[1mwandb[0m: 	booster: dart
[34m[1mwandb[0m: 	learning_rate: 0.2
[34m[1mwandb[0m: 	max_depth: 6
[34m[1mwandb[0m: 	n_estimators: 125
[34m[1mwandb[0m: 	subsample: 1
[34m[1mwandb[0m: wandb version 0.11.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade




MSE is 61.86766415364722
RMSE is 7.86560005045052


VBox(children=(Label(value=' 0.14MB of 0.14MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
_runtime,112.0
_timestamp,1628620285.0
_step,126.0
mse,61.86766
rmse,7.8656


0,1
_runtime,▁█
_timestamp,▁█
_step,▁█
mse,▁
rmse,▁


[34m[1mwandb[0m: Agent Starting Run: 44cpjcfy with config:
[34m[1mwandb[0m: 	booster: dart
[34m[1mwandb[0m: 	learning_rate: 0.2
[34m[1mwandb[0m: 	max_depth: 6
[34m[1mwandb[0m: 	n_estimators: 125
[34m[1mwandb[0m: 	subsample: 0.5
[34m[1mwandb[0m: wandb version 0.11.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade




MSE is 62.80520090232392
RMSE is 7.924973243003658


VBox(children=(Label(value=' 0.15MB of 0.15MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
_runtime,104.0
_timestamp,1628620400.0
_step,126.0
mse,62.8052
rmse,7.92497


0,1
_runtime,▁█
_timestamp,▁█
_step,▁█
mse,▁
rmse,▁


[34m[1mwandb[0m: Agent Starting Run: sz36a6e0 with config:
[34m[1mwandb[0m: 	booster: dart
[34m[1mwandb[0m: 	learning_rate: 0.2
[34m[1mwandb[0m: 	max_depth: 6
[34m[1mwandb[0m: 	n_estimators: 200
[34m[1mwandb[0m: 	subsample: 1
[34m[1mwandb[0m: wandb version 0.11.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade




MSE is 62.0552558390581
RMSE is 7.877515841879221


VBox(children=(Label(value=' 0.15MB of 0.15MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
_runtime,209.0
_timestamp,1628620620.0
_step,201.0
mse,62.05526
rmse,7.87752


0,1
_runtime,▁█
_timestamp,▁█
_step,▁█
mse,▁
rmse,▁


[34m[1mwandb[0m: Agent Starting Run: gw88772w with config:
[34m[1mwandb[0m: 	booster: dart
[34m[1mwandb[0m: 	learning_rate: 0.2
[34m[1mwandb[0m: 	max_depth: 6
[34m[1mwandb[0m: 	n_estimators: 200
[34m[1mwandb[0m: 	subsample: 0.5
[34m[1mwandb[0m: wandb version 0.12.0 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade




MSE is 63.55100212283263
RMSE is 7.971888240738992


Retry attempt failed:
Traceback (most recent call last):
  File "/home/sf/anaconda3/envs/tabular/lib/python3.8/site-packages/urllib3/connection.py", line 158, in _new_conn
    conn = connection.create_connection(
  File "/home/sf/anaconda3/envs/tabular/lib/python3.8/site-packages/urllib3/util/connection.py", line 57, in create_connection
    for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
  File "/home/sf/anaconda3/envs/tabular/lib/python3.8/socket.py", line 918, in getaddrinfo
    for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
socket.gaierror: [Errno -3] Temporary failure in name resolution

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/sf/anaconda3/envs/tabular/lib/python3.8/site-packages/urllib3/connectionpool.py", line 597, in urlopen
    httplib_response = self._make_request(conn, method, url,
  File "/home/sf/anaconda3/envs/tabular/lib/python3.8/site-packages/ur

VBox(children=(Label(value=' 0.16MB of 0.16MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
_runtime,229.0
_timestamp,1628620860.0
_step,201.0
mse,63.551
rmse,7.97189


0,1
_runtime,▁█
_timestamp,▁█
_step,▁█
mse,▁
rmse,▁


[34m[1mwandb[0m: Agent Starting Run: 1w16lna2 with config:
[34m[1mwandb[0m: 	booster: dart
[34m[1mwandb[0m: 	learning_rate: 0.2
[34m[1mwandb[0m: 	max_depth: 9
[34m[1mwandb[0m: 	n_estimators: 50
[34m[1mwandb[0m: 	subsample: 1
[34m[1mwandb[0m: wandb version 0.12.0 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade




MSE is 62.47507130339245
RMSE is 7.904117363968759


VBox(children=(Label(value=' 0.16MB of 0.16MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
_runtime,45.0
_timestamp,1628621896.0
_step,51.0
mse,62.47507
rmse,7.90412


0,1
_runtime,▁█
_timestamp,▁█
_step,▁█
mse,▁
rmse,▁


[34m[1mwandb[0m: Agent Starting Run: lia7zo2f with config:
[34m[1mwandb[0m: 	booster: dart
[34m[1mwandb[0m: 	learning_rate: 0.2
[34m[1mwandb[0m: 	max_depth: 9
[34m[1mwandb[0m: 	n_estimators: 50
[34m[1mwandb[0m: 	subsample: 0.5
[34m[1mwandb[0m: wandb version 0.12.0 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade




# Old:

In [31]:
model.fit(X_train, y_train, callbacks=[wandb.xgboost.wandb_callback()])
# model.fit(X, y)



XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
             importance_type='gain', interaction_constraints='',
             learning_rate=0.1, max_delta_step=0, max_depth=6,
             min_child_weight=1, missing=nan, monotone_constraints='()',
             n_estimators=200, n_jobs=-1, num_parallel_tree=1, random_state=42,
             reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
             tree_method='exact', validate_parameters=1, verbosity=1)

In [32]:
# wandb.sklearn.plot_regressor(model, X_train, X_valid, y_train, y_valid,  model_name='RandomForestRegressor', )

In [33]:
# y_pred = model.predict(X_valid)

In [34]:
# from sklearn.model_selection import cross_val_score

In [35]:
# scores = cross_val_score(model, X, y, cv=5, n_jobs=-1)

In [36]:
# model.get_depth()

In [38]:
y_valid_preds = model.predict(X_valid)



In [39]:
mse = mean_squared_error(y_valid, y_valid_preds)

In [40]:
mse

61.500556614829804

In [41]:
rmse = math.sqrt(mse)
rmse

7.842229059064126

In [None]:
wandb.log({'rmse': rmse, 'mse': mse, 'y_valid_preds': y_valid_preds})

In [42]:
# rmses = [math.sqrt(abs(score)) for score in scores]
# rmses

Let's give that a submit...

In [43]:
test_df = pd.read_csv(datapath/'test.csv', index_col='id', low_memory=False)

In [44]:
test_df.head()

Unnamed: 0_level_0,f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,...,f90,f91,f92,f93,f94,f95,f96,f97,f98,f99
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
250000,0.812665,15,-1.23912,-0.893251,295.577,15.8712,23.0436,0.942256,29.898,1.11394,...,0.446389,-422.332,-1.4463,1.69075,1.0593,-3.01057,1.94664,0.52947,1.38695,8.78767
250001,0.190344,131,-0.501361,0.801921,64.8866,3.09703,344.805,0.807194,38.4219,1.09695,...,0.377179,10352.2,21.0627,1.84351,0.251895,4.44057,1.90309,0.248534,0.863881,11.7939
250002,0.919671,19,-0.057382,0.901419,11961.2,16.3965,273.24,-0.0033,37.94,1.15222,...,0.99014,3224.02,-2.25287,1.551,-0.559157,17.8386,1.83385,0.931796,2.33687,9.054
250003,0.860985,19,-0.549509,0.471799,7501.6,2.80698,71.0817,0.792136,0.395235,1.20157,...,1.39688,9689.76,14.7715,1.4139,0.329272,0.802437,2.23251,0.893348,1.35947,4.84833
250004,0.313229,89,0.588509,0.167705,2931.26,4.34986,1.57187,1.1183,7.75463,1.16807,...,0.862502,2693.35,44.1805,1.5802,-0.191021,26.253,2.68238,0.361923,1.5328,3.7066


In [45]:
X_test = test_df[features] # this is just for naming consistency

In [46]:
y_test_preds = model.predict(X_test)



In [47]:
sample_df = pd.read_csv(datapath/'sample_submission.csv')

In [48]:
sample_df.loc[:, 'loss'] = y_test_preds

In [52]:
sample_df.head()

Unnamed: 0,id,loss
0,250000,7.32852
1,250001,4.610067
2,250002,7.277142
3,250003,7.030011
4,250004,7.187167


In [50]:
sample_df.to_csv('202108062038_XGBoost.csv', index=False)

In [51]:
wandb.finish()

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…