In [1]:
from autogluon.tabular import TabularDataset, TabularPredictor

import autosklearn.regression

from flaml import AutoML

import pandas as pd
import numpy as np
import os
import torch
import seaborn as sns
import matplotlib.pyplot as plt
import argparse
import logging
import pickle

from sklearn.metrics import accuracy_score, r2_score
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import shuffle


from IPython.display import Image, display

from datetime import datetime, timedelta

from scipy.stats import entropy

from __future__ import annotations

from pathlib import Path

2024-07-15 13:02:34,788	INFO util.py:154 -- Outdated packages:
  ipywidgets==7.8.0 found, needs ipywidgets>=8
Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.
2024-07-15 13:02:35,131	INFO util.py:154 -- Outdated packages:
  ipywidgets==7.8.0 found, needs ipywidgets>=8
Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.
Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.



In [2]:
random_seed = 42

In [3]:
## Function for loading one of the 10 folds of the property dataset and concatinating the X and y values for train and test respectively.

base_path = '../../data/361092' # Use this when running locally

def load_fold(fold_number, random_seed=random_seed, auto_gluon=False):
    df_X_train = pd.read_parquet(f'{base_path}/{fold_number}/X_train.parquet')
    df_y_train = pd.read_parquet(f'{base_path}/{fold_number}/y_train.parquet')
    df_X_test = pd.read_parquet(f'{base_path}/{fold_number}/X_test.parquet')
    df_y_test = pd.read_parquet(f'{base_path}/{fold_number}/y_test.parquet')

    # concatinating the X and y values for train (and test), but keeping for test
    df_train = pd.concat([df_X_train, df_y_train], axis=1)


    # Convert to AutoGluon's TabularDataset
    if not auto_gluon:
        return df_X_train, df_y_train, df_X_test, df_y_test
    
    else:
        df_test = pd.concat([df_X_test, df_y_test], axis=1)
        train_dataset = TabularDataset(df_train)
        test_dataset = TabularDataset(df_test)

        return train_dataset, test_dataset

# Also instantiate the target column
label = 'oz252'

# First we begin with AutoGluon run

In [4]:
# Make set out of half the folds

full_train = None
full_test = None

for fold_number in range(1, 11):
    train_dataset, test_dataset = load_fold(fold_number, random_seed=random_seed, auto_gluon=True)
    if full_train is None:
        full_train = train_dataset
        full_test = test_dataset
    else:
        # Use pd.concat to combine TabularDatasets
        full_train = pd.concat([full_train, train_dataset])
        full_test = pd.concat([full_test, test_dataset])


In [5]:
## Function to fit the model using AutoGluon

def fit_gluon(train_dataset, problem_type='regression', hyperparameters=None, eval_metric='r2', presets='medium_quality', time_limit=100, fit_weighted_ensemble=None, num_cpus = None, num_gpus=None, auto_stack=None, num_bag_folds=None, num_bag_sets=None, num_stack_levels=None, num_trials=None, verbosity=None, ag_args_fit=None, feature_prune=None, excluded_model_types=None, keep_only_best=None):
    predictor = TabularPredictor(label=label, problem_type=problem_type, eval_metric=eval_metric)

    fit_args = {
        'train_data': train_dataset,
        'presets': presets,
        'time_limit': time_limit,
    }

    if hyperparameters is not None:
        fit_args['hyperparameters'] = hyperparameters
    if auto_stack is not None:
        fit_args['auto_stack'] = auto_stack
    if num_bag_folds is not None:
        fit_args['num_bag_folds'] = num_bag_folds
    if num_bag_sets is not None:
        fit_args['num_bag_sets'] = num_bag_sets
    if num_stack_levels is not None:
        fit_args['num_stack_levels'] = num_stack_levels
    if num_trials is not None:
        fit_args['num_trials'] = num_trials
    if verbosity is not None:
        fit_args['verbosity'] = verbosity
    if ag_args_fit is not None:
        fit_args['ag_args_fit'] = ag_args_fit
    if feature_prune is not None:
        fit_args['feature_prune'] = feature_prune
    if excluded_model_types is not None:
        fit_args['excluded_model_types'] = excluded_model_types
    if fit_weighted_ensemble is not None:
        fit_args['fit_weighted_ensemble'] = fit_weighted_ensemble
    if num_cpus is not None:
        fit_args['num_cpus'] = num_cpus
    if num_gpus is not None:
        fit_args['num_gpus'] = num_gpus
    if keep_only_best is not None:
        fit_args['keep_only_best'] = keep_only_best

    predictor.fit(**fit_args)
    return predictor


In [6]:
autogluon_predictions = fit_gluon(full_train, time_limit=300)

No path specified. Models will be saved in: "AutogluonModels/ag-20240714_091252"
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.1.1
Python Version:     3.9.19
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Tue Jun 18 14:00:06 UTC 2024
CPU Count:          4
Memory Avail:       13.63 GB / 15.43 GB (88.3%)
Disk Space Avail:   6.66 GB / 24.99 GB (26.6%)
	We recommend a minimum available disk space of 10 GB, and large datasets may require more.
Presets specified: ['medium_quality']
Beginning AutoGluon training ... Time limit = 300s
AutoGluon will save models to "AutogluonModels/ag-20240714_091252"
Train Data Rows:    79965
Train Data Columns: 62
Label Column:       oz252
Problem Type:       regression
Preprocessing data ...
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    13960.56 MB
	Train Data (Original)  Memory Usage: 27.15 MB (0.2% of available memory)
	Inferring da

[1000]	valid_set's l2: 0.000349576	valid_set's r2: 0.560283
[2000]	valid_set's l2: 0.000223508	valid_set's r2: 0.718859
[3000]	valid_set's l2: 0.00014857	valid_set's r2: 0.81312
[4000]	valid_set's l2: 0.000104798	valid_set's r2: 0.868179
[5000]	valid_set's l2: 7.54596e-05	valid_set's r2: 0.905082
[6000]	valid_set's l2: 5.56457e-05	valid_set's r2: 0.930006
[7000]	valid_set's l2: 4.17541e-05	valid_set's r2: 0.947479
[8000]	valid_set's l2: 3.21521e-05	valid_set's r2: 0.959557
[9000]	valid_set's l2: 2.51354e-05	valid_set's r2: 0.968383
[10000]	valid_set's l2: 2.01413e-05	valid_set's r2: 0.974665


	0.9747	 = Validation score   (r2)
	76.89s	 = Training   runtime
	1.21s	 = Validation runtime
Fitting model: LightGBM ... Training model for up to 206.58s of the 206.57s of remaining time.


[1000]	valid_set's l2: 0.000148557	valid_set's r2: 0.813136
[2000]	valid_set's l2: 4.78074e-05	valid_set's r2: 0.939865
[3000]	valid_set's l2: 1.8812e-05	valid_set's r2: 0.976337
[4000]	valid_set's l2: 9.20317e-06	valid_set's r2: 0.988424
[5000]	valid_set's l2: 6.23347e-06	valid_set's r2: 0.992159
[6000]	valid_set's l2: 5.20264e-06	valid_set's r2: 0.993456
[7000]	valid_set's l2: 4.8503e-06	valid_set's r2: 0.993899
[8000]	valid_set's l2: 4.71645e-06	valid_set's r2: 0.994067
[9000]	valid_set's l2: 4.67143e-06	valid_set's r2: 0.994124
[10000]	valid_set's l2: 4.65759e-06	valid_set's r2: 0.994141


	0.9941	 = Validation score   (r2)
	70.93s	 = Training   runtime
	0.81s	 = Validation runtime
Fitting model: RandomForestMSE ... Training model for up to 134.01s of the 134.01s of remaining time.
		'squared_error'
Detailed Traceback:
joblib.externals.loky.process_executor._RemoteTraceback: 
"""
Traceback (most recent call last):
  File "/home/studio-lab-user/.conda/envs/default/lib/python3.9/site-packages/joblib/_utils.py", line 72, in __call__
    return self.func(**kwargs)
  File "/home/studio-lab-user/.conda/envs/default/lib/python3.9/site-packages/joblib/parallel.py", line 598, in __call__
    return [func(*args, **kwargs)
  File "/home/studio-lab-user/.conda/envs/default/lib/python3.9/site-packages/joblib/parallel.py", line 598, in <listcomp>
    return [func(*args, **kwargs)
  File "/home/studio-lab-user/.conda/envs/default/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 222, in __call__
    return self.function(*args, **kwargs)
  File "/home/studio-lab-user/.conda/envs

In [7]:
pred_autogluon = autogluon_predictions.predict(full_test.drop(columns=[label_property]))
eval = autogluon_predictions.evaluate(full_test)
print(eval)

{'r2': 0.9915471075229365, 'root_mean_squared_error': -0.002735305970449601, 'mean_squared_error': -7.481898751977232e-06, 'mean_absolute_error': -0.00033590307432918693, 'pearsonr': 0.9957650864257349, 'median_absolute_error': -3.381663131718504e-05}


# Then we do the AutoSklearn run

In [8]:
# Make set out of all the folds

full_train_X = None
full_train_y = None
full_test_X = None
full_test_y = None

for fold_number in range(1, 11):
    train_dataset_X, train_dataset_y, test_dataset_X, test_dataset_y = load_fold(fold_number, random_seed=random_seed, auto_gluon=False, )
    if full_train_X is None:
        full_train_X = train_dataset_X
        full_train_y = train_dataset_y
        full_test_X = test_dataset_X
        full_test_y = test_dataset_y
    else:
        # Use pd.concat to combine TabularDatasets
        full_train_X = pd.concat([full_train_X, train_dataset_X])
        full_train_y = pd.concat([full_train_y, train_dataset_y])
        full_test_X = pd.concat([full_test_X, test_dataset_X])
        full_test_y = pd.concat([full_test_y, test_dataset_y])

In [9]:
# Initialize AutoSklearnRegressor
autosklearn_regressor = autosklearn.regression.AutoSklearnRegressor(
    time_left_for_this_task=1200,#If running on big dataset, then it probably needs a lot of time
    n_jobs=-1
    )

In [10]:
# Fit the model on the full training data
autosklearn_regressor.fit(full_train_X, full_train_y)


AutoSklearnRegressor(ensemble_class=<class 'autosklearn.ensembles.ensemble_selection.EnsembleSelection'>,
                     n_jobs=-1, per_run_time_limit=480,
                     time_left_for_this_task=1200)

In [15]:
# Save the model
with open('autosklearn_model.pkl', 'wb') as f:
    pickle.dump(autosklearn_regressor, f)

In [37]:
# Load the model
with open('autosklearn_model.pkl', 'rb') as f:
    loaded_model = pickle.load(f)

In [11]:
autosklearn_predictions = autosklearn_regressor.predict(full_test_X)
autosklearn_score = r2_score(full_test_y, autosklearn_predictions)
print("AutoSklearn R2 score:", autosklearn_score)

AutoSklearn R2 score: 0.9883380014271478


# Trying a third autoML-library: FLAML

In [15]:
!pip install flaml



In [12]:
from flaml import AutoML
from sklearn.utils import shuffle


In [13]:
# Convert to np.array

full_train_X = np.array(full_train_X)
full_train_y = np.array(full_train_y)
full_test_X = np.array(full_test_X)
full_test_y = np.array(full_test_y)

In [14]:
flaml = AutoML()
flaml.fit(full_train_X, full_train_y, task="regression", time_budget=100)

[flaml.automl.logger: 07-14 09:38:09] {1680} INFO - task = regression
[flaml.automl.logger: 07-14 09:38:09] {1691} INFO - Evaluation method: holdout


NameError: name 'shuffle' is not defined

In [None]:
# Save the model
with open("flaml_model.pkl", "wb") as f:
    pickle.dump(flaml, f, pickle.HIGHEST_PROTOCOL)

# At prediction time
with open("flaml_model.pkl", "rb") as f:
    flaml = pickle.load(f)
pred = flaml.predict(full_test_X)

In [None]:
flaml_predictions = r2_score(full_test_y, pred)

# Comparison and ensemble

In [16]:
print("AutoGluon R2 score:", eval['r2'])  # Assuming 'eval' contains AutoGluon's evaluation results
print("AutoSklearn R2 score:", autosklearn_score)


AutoGluon R2 score: 0.9915471075229365
AutoSklearn R2 score: 0.9883380014271478


In [17]:
ensemble_predictions = (pred_autogluon + autosklearn_predictions) / 2
ensemble_score = r2_score(full_test_y, ensemble_predictions)
print("Ensemble R2 score:", ensemble_score)

Ensemble R2 score: 0.99075414632952


In [18]:
ensemble_predictions_2 = (2*pred_autogluon + autosklearn_predictions) / 3
ensemble_score_2 = r2_score(full_test_y, ensemble_predictions_2)
print("Ensemble 2 R2 score:", ensemble_score_2)

Ensemble 2 R2 score: 0.991198820472765


In [23]:
ensemble_predictions_3 = (9*pred_autogluon + autosklearn_predictions) / 10
ensemble_score_3 = r2_score(full_test_y, ensemble_predictions_3)
print("Ensemble 3 R2 score:", ensemble_score_3)

Ensemble 3 R2 score: 0.9915183700303908


# Test the other datasets as well

In [4]:
# Brazilian houses gluon and autosklearn

base_path = '../../data/361098' # [Brazilian Houses (361098)]

full_train = None
full_test = None

for fold_number in range(1, 11):
    train_dataset, test_dataset = load_fold(fold_number, random_seed=random_seed, auto_gluon=True)
    if full_train is None:
        full_train = train_dataset
        full_test = test_dataset
    else:
        # Use pd.concat to combine TabularDatasets
        full_train = pd.concat([full_train, train_dataset])
        full_test = pd.concat([full_test, test_dataset])
        
# Make set out of all the folds

full_train_X = None
full_train_y = None
full_test_X = None
full_test_y = None

for fold_number in range(1, 11):
    train_dataset_X, train_dataset_y, test_dataset_X, test_dataset_y = load_fold(fold_number, random_seed=random_seed, auto_gluon=False, )
    if full_train_X is None:
        full_train_X = train_dataset_X
        full_train_y = train_dataset_y
        full_test_X = test_dataset_X
        full_test_y = test_dataset_y
    else:
        # Use pd.concat to combine TabularDatasets
        full_train_X = pd.concat([full_train_X, train_dataset_X])
        full_train_y = pd.concat([full_train_y, train_dataset_y])
        full_test_X = pd.concat([full_test_X, test_dataset_X])
        full_test_y = pd.concat([full_test_y, test_dataset_y])
        
label = 'total_(BRL)'

print(full_train.head())

     city  area  rooms  bathroom  parking_spaces animal furniture  hoa_(BRL)  \
7259    4    45      2         1               0      1         1        162   
4348    0   278      4         4               4      0         1       2400   
653     3   280      3         2               2      0         0       3140   
1595    4    90      3         2               1      0         1        934   
1858    3    80      2         2               1      1         1       1050   

      rent_amount_(BRL)  property_tax_(BRL)  fire_insurance_(BRL)  total_(BRL)  
7259               1610                   8                    21     7.496652  
4348              12000                 155                   160     9.597030  
653                8000                1126                   104     9.423110  
1595               1450                  65                    19     7.811568  
1858               3500                 167                    46     8.468843  


In [7]:
autogluon_prediction_brazil = fit_gluon(full_train, time_limit=300)
pred_autogluon = autogluon_prediction_brazil.predict(full_test.drop(columns=[label]))
eval = autogluon_prediction_brazil.evaluate(full_test)

No path specified. Models will be saved in: "AutogluonModels/ag-20240714_120516"
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.1.1
Python Version:     3.9.19
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Tue Jun 18 14:00:06 UTC 2024
CPU Count:          4
Memory Avail:       13.56 GB / 15.43 GB (87.9%)
Disk Space Avail:   6.45 GB / 24.99 GB (25.8%)
	We recommend a minimum available disk space of 10 GB, and large datasets may require more.
Presets specified: ['medium_quality']
Beginning AutoGluon training ... Time limit = 300s
AutoGluon will save models to "AutogluonModels/ag-20240714_120516"
Train Data Rows:    96228
Train Data Columns: 11
Label Column:       total_(BRL)
Problem Type:       regression
Preprocessing data ...
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    13884.00 MB
	Train Data (Original)  Memory Usage: 4.22 MB (0.0% of available memory)
	Inferri

[1000]	valid_set's l2: 0.000462257	valid_set's r2: 0.999278
[2000]	valid_set's l2: 0.000180341	valid_set's r2: 0.999718
[3000]	valid_set's l2: 0.000106465	valid_set's r2: 0.999834
[4000]	valid_set's l2: 7.59989e-05	valid_set's r2: 0.999881
[5000]	valid_set's l2: 5.74621e-05	valid_set's r2: 0.99991
[6000]	valid_set's l2: 4.5701e-05	valid_set's r2: 0.999929
[7000]	valid_set's l2: 3.79761e-05	valid_set's r2: 0.999941
[8000]	valid_set's l2: 3.18395e-05	valid_set's r2: 0.99995
[9000]	valid_set's l2: 2.72457e-05	valid_set's r2: 0.999957
[10000]	valid_set's l2: 2.34447e-05	valid_set's r2: 0.999963


	1.0	 = Validation score   (r2)
	50.13s	 = Training   runtime
	1.1s	 = Validation runtime
Fitting model: LightGBM ... Training model for up to 247.0s of the 247.0s of remaining time.


[1000]	valid_set's l2: 2.82911e-05	valid_set's r2: 0.999956
[2000]	valid_set's l2: 9.02244e-06	valid_set's r2: 0.999986
[3000]	valid_set's l2: 3.76679e-06	valid_set's r2: 0.999994
[4000]	valid_set's l2: 1.79252e-06	valid_set's r2: 0.999997
[5000]	valid_set's l2: 8.96027e-07	valid_set's r2: 0.999999
[6000]	valid_set's l2: 4.65609e-07	valid_set's r2: 0.999999
[7000]	valid_set's l2: 2.53375e-07	valid_set's r2: 1
[8000]	valid_set's l2: 1.3326e-07	valid_set's r2: 1
[9000]	valid_set's l2: 7.60301e-08	valid_set's r2: 1
[10000]	valid_set's l2: 4.33734e-08	valid_set's r2: 1


	1.0	 = Validation score   (r2)
	36.17s	 = Training   runtime
	0.67s	 = Validation runtime
Fitting model: RandomForestMSE ... Training model for up to 209.38s of the 209.38s of remaining time.
		'squared_error'
Detailed Traceback:
joblib.externals.loky.process_executor._RemoteTraceback: 
"""
Traceback (most recent call last):
  File "/home/studio-lab-user/.conda/envs/default/lib/python3.9/site-packages/joblib/_utils.py", line 72, in __call__
    return self.func(**kwargs)
  File "/home/studio-lab-user/.conda/envs/default/lib/python3.9/site-packages/joblib/parallel.py", line 598, in __call__
    return [func(*args, **kwargs)
  File "/home/studio-lab-user/.conda/envs/default/lib/python3.9/site-packages/joblib/parallel.py", line 598, in <listcomp>
    return [func(*args, **kwargs)
  File "/home/studio-lab-user/.conda/envs/default/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 222, in __call__
    return self.function(*args, **kwargs)
  File "/home/studio-lab-user/.conda/envs/de

In [11]:
# Initialize AutoSklearnRegressor
autosklearn_regressor_brazil = autosklearn.regression.AutoSklearnRegressor(
    time_left_for_this_task=1200,#If running on big dataset, then it probably needs a lot of time
    n_jobs=-1,
    tmp_folder='/tmp/autosklearn_regression_brazil_tmp',
    )

In [13]:
autosklearn_regressor_brazil.fit(full_train_X, full_train_y)
autosklearn_predictions = autosklearn_regressor_brazil.predict(full_test_X)
autosklearn_score = r2_score(full_test_y, autosklearn_predictions)

In [14]:
print("AutoGluon R2 score:", eval['r2'])  # Assuming 'eval' contains AutoGluon's evaluation results
print("AutoSklearn R2 score:", autosklearn_score)

AutoGluon R2 score: 0.9999999999999176
AutoSklearn R2 score: 0.9999999987468355


In [15]:
ensemble_predictions_brazil = (2*pred_autogluon + autosklearn_predictions) / 3
ensemble_score_brazil = r2_score(full_test_y, ensemble_predictions_brazil)
print("Ensemble brazil R2 score:", ensemble_score_brazil)

Ensemble brazil R2 score: 0.9999999998606933


In [10]:
base_path = '../../data/361099' # Bike_Sharing_Demand (361099)
full_train = None
full_test = None

for fold_number in range(1, 11):
    train_dataset, test_dataset = load_fold(fold_number, random_seed=random_seed, auto_gluon=True)
    if full_train is None:
        full_train = train_dataset
        full_test = test_dataset
    else:
        # Use pd.concat to combine TabularDatasets
        full_train = pd.concat([full_train, train_dataset])
        full_test = pd.concat([full_test, test_dataset])
        
# Make set out of all the folds

full_train_X = None
full_train_y = None
full_test_X = None
full_test_y = None

for fold_number in range(1, 11):
    train_dataset_X, train_dataset_y, test_dataset_X, test_dataset_y = load_fold(fold_number, random_seed=random_seed, auto_gluon=False, )
    if full_train_X is None:
        full_train_X = train_dataset_X
        full_train_y = train_dataset_y
        full_test_X = test_dataset_X
        full_test_y = test_dataset_y
    else:
        # Use pd.concat to combine TabularDatasets
        full_train_X = pd.concat([full_train_X, train_dataset_X])
        full_train_y = pd.concat([full_train_y, train_dataset_y])
        full_test_X = pd.concat([full_test_X, test_dataset_X])
        full_test_y = pd.concat([full_test_y, test_dataset_y])
        
label = 'count'

print(full_train.head())
print(full_train_y['count'].count())


     season year  month  hour holiday workingday weather   temp  feel_temp  \
3937      2    0      6    23       0          1       0  25.42     28.790   
460       1    0      1     5       0          1       0   9.84      9.850   
2081      2    0      4    14       0          1       2  14.76     16.665   
8823      1    1      1    11       0          0       0  14.76     17.425   
6121      0    0      9    15       0          0       2  21.32     25.000   

      humidity  windspeed  count  
3937      0.83     8.9981    178  
460       0.60    27.9993      5  
2081      0.46    23.9994     87  
8823      0.46    15.0013    256  
6121      0.72    12.9980    354  
156411


In [17]:
autogluon_regressor_bike = fit_gluon(full_train, time_limit=300)
pred_autogluon_bike = autogluon_regressor_bike.predict(full_test.drop(columns=[label]))
eval = autogluon_regressor_bike.evaluate(full_test)

No path specified. Models will be saved in: "AutogluonModels/ag-20240714_130126"
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.1.1
Python Version:     3.9.19
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Tue Jun 18 14:00:06 UTC 2024
CPU Count:          4
Memory Avail:       11.86 GB / 15.43 GB (76.9%)
Disk Space Avail:   6.36 GB / 24.99 GB (25.4%)
	We recommend a minimum available disk space of 10 GB, and large datasets may require more.
Presets specified: ['medium_quality']
Beginning AutoGluon training ... Time limit = 300s
AutoGluon will save models to "AutogluonModels/ag-20240714_130126"
Train Data Rows:    156411
Train Data Columns: 11
Label Column:       count
Problem Type:       regression
Preprocessing data ...
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    12150.65 MB
	Train Data (Original)  Memory Usage: 5.82 MB (0.0% of available memory)
	Inferring da

[1000]	valid_set's l2: 1258.62	valid_set's r2: 0.961215
[2000]	valid_set's l2: 972.316	valid_set's r2: 0.970038
[3000]	valid_set's l2: 840.709	valid_set's r2: 0.974093
[4000]	valid_set's l2: 747.825	valid_set's r2: 0.976956
[5000]	valid_set's l2: 674.932	valid_set's r2: 0.979202
[6000]	valid_set's l2: 618.722	valid_set's r2: 0.980934
[7000]	valid_set's l2: 569.293	valid_set's r2: 0.982457
[8000]	valid_set's l2: 523.268	valid_set's r2: 0.983875
[9000]	valid_set's l2: 486.17	valid_set's r2: 0.985019
[10000]	valid_set's l2: 453.801	valid_set's r2: 0.986016


	0.986	 = Validation score   (r2)
	68.88s	 = Training   runtime
	1.12s	 = Validation runtime
Fitting model: LightGBM ... Training model for up to 227.59s of the 227.58s of remaining time.


[1000]	valid_set's l2: 712.072	valid_set's r2: 0.978057
[2000]	valid_set's l2: 419.783	valid_set's r2: 0.987064
[3000]	valid_set's l2: 279.87	valid_set's r2: 0.991376
[4000]	valid_set's l2: 195.17	valid_set's r2: 0.993986
[5000]	valid_set's l2: 139.671	valid_set's r2: 0.995696
[6000]	valid_set's l2: 105.199	valid_set's r2: 0.996758
[7000]	valid_set's l2: 79.9901	valid_set's r2: 0.997535
[8000]	valid_set's l2: 61.8771	valid_set's r2: 0.998093
[9000]	valid_set's l2: 48.3998	valid_set's r2: 0.998509
[10000]	valid_set's l2: 38.9978	valid_set's r2: 0.998798


	0.9988	 = Validation score   (r2)
	55.27s	 = Training   runtime
	0.66s	 = Validation runtime
Fitting model: RandomForestMSE ... Training model for up to 170.91s of the 170.91s of remaining time.
		'squared_error'
Detailed Traceback:
joblib.externals.loky.process_executor._RemoteTraceback: 
"""
Traceback (most recent call last):
  File "/home/studio-lab-user/.conda/envs/default/lib/python3.9/site-packages/joblib/_utils.py", line 72, in __call__
    return self.func(**kwargs)
  File "/home/studio-lab-user/.conda/envs/default/lib/python3.9/site-packages/joblib/parallel.py", line 598, in __call__
    return [func(*args, **kwargs)
  File "/home/studio-lab-user/.conda/envs/default/lib/python3.9/site-packages/joblib/parallel.py", line 598, in <listcomp>
    return [func(*args, **kwargs)
  File "/home/studio-lab-user/.conda/envs/default/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 222, in __call__
    return self.function(*args, **kwargs)
  File "/home/studio-lab-user/.conda/envs

In [18]:
autosklearn_regressor_bike.fit(full_train_X, full_train_y)
autosklearn_predictions_bike = autosklearn_regressor_bike.predict(full_test_X)
autosklearn_score_bike = r2_score(full_test_y, autosklearn_predictions_bike)

NameError: name 'autosklearn_regressor_bike' is not defined

In [None]:
print("AutoGluon R2 score:", eval['r2'])  # Assuming 'eval' contains AutoGluon's evaluation results
print("AutoSklearn R2 score:", autosklearn_score_bike)

In [None]:
ensemble_predictions_bike = (2*pred_autogluon_bike + autosklearn_predictions_bike) / 3
ensemble_score_bike = r2_score(full_test_y, ensemble_predictions_bike)
print("Ensemble bike R2 score:", ensemble_score_bike)