In [56]:
from autogluon.tabular import TabularDataset, TabularPredictor
from autogluon.common.utils.utils import setup_outputdir
from autogluon.core.utils.loaders import load_pkl
from autogluon.core.utils.savers import save_pkl
import os.path

class MultilabelPredictor():
    """ Tabular Predictor for predicting multiple columns in table.
        Creates multiple TabularPredictor objects which you can also use individually.
        You can access the TabularPredictor for a particular label via: `multilabel_predictor.get_predictor(label_i)`

        Parameters
        ----------
        labels : List[str]
            The ith element of this list is the column (i.e. `label`) predicted by the ith TabularPredictor stored in this object.
        path : str, default = None
            Path to directory where models and intermediate outputs should be saved.
            If unspecified, a time-stamped folder called "AutogluonModels/ag-[TIMESTAMP]" will be created in the working directory to store all models.
            Note: To call `fit()` twice and save all results of each fit, you must specify different `path` locations or don't specify `path` at all.
            Otherwise files from first `fit()` will be overwritten by second `fit()`.
            Caution: when predicting many labels, this directory may grow large as it needs to store many TabularPredictors.
        problem_types : List[str], default = None
            The ith element is the `problem_type` for the ith TabularPredictor stored in this object.
        eval_metrics : List[str], default = None
            The ith element is the `eval_metric` for the ith TabularPredictor stored in this object.
        consider_labels_correlation : bool, default = True
            Whether the predictions of multiple labels should account for label correlations or predict each label independently of the others.
            If True, the ordering of `labels` may affect resulting accuracy as each label is predicted conditional on the previous labels appearing earlier in this list (i.e. in an auto-regressive fashion).
            Set to False if during inference you may want to individually use just the ith TabularPredictor without predicting all the other labels.
        kwargs :
            Arguments passed into the initialization of each TabularPredictor.

    """

    multi_predictor_file = 'multilabel_predictor.pkl'

    def __init__(self, labels, path=None, problem_types=None, eval_metrics=None, consider_labels_correlation=True, **kwargs):
        if len(labels) < 2:
            raise ValueError("MultilabelPredictor is only intended for predicting MULTIPLE labels (columns), use TabularPredictor for predicting one label (column).")
        if (problem_types is not None) and (len(problem_types) != len(labels)):
            raise ValueError("If provided, `problem_types` must have same length as `labels`")
        if (eval_metrics is not None) and (len(eval_metrics) != len(labels)):
            raise ValueError("If provided, `eval_metrics` must have same length as `labels`")
        self.path = setup_outputdir(path, warn_if_exist=False)
        self.labels = labels
        self.consider_labels_correlation = consider_labels_correlation
        self.predictors = {}  # key = label, value = TabularPredictor or str path to the TabularPredictor for this label
        if eval_metrics is None:
            self.eval_metrics = {}
        else:
            self.eval_metrics = {labels[i] : eval_metrics[i] for i in range(len(labels))}
        problem_type = None
        eval_metric = None
        for i in range(len(labels)):
            label = labels[i]
            path_i = self.path + "Predictor_" + label
            if problem_types is not None:
                problem_type = problem_types[i]
            if eval_metrics is not None:
                eval_metric = eval_metrics[i]
            self.predictors[label] = TabularPredictor(label=label, problem_type=problem_type, eval_metric=eval_metric, path=path_i, **kwargs)

    def fit(self, train_data, tuning_data=None, **kwargs):
        """ Fits a separate TabularPredictor to predict each of the labels.

            Parameters
            ----------
            train_data, tuning_data : str or autogluon.tabular.TabularDataset or pd.DataFrame
                See documentation for `TabularPredictor.fit()`.
            kwargs :
                Arguments passed into the `fit()` call for each TabularPredictor.
        """
        if isinstance(train_data, str):
            train_data = TabularDataset(train_data)
        if tuning_data is not None and isinstance(tuning_data, str):
            tuning_data = TabularDataset(tuning_data)
        train_data_og = train_data.copy()
        if tuning_data is not None:
            tuning_data_og = tuning_data.copy()
        else:
            tuning_data_og = None
        save_metrics = len(self.eval_metrics) == 0
        for i in range(len(self.labels)):
            label = self.labels[i]
            predictor = self.get_predictor(label)
            if not self.consider_labels_correlation:
                labels_to_drop = [l for l in self.labels if l != label]
            else:
                labels_to_drop = [self.labels[j] for j in range(i+1, len(self.labels))]
            train_data = train_data_og.drop(labels_to_drop, axis=1)
            if tuning_data is not None:
                tuning_data = tuning_data_og.drop(labels_to_drop, axis=1)
            print(f"Fitting TabularPredictor for label: {label} ...")
            predictor.fit(train_data=train_data, tuning_data=tuning_data, **kwargs)
            self.predictors[label] = predictor.path
            if save_metrics:
                self.eval_metrics[label] = predictor.eval_metric
        self.save()

    def predict(self, data, **kwargs):
        """ Returns DataFrame with label columns containing predictions for each label.

            Parameters
            ----------
            data : str or autogluon.tabular.TabularDataset or pd.DataFrame
                Data to make predictions for. If label columns are present in this data, they will be ignored. See documentation for `TabularPredictor.predict()`.
            kwargs :
                Arguments passed into the predict() call for each TabularPredictor.
        """
        return self._predict(data, as_proba=False, **kwargs)

    def predict_proba(self, data, **kwargs):
        """ Returns dict where each key is a label and the corresponding value is the `predict_proba()` output for just that label.

            Parameters
            ----------
            data : str or autogluon.tabular.TabularDataset or pd.DataFrame
                Data to make predictions for. See documentation for `TabularPredictor.predict()` and `TabularPredictor.predict_proba()`.
            kwargs :
                Arguments passed into the `predict_proba()` call for each TabularPredictor (also passed into a `predict()` call).
        """
        return self._predict(data, as_proba=True, **kwargs)

    def evaluate(self, data, **kwargs):
        """ Returns dict where each key is a label and the corresponding value is the `evaluate()` output for just that label.

            Parameters
            ----------
            data : str or autogluon.tabular.TabularDataset or pd.DataFrame
                Data to evalate predictions of all labels for, must contain all labels as columns. See documentation for `TabularPredictor.evaluate()`.
            kwargs :
                Arguments passed into the `evaluate()` call for each TabularPredictor (also passed into the `predict()` call).
        """
        data = self._get_data(data)
        eval_dict = {}
        for label in self.labels:
            print(f"Evaluating TabularPredictor for label: {label} ...")
            predictor = self.get_predictor(label)
            eval_dict[label] = predictor.evaluate(data, **kwargs)
            if self.consider_labels_correlation:
                data[label] = predictor.predict(data, **kwargs)
        return eval_dict

    def save(self):
        """ Save MultilabelPredictor to disk. """
        for label in self.labels:
            if not isinstance(self.predictors[label], str):
                self.predictors[label] = self.predictors[label].path
        save_pkl.save(path=self.path+self.multi_predictor_file, object=self)
        print(f"MultilabelPredictor saved to disk. Load with: MultilabelPredictor.load('{self.path}')")

    @classmethod
    def load(cls, path):
        """ Load MultilabelPredictor from disk `path` previously specified when creating this MultilabelPredictor. """
        path = os.path.expanduser(path)
        if path[-1] != os.path.sep:
            path = path + os.path.sep
        return load_pkl.load(path=path+cls.multi_predictor_file)

    def get_predictor(self, label):
        """ Returns TabularPredictor which is used to predict this label. """
        predictor = self.predictors[label]
        if isinstance(predictor, str):
            return TabularPredictor.load(path=predictor)
        return predictor

    def _get_data(self, data):
        if isinstance(data, str):
            return TabularDataset(data)
        return data.copy()

    def _predict(self, data, as_proba=False, **kwargs):
        data = self._get_data(data)
        if as_proba:
            predproba_dict = {}
        for label in self.labels:
            print(f"Predicting with TabularPredictor for label: {label} ...")
            predictor = self.get_predictor(label)
            if as_proba:
                predproba_dict[label] = predictor.predict_proba(data, as_multiclass=True, **kwargs)
            data[label] = predictor.predict(data, **kwargs)
        if not as_proba:
            return data[self.labels]
        else:
            return predproba_dict

Below is multimodal



In [47]:
feature_columns = ['Home Team', 'Away Team', 'Win or Loss for Celtics', 'Win or Loss for Warriors']
label = 'Win or Loss for Warriors'
print("Summary of class variable: \n", train_data[label].describe())

Summary of class variable: 
 count    1.0
mean     1.0
std      NaN
min      1.0
25%      1.0
50%      1.0
75%      1.0
max      1.0
Name: Win or Loss for Warriors, dtype: float64


Below is non modal



In [61]:
train_data = TabularDataset("C:/Users/Ali-Akber/Desktop/basketballDataset.csv")

Loaded data from: C:/Users/Ali-Akber/Desktop/basketballDataset.csv | Columns = 7 / 7 | Rows = 45 -> 45


In [59]:
train_data = train_data.sample(random_state=0)
train_data.head()

Unnamed: 0,Date,Home Team,Away Team,Win or Loss for Celtics,Win or Loss for Warriors,Celtics score,Warriors score
30,15,Celtics,Warriors,2,1,95,96


In [71]:
labels = ['Win or Loss for Celtics','Win or Loss for Warriors','Celtics score', 'Warriors score']
problem_types = ['binary','binary','regression', 'regression']  # type of each prediction problem
save_path = 'agModels-predictEducationClass'
time_limit = 20
print("Summary of class variable: \n", train_data[label].describe())

Summary of class variable: 
 count    45.000000
mean      1.511111
std       0.505525
min       1.000000
25%       1.000000
50%       2.000000
75%       2.000000
max       2.000000
Name: Win or Loss for Warriors, dtype: float64


In [72]:
multi_predictor = MultilabelPredictor(labels=labels, problem_types=problem_types, path=save_path)
multi_predictor.fit(train_data, time_limit=time_limit)

Beginning AutoGluon training ... Time limit = 20s
AutoGluon will save models to "agModels-predictEducationClass\Predictor_Win or Loss for Celtics\"
AutoGluon Version:  0.4.2
Python Version:     3.7.13
Operating System:   Windows
Train Data Rows:    45
Train Data Columns: 3
Label Column: Win or Loss for Celtics
Preprocessing data ...
Selected class <--> label mapping:  class 1 = 2, class 0 = 1
	Note: For your binary classification, AutoGluon arbitrarily selected which label-value represents positive (2) vs negative (1) class.
	To explicitly set the positive_class, either rename classes to 1 and 0, or specify positive_class in Predictor init.
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    9253.26 MB
	Train Data (Original)  Memory Usage: 0.01 MB (0.0% of available memory)
	Inferring data type of each feature based on column values. Set feature_metadata_in to manually specify special dtypes of the features

Fitting TabularPredictor for label: Win or Loss for Celtics ...


	0.6667	 = Validation score   (accuracy)
	0.0s	 = Training   runtime
	0.12s	 = Validation runtime
Fitting model: LightGBMXT ... Training model for up to 19.7s of the 19.7s of remaining time.
	0.4444	 = Validation score   (accuracy)
	0.08s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: LightGBM ... Training model for up to 19.61s of the 19.61s of remaining time.
	0.4444	 = Validation score   (accuracy)
	0.09s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForestGini ... Training model for up to 19.51s of the 19.51s of remaining time.
	0.6667	 = Validation score   (accuracy)
	0.46s	 = Training   runtime
	0.11s	 = Validation runtime
Fitting model: RandomForestEntr ... Training model for up to 18.92s of the 18.92s of remaining time.
	0.6667	 = Validation score   (accuracy)
	0.41s	 = Training   runtime
	0.11s	 = Validation runtime
Fitting model: CatBoost ... Training model for up to 18.38s of the 18.38s of remaining time.
	0.6667	 = Validation score

Fitting TabularPredictor for label: Win or Loss for Warriors ...


	0.3333	 = Validation score   (accuracy)
	0.0s	 = Training   runtime
	0.12s	 = Validation runtime
Fitting model: LightGBMXT ... Training model for up to 19.72s of the 19.72s of remaining time.
	0.5556	 = Validation score   (accuracy)
	0.07s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: LightGBM ... Training model for up to 19.65s of the 19.64s of remaining time.
	0.5556	 = Validation score   (accuracy)
	0.09s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForestGini ... Training model for up to 19.55s of the 19.55s of remaining time.
	1.0	 = Validation score   (accuracy)
	0.46s	 = Training   runtime
	0.12s	 = Validation runtime
Fitting model: RandomForestEntr ... Training model for up to 18.95s of the 18.95s of remaining time.
	1.0	 = Validation score   (accuracy)
	0.41s	 = Training   runtime
	0.11s	 = Validation runtime
Fitting model: CatBoost ... Training model for up to 18.42s of the 18.42s of remaining time.
	0.8889	 = Validation score   (

Fitting TabularPredictor for label: Celtics score ...


	-14.2033	 = Validation score   (root_mean_squared_error)
	0.0s	 = Training   runtime
	0.12s	 = Validation runtime
Fitting model: LightGBMXT ... Training model for up to 19.72s of the 19.72s of remaining time.
	-12.9238	 = Validation score   (root_mean_squared_error)
	0.07s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: LightGBM ... Training model for up to 19.64s of the 19.64s of remaining time.
	-12.9238	 = Validation score   (root_mean_squared_error)
	0.09s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForestMSE ... Training model for up to 19.55s of the 19.55s of remaining time.
	-14.2497	 = Validation score   (root_mean_squared_error)
	0.34s	 = Training   runtime
	0.11s	 = Validation runtime
Fitting model: CatBoost ... Training model for up to 19.09s of the 19.09s of remaining time.
	-12.4672	 = Validation score   (root_mean_squared_error)
	0.15s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: ExtraTreesMSE ... Training m

Fitting TabularPredictor for label: Warriors score ...


	-12.0528	 = Validation score   (root_mean_squared_error)
	0.0s	 = Training   runtime
	0.12s	 = Validation runtime
Fitting model: LightGBMXT ... Training model for up to 19.72s of the 19.72s of remaining time.
	-11.8278	 = Validation score   (root_mean_squared_error)
	0.07s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: LightGBM ... Training model for up to 19.64s of the 19.64s of remaining time.
	-11.8278	 = Validation score   (root_mean_squared_error)
	0.09s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForestMSE ... Training model for up to 19.54s of the 19.54s of remaining time.
	-9.8754	 = Validation score   (root_mean_squared_error)
	0.43s	 = Training   runtime
	0.13s	 = Validation runtime
Fitting model: CatBoost ... Training model for up to 18.97s of the 18.97s of remaining time.
	-10.4349	 = Validation score   (root_mean_squared_error)
	0.21s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: ExtraTreesMSE ... Training mo

MultilabelPredictor saved to disk. Load with: MultilabelPredictor.load('agModels-predictEducationClass\')


In [69]:
test_data = TabularDataset("C:/Users/Ali-Akber/Desktop/bbtest.csv")
test_data.head()

Loaded data from: C:/Users/Ali-Akber/Desktop/bbtest.csv | Columns = 3 / 3 | Rows = 7 -> 7


Unnamed: 0,Date,Home Team,Away Team
0,46,Warriors,Celtics
1,47,Warriors,Celtics
2,48,Celtics,Warriors
3,49,Celtics,Warriors
4,50,Warriors,Celtics


In [73]:
y_pred = multi_predictor.predict(test_data)
print("Predictions:  \n", y_pred)
import pandas as pd
sub = pd.read_csv("C:/Users/Ali-Akber/Desktop/submission.csv")
sub[labels] = y_pred
sub.to_csv("C:/Users/Ali-Akber/Desktop/submission.csv", index=False)
sub.head()

Predicting with TabularPredictor for label: Win or Loss for Celtics ...
Predicting with TabularPredictor for label: Win or Loss for Warriors ...
Predicting with TabularPredictor for label: Celtics score ...
Predicting with TabularPredictor for label: Warriors score ...
Predictions:  
    Win or Loss for Celtics  Win or Loss for Warriors  Celtics score  \
0                        1                         2     109.553680   
1                        1                         2     109.564957   
2                        1                         2     110.135147   
3                        1                         2     110.169891   
4                        1                         2     109.625031   
5                        1                         2     110.240417   
6                        1                         2     109.679245   

   Warriors score  
0       99.541359  
1       99.490753  
2      105.684631  
3      105.792542  
4       99.368919  
5      105.989136  
6    

Unnamed: 0,Date,Home Team,Away Team,Win or Loss for Celtics,Win or Loss for Warriors,Celtics score,Warriors score
0,46,Warriors,Celtics,1,2,109.55368,99.541359
1,47,Warriors,Celtics,1,2,109.564957,99.490753
2,48,Celtics,Warriors,1,2,110.135147,105.684631
3,49,Celtics,Warriors,1,2,110.169891,105.792542
4,50,Warriors,Celtics,1,2,109.625031,99.368919
