In [1]:
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Directory layout used throughout the notebook. These stay plain strings
# (with a trailing slash) because later cells build paths via `+`.
DATA_PATH = './data/'
ORIGINAL_DATA_PATH = DATA_PATH
SUBMISSIONS_PATH = './submissions/'
MODELS_PATH = './trained_models/'
TEMP_PATH = './temp/'

# Load the data.
# `train` and `test` are indexed by the `id` column of their CSV files.
train = pd.read_csv(DATA_PATH + 'train.csv', index_col='id')
test = pd.read_csv(DATA_PATH + 'test.csv', index_col='id')

# `flood.csv` is read without an index column, so it gets a default positional
# index; name it `id` without mutating in place so the cell is safe to re-run.
original = pd.read_csv(ORIGINAL_DATA_PATH + 'flood.csv').rename_axis('id')

# Stack both sources row-wise into one training frame.
# `ignore_index=True` renumbers the rows: the positional labels of `original`
# can otherwise collide with ids already present in `train`, which leaves
# duplicate index labels in `new_train` (ambiguous `.loc` lookups, and
# NOTE(review): some AutoGluon releases reject duplicated indices - confirm
# against the installed version).
new_train = (
    pd.concat([train, original], axis=0, ignore_index=True)
    .rename_axis('id')
)

In [2]:
from autogluon.tabular import TabularPredictor


# Train an AutoGluon tabular predictor on the combined training frame and
# display the resulting model leaderboard as the cell output.

# Constructor options: target column, optimisation metric, output directory
# for the trained models, and log verbosity.
predictor_options = dict(
    label='FloodProbability',
    eval_metric='r2',
    path=MODELS_PATH+'autogluon_models_gpu',
    verbosity=4,
)

# Fit options: training data, quality preset, model families to skip,
# time budget and per-model fit arguments (one GPU requested).
fit_options = dict(
    train_data=new_train,
    presets='best_quality',
    excluded_model_types=['RF', 'KNN'],
    time_limit=3600*10,  # 36000; presumably seconds (10 hours) - confirm in AutoGluon docs
    ag_args_fit={'num_gpus': 1},
)

# Construct and fit in a single expression, so `predictor` is only bound
# once fitting has returned.
predictor = TabularPredictor(**predictor_options).fit(**fit_options)

# Note: Ensure you have compatible versions of each model's library for GPU usage.
predictor.leaderboard()


Presets specified: ['best_quality']
User Specified kwargs:
{'ag_args_fit': {'num_gpus': 1},
 'auto_stack': True,
 'excluded_model_types': ['RF', 'KNN'],
 'num_bag_sets': 1}
Full kwargs:
{'_feature_generator_kwargs': None,
 '_save_bag_folds': None,
 'ag_args': None,
 'ag_args_ensemble': None,
 'ag_args_fit': {'num_gpus': 1},
 'auto_stack': True,
 'calibrate': 'auto',
 'ds_args': {'clean_up_fits': True,
             'detection_time_frac': 0.25,
             'holdout_data': None,
             'holdout_frac': 0.1111111111111111,
             'memory_safe_fits': True,
             'n_folds': 2,
             'n_repeats': 1,
             'validation_procedure': 'holdout'},
 'excluded_model_types': ['RF', 'KNN'],
 'feature_generator': 'auto',
 'feature_prune_kwargs': None,
 'holdout_frac': None,
 'hyperparameter_tune_kwargs': None,
 'included_model_types': None,
 'keep_only_best': False,
 'name_suffix': None,
 'num_bag_folds': None,
 'num_bag_sets': 1,
 'num_stack_levels': None,
 'pseudo_data'

KeyboardInterrupt: 

In [None]:

# Score the fitted predictor, predict on the test set and write a submission.
# NOTE(review): this cell needs a live `predictor` from the training cell; if
# that cell was interrupted, `predictor` may be stale or undefined - confirm
# before trusting the numbers below.

# Evaluate on the same frame the predictor was trained on. This is an
# in-sample score and therefore optimistic; `performance` holds whatever
# `evaluate` returns (printed as-is).
performance = predictor.evaluate(new_train)
print("R^2 Score on Evaluation:", performance)

prediction = predictor.predict(test)

# Name the column explicitly instead of relying on the predicted Series'
# own name matching 'FloodProbability' (a mismatch would silently produce an
# all-NaN column with the DataFrame(..., columns=[...]) constructor).
# assumes `predict` returns a pandas Series indexed like `test` - TODO confirm
submission = prediction.to_frame(name='FloodProbability')

# Create the output directory up front so a missing folder does not make the
# write fail after the expensive fit/predict steps.
os.makedirs(SUBMISSIONS_PATH, exist_ok=True)
submission.to_csv(SUBMISSIONS_PATH + 'autogluon_gpu_3.csv')

Loading: ./trained_models/autogluon_models_gpu/models/NeuralNetFastAI/model.pkl
Loading: ./trained_models/autogluon_models_gpu/models/NeuralNetFastAImodel-internals.pkl
Loading: ./trained_models/autogluon_models_gpu/models/NeuralNetTorch/model.pkl
Loading: ./trained_models/autogluon_models_gpu/models/WeightedEnsemble_L2/model.pkl
Loading: ./trained_models/autogluon_models_gpu/models/NeuralNetFastAI/model.pkl
Loading: ./trained_models/autogluon_models_gpu/models/NeuralNetFastAImodel-internals.pkl


R^2 Score on Evaluation: {'r2': 0.8625383927816617, 'root_mean_squared_error': -0.01890619215427047, 'mean_squared_error': -0.00035744410177419834, 'mean_absolute_error': -0.01490130442051072, 'pearsonr': 0.9288081075166698, 'median_absolute_error': -0.01208611011505123}


Loading: ./trained_models/autogluon_models_gpu/models/NeuralNetTorch/model.pkl
Loading: ./trained_models/autogluon_models_gpu/models/WeightedEnsemble_L2/model.pkl
