# Experiment: Comparison of TimeMIL, TodyNet, and LSTM

##### Setup and Installs

In [None]:
# Mount Google Drive at /content/drive (needed when the dataset/code are kept on Drive).
# Later cells in this notebook read project files from /content/drive/MyDrive/...
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Step 1: Install dependencies
# !pip install torch pytorch-lightning scikit-learn pandas joblib

!pip install aeon==0.5.0 numpy==1.23.1 torch==1.13.1+cu117 torchvision==0.14.1+cu117 pytorch-lightning==1.8.6 torchmetrics==1.5.1 --extra-index-url https://download.pytorch.org/whl/cu117
!pip install scikit-learn pandas joblib

##### Installing dependencies for Interpretability assessment

In [None]:
!pip install captum
#!pip install matplotlib
#!pip install shap

##### Ensuring the correct versions are installed to use TimeMIL

In [None]:
# Pick the torch device: CUDA when torch reports it as available, otherwise the CPU.
import torch

cuda_ok = torch.cuda.is_available()
device = torch.device("cuda" if cuda_ok else "cpu")
print(
    "GPU is available. Device set to CUDA."
    if cuda_ok
    else "GPU is not available. Device set to CPU."
)

# Check if dependencies were correctly installed

import importlib

def check_dependency(package_name, expected_version):
    """Print whether an importable package matches an expected version.

    Args:
        package_name: Module name handed to ``importlib.import_module``
            (e.g. ``"sklearn"`` for scikit-learn).
        expected_version: Version string compared for exact equality with the
            module's ``__version__`` attribute.

    Returns:
        None. Exactly one status line is printed per call. A missing package
        (``ImportError``) or a module without ``__version__`` is reported in
        that line rather than raised.
    """
    # Only the import itself is guarded, so the ImportError branch cannot hide
    # errors coming from the comparison/printing code below.
    try:
        package = importlib.import_module(package_name)
    except ImportError:
        print(f"{package_name} is not installed.")
        return

    # Not every module defines __version__; report that instead of letting an
    # AttributeError abort the remaining dependency checks.
    installed_version = getattr(package, "__version__", None)
    if installed_version is None:
        print(f"{package_name} is installed, but it does not expose __version__.")
    elif installed_version == expected_version:
        print(f"{package_name} is correctly installed: {installed_version}")
    else:
        print(f"{package_name} version mismatch: expected {expected_version}, but found {installed_version}")

# Expected version for each package, keyed by the module name that
# check_dependency imports (scikit-learn is imported as "sklearn").
dependencies = dict(
    aeon="0.5.0",
    numpy="1.23.1",
    torch="1.13.1+cu117",
    torchvision="0.14.1+cu117",
    pytorch_lightning="1.8.6",
    sklearn="1.2.2",
    pandas="2.0.3",
    joblib="1.4.2",
    torchmetrics="1.5.1",
)

# Report the status of every entry, in the order listed above.
for module_name in dependencies:
    check_dependency(module_name, dependencies[module_name])


##### Reloading and changing the system path to the newest project base folder

In [None]:
# In your notebook cell

%load_ext autoreload
%autoreload 2

# Add your module path for custom imports
import sys
sys.path.append('/content/drive/MyDrive/Colab Notebooks/work_project/modelling_new')  # Adjust this if you saved your .py files elsewhere
sys.path.append('/content/drive/MyDrive/Colab Notebooks/work_project/modelling_new/src_interpretability_new')  # Adjust this if you saved your .py files elsewhere

from train import train_experiment


## Custom Dataset Experimentation

# Rotten Experiments

##### TimeMIL Rotten

In [None]:
# In your notebook cell

%load_ext autoreload
%autoreload 2

# Add your module path for custom imports
import sys
sys.path.append('/content/drive/MyDrive/Colab Notebooks/work_project/modelling_new')  # Adjust this if you saved your .py files elsewhere
sys.path.append('/content/drive/MyDrive/Colab Notebooks/work_project/modelling_new/src_interpretability_new')  # Adjust this if you saved your .py files elsewhere

# Run TimeMIL on an aeon dataset with multiple seeds

from train import train_experiment
from datetime import datetime

model_name = 'timemil'
dataset_name = 'rotten'  # Replace with actual dataset name [BasicMotions, SharePriceIncrease]
seeds = [46]

for i in range(0, 30):
    # get current_time
    current_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")


    experiment_name = f"{model_name}_{dataset_name}_run_{i}_time_{current_time}"
    print(f"Experiment: {experiment_name}")

    train_experiment(
        run=i,
        dataset_name=dataset_name,
        model_name=model_name,
        data_dir=f"/content/drive/MyDrive/Colab Notebooks/work_project/modelling_new/data/rotten",
        experiment_name=experiment_name,
        #seed=seed,
        batch_size=64, # 128
        hidden_dim=128, # never change this for timemil
        num_layers=4,
        max_seq_len=60,
        dropout= 0.4, # current best at 0.4
        optimizer='adamw',
        lr= 1.0309706745004951e-05,# 1e-5, # current best at 1e-5
        weight_decay= 1.0859867475239952e-05,# 1e-4, # current best at 1e-4
        max_epochs=200,
        gradient_clip_val=0.43956229955444975,
        use_class_weights=True,
        scheduler='reduce_on_plateau',
        scheduler_params={'mode': 'min', 'factor': 0.5, 'patience': 50},
    )


##### TodyNet Rotten

In [None]:
%load_ext autoreload
%autoreload 2

# Add your module path for custom imports
import sys
sys.path.append('/content/drive/MyDrive/Colab Notebooks/work_project/modelling_new')  # Adjust this if you saved your .py files elsewhere
sys.path.append('/content/drive/MyDrive/Colab Notebooks/work_project/modelling_new/src_interpretability_new')  # Adjust this if you saved your .py files elsewhere

# Run TodyNet on an aeon dataset with multiple seeds

from train import train_experiment
from datetime import datetime

model_name = 'todynet'
dataset_name = 'rotten'  # Replace with actual dataset name
seeds = [46]

for i in range(0, 30):
    # get current_time
    current_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")


    experiment_name = f"{model_name}_{dataset_name}_run_{i}_time_{current_time}_check"
    print(f"Experiment: {experiment_name}")

    # Define todynet parameters (without 'num_nodes')
    todynet_params = {
        'num_layers': 3,
        'groups': 1, # 1
        'pool_ratio': 0.3728438352957143, #0.2
        'kern_size': [9, 5, 3], # [9, 5, 3]
        'hidden_dim': 64, # 128
        'out_dim': 128, # 256
        'dropout': 0.3326848732885655 ,
        'gnn_model_type': 'dyGCN2d',
        'in_dim': 1
        # 'in_dim' and 'seq_length' will be set in train_experiment
    }

    train_experiment(
        run=i,
        dataset_name=dataset_name,
        model_name=model_name,
        data_dir=f"/content/drive/MyDrive/Colab Notebooks/work_project/modelling_new/data/rotten",
        experiment_name=experiment_name,
        #seed=seed,
        batch_size=16,
        hidden_dim=192,
        max_seq_len=60,
        dropout=0.17252497761075708,
        optimizer='adamw',
        lr=0.00018641731982861303,
        weight_decay=2.0321835493435088e-05,
        max_epochs=100,
        gradient_clip_val=0.5,
        use_class_weights=True,
        scheduler='reduce_on_plateau',
        scheduler_params={'mode': 'min', 'factor': 0.5, 'patience': 20},
        todynet_params=todynet_params
    )


##### Rotten LSTM

In [None]:
%load_ext autoreload
%autoreload 2

# Add your module path for custom imports
import sys
sys.path.append('/content/drive/MyDrive/Colab Notebooks/work_project/modelling_new')  # Adjust this if you saved your .py files elsewhere
sys.path.append('/content/drive/MyDrive/Colab Notebooks/work_project/modelling_new/src')  # Adjust this if you saved your .py files elsewhere


# Run LSTMClassifier on an aeon dataset with multiple seeds

from train import train_experiment
from datetime import datetime

model_name = 'lstm_classifier'
dataset_name = 'rotten'  # Replace with actual dataset name
seeds = [46]

for i in range(0, 30):
    # Get current time
    current_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")


    experiment_name = f"{model_name}_{dataset_name}_run_{i}_time_{current_time}_check"
    print(f"Experiment: {experiment_name}")

    train_experiment(
        run=i,
        model_name=model_name,
        dataset_name=dataset_name,
        data_dir=f"/content/drive/MyDrive/Colab Notebooks/work_project/modelling_new/data/rotten",
        #aeon_dataset='SharePriceIncrease',
        experiment_name=experiment_name,
        #seed=seed,
        batch_size=64,         # Increased batch size for faster training if GPU memory allows
        hidden_dim=128,        # Increased hidden dimension
        num_layers=2,          # Increased number of layers
        bidirectional=False,    # Using a bidirectional LSTM
        max_seq_len=60,
        dropout=0.2764879613755455,           # Increased dropout to prevent overfitting
        optimizer='adamw',
        lr=5.965328183810809e-05,
        weight_decay=4.0205830285607866e-06,     # Adjusted weight decay
        max_epochs=200,
        gradient_clip_val=0.31629708444325294, # Increased gradient clipping value
        use_class_weights=True,
        scheduler='reduce_on_plateau',
        scheduler_params={'mode': 'min', 'factor': 0.5, 'patience': 50},
    )



# Aeon Datasets

##### SharePriceIncrease TimeMIL

In [None]:


# In your notebook cell

%load_ext autoreload
%autoreload 2

# Add your module path for custom imports
import sys
sys.path.append('/content/drive/MyDrive/Colab Notebooks/work_project/modelling_new')  # Adjust this if you saved your .py files elsewhere
sys.path.append('/content/drive/MyDrive/Colab Notebooks/work_project/modelling_new/src_interpretability_new')  # Adjust this if you saved your .py files elsewhere

# Run TimeMIL on an aeon dataset with multiple seeds

from train import train_experiment
from datetime import datetime

model_name = 'timemil'
dataset_name = 'SharePriceIncrease'  # Replace with actual dataset name [BasicMotions, SharePriceIncrease]
seeds = [46]

for i in range(10):
    # get current_time
    current_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")


    experiment_name = f"{model_name}_{dataset_name}_run_{i}_time_{current_time}"
    print(f"Experiment: {experiment_name}")

    train_experiment(
        run=i,
        dataset_name=dataset_name,
        model_name=model_name,
        #data_dir=f"/content/drive/MyDrive/Colab Notebooks/work_project/modelling_new/data/twitter",
        aeon_dataset='SharePriceIncrease',
        experiment_name=experiment_name,
        #seed=seed,
        batch_size=64, # 128
        hidden_dim=128, # never change this for timemil
        num_layers=1,
        max_seq_len=60,
        dropout= 0.4873169054805524, # current best at 0.4
        optimizer='adamw',
        lr= 1.082096149444034e-05,# 1e-5, # current best at 1e-5
        weight_decay= 0.0022621952972201147,# 1e-4, # current best at 1e-4
        max_epochs=200,
        gradient_clip_val=0.49314467483726887,
        use_class_weights=True,
        scheduler='reduce_on_plateau',
        scheduler_params={'mode': 'min', 'factor': 0.5, 'patience': 50},
    )


##### SharePriceIncrease TodyNet

In [None]:
%load_ext autoreload
%autoreload 2

# Add your module path for custom imports
import sys
sys.path.append('/content/drive/MyDrive/Colab Notebooks/work_project/modelling_new')  # Adjust this if you saved your .py files elsewhere
sys.path.append('/content/drive/MyDrive/Colab Notebooks/work_project/modelling_new/src_interpretability_new')  # Adjust this if you saved your .py files elsewhere

# Run TodyNet on an aeon dataset with multiple seeds

from train import train_experiment
from datetime import datetime

model_name = 'todynet'
dataset_name = 'SharePriceIncrease'  # Replace with actual dataset name
seeds = [46]

for i in range(10):
    # get current_time
    current_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")


    experiment_name = f"{model_name}_{dataset_name}_run_{i}_time_{current_time}_check"
    print(f"Experiment: {experiment_name}")

    # Define todynet parameters (without 'num_nodes')
    todynet_params = {
        'num_layers': 3,
        'groups': 1, # 1
        'pool_ratio': 0.23740150417551795, #0.2
        'kern_size': [7, 5, 3], # [9, 5, 3]
        'hidden_dim': 64, # 128
        'out_dim': 256, # 256
        'dropout': 0.2625681811874664,
        'gnn_model_type': 'dyGCN2d',
        'in_dim': 1
        # 'in_dim' and 'seq_length' will be set in train_experiment
    }

    train_experiment(
        run=i,
        dataset_name=dataset_name,
        model_name=model_name,
        #data_dir=f"/content/drive/MyDrive/Colab Notebooks/work_project/modelling_new/data/twitter",
        aeon_dataset='SharePriceIncrease',
        experiment_name=experiment_name,
        #seed=seed,
        batch_size=32,
        hidden_dim=256,
        max_seq_len=60,
        dropout=0.13748444505513727,
        optimizer='adamw',
        lr=0.0002857872828206899,
        weight_decay=1.4790851179473734e-06,
        max_epochs=100,
        gradient_clip_val=0.187201940376038,
        use_class_weights=True,
        scheduler='reduce_on_plateau',
        scheduler_params={'mode': 'min', 'factor': 0.5, 'patience': 20},
        todynet_params=todynet_params
    )


##### SharePriceIncrease LSTM Classifier

In [None]:
%load_ext autoreload
%autoreload 2

# Add your module path for custom imports
import sys
sys.path.append('/content/drive/MyDrive/Colab Notebooks/work_project/modelling_new')  # Adjust this if you saved your .py files elsewhere
sys.path.append('/content/drive/MyDrive/Colab Notebooks/work_project/modelling_new/src')  # Adjust this if you saved your .py files elsewhere


# Run LSTMClassifier on an aeon dataset with multiple seeds

from train import train_experiment
from datetime import datetime

model_name = 'lstm_classifier'
dataset_name = 'SharePriceIncrease'  # Replace with actual dataset name
seeds = [46]

for i in range(10):
    # Get current time
    current_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")


    experiment_name = f"{model_name}_{dataset_name}_run_{i}_time_{current_time}_check"
    print(f"Experiment: {experiment_name}")

    train_experiment(
        run=i,
        model_name=model_name,
        dataset_name=dataset_name,
        #data_dir=f"/content/drive/MyDrive/Colab Notebooks/work_project/modelling_new/data/twitter",
        aeon_dataset='SharePriceIncrease',
        experiment_name=experiment_name,
        #seed=seed,
        batch_size=64,         # Increased batch size for faster training if GPU memory allows
        hidden_dim=128,        # Increased hidden dimension
        num_layers=1,          # Increased number of layers
        bidirectional=False,    # Using a bidirectional LSTM
        max_seq_len=60,
        dropout=0.38060887237497343,           # Increased dropout to prevent overfitting
        optimizer='adamw',
        lr=5.659990373880629e-05,
        weight_decay=3.356868512223481e-06,     # Adjusted weight decay
        max_epochs=200,
        gradient_clip_val=0.41800486528115716, # Increased gradient clipping value
        use_class_weights=True,
        scheduler='reduce_on_plateau',
        scheduler_params={'mode': 'min', 'factor': 0.5, 'patience': 50},
    )



## 6.1 Running Experiments with Aeon Datasets

In [None]:
# In your notebook cell

%load_ext autoreload
%autoreload 2

# Add your module path for custom imports
import sys
sys.path.append('/content/drive/MyDrive/Colab Notebooks/work_project/modelling_new')  # Adjust this if you saved your .py files elsewhere
sys.path.append('/content/drive/MyDrive/Colab Notebooks/work_project/modelling_new/src_interpretability_assessment')  # Adjust this if you saved your .py files elsewhere

# Run TimeMIL on an aeon dataset with multiple seeds

from train import train_experiment
from datetime import datetime

model_name = 'timemil'
dataset_name = 'SharePriceIncrease'  # Replace with actual dataset name [BasicMotions, SharePriceIncrease]
seeds = [46]

for seed in seeds:
    # get current_time
    current_time = datetime.now().strftime("%Y%m%d-%H%M%S")


    experiment_name = f"{model_name}_{dataset_name}_seed_{seed}_time_{current_time}"
    print(f"Experiment: {experiment_name}")

    train_experiment(
        model_name=model_name,
        aeon_dataset='SharePriceIncrease', # dataset_name,
        experiment_name=experiment_name,
        seed=seed,
        batch_size=16,
        hidden_dim=128,
        max_seq_len=60,
        dropout=0.2,
        optimizer='adamw',
        lr=1e-4,
        weight_decay=1e-4,
        max_epochs=100,
        gradient_clip_val=0.5,
        use_class_weights=True,
        scheduler='reduce_on_plateau',
        scheduler_params={'mode': 'min', 'factor': 0.5, 'patience': 50},
    )


In [None]:
# In your notebook cell

%load_ext autoreload
%autoreload 2

# Add your module path for custom imports
import sys
sys.path.append('/content/drive/MyDrive/Colab Notebooks/work_project/modelling_new')  # Adjust this if you saved your .py files elsewhere
sys.path.append('/content/drive/MyDrive/Colab Notebooks/work_project/modelling_new/src_interpretability_assessment')  # Adjust this if you saved your .py files elsewhere


# Run TodyNet on an aeon dataset with multiple seeds

from train import train_experiment
from datetime import datetime

model_name = 'todynet'
dataset_name = 'SharePriceIncrease'  # Replace with actual dataset name
seeds = [46]

for seed in seeds:
    current_time = datetime.now().strftime("%Y%m%d-%H%M%S")

    experiment_name = f"{model_name}_{dataset_name}_seed_{seed}_time_{current_time}"
    print(f"Experiment: {experiment_name}")

    # Define todynet parameters (without 'num_nodes')
    todynet_params = {
        'num_layers': 3,
        'groups': 4,
        'pool_ratio': 0.2,
        'kern_size': [9, 5, 3],
        'hidden_dim': 128,
        'out_dim': 256,
        'dropout': 0.3,
        'gnn_model_type': 'dyGIN2d',
        # 'in_dim' and 'seq_length' will be set in train_experiment
    }

    train_experiment(
        model_name=model_name,
        aeon_dataset=dataset_name,
        experiment_name=experiment_name,
        seed=seed,
        batch_size=16,
        hidden_dim=128,
        max_seq_len=60,
        dropout=0.2,
        optimizer='adamw',
        lr=1e-4,
        weight_decay=1e-4,
        max_epochs=100,
        gradient_clip_val=0.5,
        use_class_weights=True,
        scheduler='reduce_on_plateau',
        scheduler_params={'mode': 'min', 'factor': 0.5, 'patience': 20},
        todynet_params=todynet_params
    )


In [None]:
# In your notebook cell

%load_ext autoreload
%autoreload 2

# Add your module path for custom imports
import sys
sys.path.append('/content/drive/MyDrive/Colab Notebooks/work_project/modelling_new')  # Adjust this if you saved your .py files elsewhere
sys.path.append('/content/drive/MyDrive/Colab Notebooks/work_project/modelling_new/src')  # Adjust this if you saved your .py files elsewhere


# Run LSTMClassifier on an aeon dataset with multiple seeds

from train import train_experiment
from datetime import datetime

model_name = 'lstm_classifier'
dataset_name = 'SharePriceIncrease'  # Replace with actual dataset name
seeds = [46]

for seed in seeds:
    # Get current time
    current_time = datetime.now().strftime("%Y%m%d-%H%M%S")

    experiment_name = f"{model_name}_{dataset_name}_seed_{seed}_time_{current_time}"
    print(f"Experiment: {experiment_name}")

    train_experiment(
        model_name=model_name,
        aeon_dataset=dataset_name,
        experiment_name=experiment_name,
        seed=seed,
        batch_size=16,         # Increased batch size for faster training if GPU memory allows
        hidden_dim=256,        # Increased hidden dimension
        num_layers=3,          # Increased number of layers
        bidirectional=False,    # Using a bidirectional LSTM
        max_seq_len=60,
        dropout=0.3,           # Increased dropout to prevent overfitting
        optimizer='adamw',
        lr=1e-4,
        weight_decay=1e-5,     # Adjusted weight decay
        max_epochs=100,
        gradient_clip_val=1.0, # Increased gradient clipping value
        use_class_weights=True,
        scheduler='reduce_on_plateau',
        scheduler_params={'mode': 'min', 'factor': 0.5, 'patience': 20},
    )
