In [None]:
# Location of the JSON training configuration that the training cell below loads.
config_path = "./config/test.json"

In [None]:
# Install dependencies
!pip -r requirements.txt

# Check CUDA availability
import torch
cuda_available = torch.cuda.is_available()
print(f"CUDA is {'available' if cuda_available else 'not available'}")

if not cuda_available:
    use_zluda = input("CUDA is not available. Are you using an AMD GPU on Windows? (y/n) ")
    if use_zluda.lower() == 'y':
        print("Proceeding with ZLUDA installation")
        %run scripts/install_zluda.py
    else:
        print("Error: An issue occurred during installation")
else:
    print("Installation completed")

# Note: This environment uses %pip,
# but remember to use !pip in regular command line

In [None]:
# Import necessary libraries
import os

# If mgds is cloned to the repository
# import sys
# sys.path.append('mgds/src')

import json
from modules.util.config.TrainConfig import TrainConfig
from modules.util.callbacks.TrainCallbacks import TrainCallbacks
from modules.util.commands.TrainCommands import TrainCommands
from modules.trainer.GenericTrainer import GenericTrainer

In [None]:
# Build the training configuration: start from the defaults and overlay the
# values stored in the JSON file referenced by `config_path`.
train_config = TrainConfig.default_values()
# Read the file as UTF-8 explicitly; otherwise the platform locale codec is used
# (e.g. cp1252 on Windows), which can fail on non-ASCII characters in the config.
with open(config_path, "r", encoding="utf-8") as f:
    train_config.from_dict(json.load(f))

# Suppress warnings. Note that this filter silences ALL warning categories,
# not only UserWarning.
import warnings
warnings.filterwarnings('ignore')

# Create the directories named in TrainConfig if they don't exist.
# exist_ok=True avoids the check-then-create race of a separate exists() test;
# empty/None entries are skipped because os.makedirs cannot create them.
for dir_path in (train_config.debug_dir, train_config.workspace_dir, train_config.cache_dir):
    if dir_path:
        os.makedirs(dir_path, exist_ok=True)

# Set up callbacks and commands
callbacks = TrainCallbacks()
commands = TrainCommands()

# Report the paths this run will use
print("Destination_path: ", train_config.output_model_destination)
print("Workspace_path: ", train_config.workspace_dir)
print("Debug_path: ", train_config.debug_dir)
print("Cache_path: ", train_config.cache_dir)

# Start the training process
trainer = GenericTrainer(train_config, callbacks, commands)
trainer.start()

import traceback

training_succeeded = False
try:
    # Execute training
    trainer.train()
    training_succeeded = True
except Exception as e:
    # Keep the notebook alive, but show the full traceback so the failure can be diagnosed.
    print(f"An error occurred during training: {e}")
    traceback.print_exc()
finally:
    # Always run the trainer's end-of-run step, even after a failure or interrupt.
    trainer.end()
    if training_succeeded:
        print("Training has completed")
    else:
        print("Training did not complete successfully")