In [None]:
# Install dependencies
!pip install -q torch torchvision transformers tokenizers datasets tqdm psutil nltk sacremoses sentencepiece pyarrow wandb matplotlib pandas numpy

In [None]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Clean up any existing directories
!rm -rf ArtificialIntelligent
!rm -rf Datasets Checkpoints

# Create fresh project structure
!mkdir -p Datasets Checkpoints

# Copy datasets from Drive
print("Copying dataset files from Drive...")
!cp -v /content/drive/MyDrive/Datasets/*.parquet Datasets/

# Clone repository
!git clone https://github.com/Duy-Thanh/ArtificialIntelligent.git
%cd ArtificialIntelligent

# Create symbolic links
!ln -sf ../Datasets .
!ln -sf ../Checkpoints .

# Verify directory structure
!ls -la
!ls -la Datasets

# Setup environment
!python colab_setup.py

In [None]:
# Debug directory structure and paths
print("Current working directory:")
!pwd

print("\nRoot directory contents:")
!ls -la /content/

print("\nProject directory contents:")
!ls -la /content/ArtificialIntelligent/

print("\nDatasets directory contents:")
!ls -la /content/Datasets/

print("\nSymbolic links:")
!readlink /content/ArtificialIntelligent/Datasets
!readlink /content/ArtificialIntelligent/Checkpoints

# Try to find all parquet files
print("\nSearching for parquet files:")
!find /content -name "*.parquet"

In [None]:
# Fix paths if needed: replace the Datasets/Checkpoints links inside the
# project directory with links that use absolute targets.
%cd /content/ArtificialIntelligent

# Remove old symlinks (must happen before the links are recreated below)
!rm -f Datasets Checkpoints

# Create new absolute symlinks: ./Datasets -> /content/Datasets and
# ./Checkpoints -> /content/Checkpoints
!ln -sf /content/Datasets .
!ln -sf /content/Checkpoints .

# Verify new links: keep only `ls -la` lines that start with 'l', i.e. symlink entries
print("\nNew symbolic links:")
!ls -la | grep '^l'

# Verify dataset files are accessible by listing the parquet files through the new link
print("\nDataset files through symlink:")
!ls -la Datasets/*.parquet

In [None]:
# Check GPU availability by running nvidia-smi in the runtime's shell.
# If the command fails, presumably no GPU runtime is attached — verify the
# Colab runtime type before training.
!nvidia-smi

In [None]:
# Run training with absolute paths
import os
project_dir = '/content/ArtificialIntelligent'
datasets_dir = '/content/Datasets'

# Set environment variables for paths
%env PYTHONPATH=$project_dir
%env DATASET_DIR=$datasets_dir

# Run training from project directory
%cd $project_dir
!python train.py