<a href="https://colab.research.google.com/github/SelinDenizz/Freudian-Dream-Interpretation-Model/blob/main/notebook/data_trainer_colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import importlib.util
import os

def is_installed(package_name):
    """Return True when ``package_name`` can be located by the import system.

    Args:
        package_name: Absolute module or package name, e.g. ``"unsloth"`` or
            ``"pkg.submodule"``.

    Returns:
        bool: True if a module spec is found; False if the name is empty,
        the module (or a parent package of a dotted name) is missing, or the
        lookup cannot be resolved.
    """
    if not package_name:
        return False
    try:
        return importlib.util.find_spec(package_name) is not None
    except (ImportError, ValueError):
        # find_spec imports the parent package of a dotted name and raises
        # ModuleNotFoundError when that parent is absent; it raises ValueError
        # when an already-imported module has __spec__ set to None.
        return False

if not is_installed("unsloth"):

    !pip uninstall -y protobuf fsspec torch torchaudio torchvision
    !pip install "protobuf<4.0.0" fsspec==2025.3.0
    !pip install torch==2.6.0+cu124 torchaudio==2.6.0+cu124 torchvision==0.21.0+cu124 --index-url https://download.pytorch.org/whl/cu124
    !pip install -q unsloth transformers peft datasets gradio pandas tqdm
    !pip check

    requirements_path = "/content/drive/MyDrive/freudian_dream_analyzer/requirements.txt"
    !mkdir -p /content/drive/MyDrive/freudian_dream_analyzer
    !pip freeze > "{requirements_path}"
    print(f"requirements.txt saved at {requirements_path}")
else:
    print("Environment already installed, skipping setup.")

In [None]:
# Runtime & GPU check
import sys
import torch

# Colab is detected by checking whether `google.colab` is already loaded.
IN_COLAB = 'google.colab' in sys.modules
if IN_COLAB:
    print("Running in Google Colab")

# Report the first CUDA device, or tell the user how to enable one.
if not torch.cuda.is_available():
    print("No GPU available. Enable it from Runtime > Change runtime type.")
else:
    print(f"GPU is available: {torch.cuda.get_device_name(0)}")
    print(f"CUDA Version: {torch.version.cuda}")

In [None]:
# First, unmount if already mounted
from google.colab import drive
try:
    drive.flush_and_unmount()
    print('Existing drive mount was unmounted')
except:
    print('No existing drive mount')

# Check and clean up the mount point
import os
import shutil
if os.path.exists('/content/drive'):
    if os.path.isdir('/content/drive') and os.listdir('/content/drive'):
        shutil.rmtree('/content/drive')
        print("Removed existing /content/drive directory and its contents")

    elif os.path.isfile('/content/drive'):
        os.remove('/content/drive')
        print("Removed existing /content/drive file")

os.makedirs('/content/drive', exist_ok=True)
print("Created fresh /content/drive directory")

drive.mount('/content/drive')

%cd /content/drive/MyDrive/freudian_dream_analyzer/

import sys
sys.path.append('/content/drive/MyDrive/freudian_dream_analyzer/')

In [None]:
import sys
import importlib.util
import os

# Load script/module/data_trainer.py straight from its file path and expose
# its UnslothTrainer class to the following cells.
project_root = "/content/drive/MyDrive/freudian_dream_analyzer"
if project_root not in sys.path:
    # Guarded so re-running the cell does not add duplicate entries.
    sys.path.append(project_root)

module_path = os.path.join(project_root, "script/module/data_trainer.py")
if not os.path.isfile(module_path):
    raise FileNotFoundError(
        f"data_trainer module not found at {module_path}; "
        "is Google Drive mounted and the project folder in place?"
    )

spec = importlib.util.spec_from_file_location("data_trainer", module_path)
if spec is None or spec.loader is None:
    raise ImportError(f"Could not build an import spec for {module_path}")

data_trainer = importlib.util.module_from_spec(spec)
# Register the module before executing it, as the importlib recipe for
# importing a source file directly does, so lookups by module name succeed.
sys.modules[spec.name] = data_trainer
try:
    spec.loader.exec_module(data_trainer)
except BaseException:
    # Do not leave a half-initialised module behind on failure.
    sys.modules.pop(spec.name, None)
    raise

UnslothTrainer = data_trainer.UnslothTrainer


In [None]:
# Settings passed as keyword arguments to UnslothTrainer, in the same order
# as before.
trainer_settings = dict(
    model_name="meta-llama/Llama-2-7b-chat-hf",
    max_seq_length=2048,
    micro_batch_size=1,
    gradient_accumulation_steps=4,
    num_epochs=3,
    learning_rate=2e-4,
    lora_r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bf16=True,
    tf32=False,
    save_steps=100,
)
trainer = UnslothTrainer(**trainer_settings)

# Input dataset and output directory, both inside the Drive project folder.
dataset_jsonl = "/content/drive/MyDrive/freudian_dream_analyzer/data/dream/processed/fine_tuning_format/dreambank_finetune_llama.jsonl"
finetune_output_dir = "/content/drive/MyDrive/freudian_dream_analyzer/model/unsloth_model"

model_path = trainer.finetune(jsonl_file=dataset_jsonl, output_dir=finetune_output_dir)

In [None]:
# =============================================================================
# UPLOAD YOUR CUSTOM MODEL TO GOOGLE COLAB
# Run this in a separate cell BEFORE running the main application
# =============================================================================

import os
from google.colab import files

# Destination folder for the uploaded model files
model_dir = '/content/model/unsloth_model'
os.makedirs(model_dir, exist_ok=True)
print(f"✅ Created directory: {model_dir}")

# Tell the user which files to pick in the upload dialog
print("""
📁 UPLOAD YOUR MODEL FILES:

From your local computer, you need to upload these files from your 'model/unsloth_model' folder:
- adapter_config.json
- adapter_model.safetensors
- special_tokens_map.json
- tokenizer_config.json
- tokenizer.json
- tokenizer.model
- Any other files in your model directory

Click 'Choose Files' below and select ALL your model files:
""")

# Open the Colab upload widget; the result is iterated below as name -> bytes
uploaded = files.upload()

if not uploaded:
    print("❌ No files uploaded. Please try again.")
else:
    # Write every uploaded payload into the model directory
    print(f"\n📂 Processing {len(uploaded)} uploaded files:")
    for filename, content in uploaded.items():
        filepath = os.path.join(model_dir, filename)
        with open(filepath, 'wb') as f:
            f.write(content)
        print(f"✅ Saved: {filename} ({len(content)} bytes)")

    print(f"\n🎯 Model files uploaded to: {model_dir}")

    # List the directory contents with sizes as a sanity check
    print("\n📋 Files in model directory:")
    try:
        for file in sorted(os.listdir(model_dir)):
            file_path = os.path.join(model_dir, file)
            size = os.path.getsize(file_path)
            print(f"  - {file} ({size:,} bytes)")
    except Exception as e:
        print(f"  Error listing files: {e}")

    # Look for the two LoRA adapter files
    adapter_config = os.path.join(model_dir, 'adapter_config.json')
    adapter_model = os.path.join(model_dir, 'adapter_model.safetensors')

    if os.path.exists(adapter_config):
        print("\n✅ Found adapter_config.json - LoRA model detected")

        # Report the base model name recorded in the adapter config
        try:
            import json
            with open(adapter_config, 'r') as f:
                config = json.load(f)
            base_model = config.get('base_model_name_or_path', 'Unknown')
            print(f"📊 Base model: {base_model}")
        except Exception as e:
            print(f"⚠️ Could not read adapter config: {e}")

    if not os.path.exists(adapter_model):
        print("⚠️ adapter_model.safetensors not found - this might cause issues")
    else:
        print("✅ Found adapter_model.safetensors")

    print("\n🚀 Ready! Now run the main application script to use your custom model.")

# =============================================================================
# ALTERNATIVE: Upload via Google Drive (if you have files there)
# =============================================================================

# Print copy-paste instructions for loading the model from Google Drive.
# The snippet imports `os` itself so it also runs when pasted into a fresh
# session (it calls os.path.exists and os.listdir).
print("""
📁 ALTERNATIVE UPLOAD METHOD - Google Drive:

If your model files are in Google Drive, run this instead:

```python
import os
import shutil
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

# Copy your model files from Drive
# Update this path to where your model files are stored in Drive
source_path = '/content/drive/MyDrive/your_model_folder/unsloth_model'
destination_path = '/content/model/unsloth_model'

if os.path.exists(source_path):
    shutil.copytree(source_path, destination_path, dirs_exist_ok=True)
    print(f"✅ Copied model from Google Drive: {source_path}")

    # List copied files
    for file in os.listdir(destination_path):
        print(f"  - {file}")
else:
    print(f"❌ Model folder not found in Google Drive: {source_path}")
    print("Please update the source_path to match your Drive folder structure")
```
""")

# =============================================================================
# VERIFY MODEL STRUCTURE
# =============================================================================

def verify_model_structure(model_dir='/content/model/unsloth_model'):
    """Check whether ``model_dir`` looks like a loadable model folder.

    Args:
        model_dir: Directory to inspect. Defaults to the upload destination
            used by this notebook.

    Returns:
        bool: False when ``model_dir`` is not an existing directory or is
        empty. True when a LoRA adapter (``adapter_config.json``) or a full
        model file (``config.json``, ``pytorch_model.bin`` or
        ``model.safetensors``) is present, and also True for an unrecognised
        layout so the caller may still attempt to load it.
    """
    # isdir rather than exists: a regular file at this path must not reach
    # os.listdir, which would raise NotADirectoryError.
    if not os.path.isdir(model_dir):
        print("❌ Model directory doesn't exist yet")
        return False

    # Named `present` so the `files` module imported from google.colab is
    # not shadowed.
    present = set(os.listdir(model_dir))
    if not present:
        print("❌ Model directory is empty")
        return False

    # Check for LoRA adapter files
    required_lora_files = ('adapter_config.json',)
    if any(name in present for name in required_lora_files):
        print("✅ LoRA adapter model detected")
        return True

    # Check for full model files
    common_model_files = ('config.json', 'pytorch_model.bin', 'model.safetensors')
    if any(name in present for name in common_model_files):
        print("✅ Full model detected")
        return True

    print("⚠️ Unknown model structure")
    print(f"Files found: {sorted(present)}")
    return True  # Let the app try to load it anyway

# Print a banner, then run the structure check; the call stays the last
# expression of the cell.
print("\n" + "=" * 50, "MODEL VERIFICATION:", sep="\n")
verify_model_structure()