# Test 01: Environment Variables

**Purpose:** Verify that running the auto_update script correctly sets all cache-related environment variables BEFORE any library imports.

**Expected Results:**
- All cache paths should point to `home_workspace/` (CoCalc home server) or `cs_workspace/` (CoCalc compute server)
- NO paths should point to `~/.cache`

**Run this on:** Both CoCalc base and Compute Server

In [None]:
# DS776 Environment Setup & Package Update
# Configures storage paths for proper cleanup/sync, then updates introdl if needed
# Run this cell FIRST: the checks below expect the cache-related environment
# variables to be in place before any library is imported.
# The script path is relative, so it is resolved from the notebook's working directory.
# If this cell fails, see Lessons/Course_Tools/AUTO_UPDATE_SYSTEM.md for help
%run ../../Lessons/Course_Tools/auto_update_introdl.py

In [None]:
# Check environment variables BEFORE importing any libraries.
#
# This cell only reads os.environ and prints a report; it does not import
# torch / huggingface / datasets, so it shows what those libraries will see
# when they are imported later.
import os
from pathlib import Path


def points_to_default_cache(value, default_cache):
    """Return True if *value* is *default_cache* itself or a path inside it.

    The comparison is done on path components rather than raw substrings, so
    a sibling such as ``~/.cache_backup`` is not mistaken for ``~/.cache``,
    and a value written with a leading ``~`` is expanded before comparing.

    Args:
        value: Path string taken from an environment variable.
        default_cache: The cache directory that must be avoided (str or Path).

    Returns:
        bool: True when *value* resolves to *default_cache* or one of its
        descendants. Symlinks are not resolved.
    """
    candidate = Path(os.path.normpath(os.path.expanduser(value)))
    forbidden = Path(os.path.normpath(os.path.expanduser(str(default_cache))))
    return candidate == forbidden or forbidden in candidate.parents


print("=" * 60)
print("ENVIRONMENT VARIABLE CHECK (before library imports)")
print("=" * 60)

# List of cache-related environment variables to check
# Note: TRANSFORMERS_CACHE is deprecated in v5+; HF_HOME is the primary setting
cache_vars = [
    'TORCH_HOME',
    'HF_HOME',
    'HUGGINGFACE_HUB_CACHE',
    'HF_DATASETS_CACHE',
    'XDG_CACHE_HOME',
]

# Suppression variables (reported only; they do not affect the final result)
suppress_vars = [
    'TRANSFORMERS_NO_TF',
    'USE_TF',
    'TF_CPP_MIN_LOG_LEVEL',
]

home = Path.home()
bad_cache = str(home / '.cache')

print("\n--- Cache Path Variables ---")
all_good = True
for var in cache_vars:
    value = os.environ.get(var)
    if value is None:
        status = "WARNING: Not set"
        all_good = False
    elif points_to_default_cache(value, bad_cache):
        status = "BAD: Points to ~/.cache!"
        all_good = False
    else:
        status = "OK"
    print(f"{var}:")
    print(f"  Value: {'NOT SET' if value is None else value}")
    print(f"  Status: {status}")
    print()

print("\n--- TF/Keras Suppression Variables ---")
for var in suppress_vars:
    value = os.environ.get(var, 'NOT SET')
    print(f"{var}: {value}")

print("\n" + "=" * 60)
if all_good:
    print("RESULT: All cache paths configured correctly!")
else:
    print("RESULT: Some issues detected - see above")
print("=" * 60)

In [None]:
# Work out which kind of machine this notebook is running on and print the
# cache locations expected for it.
import os
from pathlib import Path

home = Path.home()
print("\n--- Environment Detection ---")
print(f"Home directory: {home}")

# Any single CoCalc marker is enough (not all may be present)
cocalc_marker_dirs = ('.cocalc', '.smc')
is_cocalc = (
    any((home / marker).exists() for marker in cocalc_marker_dirs)
    or 'COCALC_PROJECT_ID' in os.environ
)

if not is_cocalc:
    print("Environment: Local Development or Other")
    root_dir = os.environ.get('DS776_ROOT_DIR')
    if root_dir is not None:
        print(f"  DS776_ROOT_DIR: {root_dir}")
else:
    cs_workspace = home / 'cs_workspace'
    home_workspace = home / 'home_workspace'
    # A compute server is recognised by having BOTH workspace directories.
    on_compute_server = cs_workspace.exists() and home_workspace.exists()
    if not on_compute_server:
        print("Environment: CoCalc HOME SERVER")
        print("\nExpected cache locations:")
        print(f"  - Downloads/models: ~/home_workspace/downloads/")
        print(f"  - Datasets: ~/home_workspace/data/")
    else:
        print("Environment: CoCalc COMPUTE SERVER")
        print(f"  - cs_workspace exists: {cs_workspace.exists()}")
        print(f"  - home_workspace exists: {home_workspace.exists()}")
        print("\nExpected cache locations:")
        print(f"  - Downloads/models: ~/cs_workspace/downloads/")
        print(f"  - Datasets: ~/cs_workspace/data/")

In [None]:
# Now import libraries and verify they see the correct paths
# The imports are deliberately placed in this cell, after the environment
# variable report above, so each printed value shows what the library itself
# resolved rather than what os.environ contains.
import os
print("\n--- Importing Libraries ---")
print("Importing torch...")
import torch
# Directory torch.hub reports for its downloads
# (presumably derived from TORCH_HOME - confirm against the torch.hub docs).
print(f"  torch.hub.get_dir(): {torch.hub.get_dir()}")

print("\nChecking HF_HOME (primary HuggingFace cache setting)...")
# Read straight from the environment; no HuggingFace library is involved yet.
print(f"  HF_HOME: {os.environ.get('HF_HOME', 'NOT SET')}")

print("\nImporting huggingface_hub...")
from huggingface_hub import constants as hf_constants
# Both constant names are printed so they can be compared side by side.
# NOTE(review): HUGGINGFACE_HUB_CACHE looks like a legacy alias of HF_HUB_CACHE -
# confirm it still exists in the installed huggingface_hub version.
print(f"  HF_HUB_CACHE: {hf_constants.HF_HUB_CACHE}")
print(f"  HUGGINGFACE_HUB_CACHE: {hf_constants.HUGGINGFACE_HUB_CACHE}")

print("\nImporting datasets...")
import datasets
# Cache location as seen by the datasets library's own config module.
print(f"  datasets.config.HF_DATASETS_CACHE: {datasets.config.HF_DATASETS_CACHE}")

In [None]:
# Final verification - check for ~/.cache presence
#
# Reports whether ~/.cache exists and, if so, whether the huggingface and
# torch subdirectories are present, listing the size of each directory
# found directly inside them.
import os
from pathlib import Path


def cache_dir_size(directory):
    """Return the total size in bytes of all regular files under *directory*.

    Files that disappear or cannot be inspected while scanning are skipped,
    so a cache that is being written to does not abort the report.

    Args:
        directory: Path of the directory to scan recursively.

    Returns:
        int: Sum of ``st_size`` over every regular file found.
    """
    total = 0
    for entry in directory.rglob('*'):
        try:
            if entry.is_file():
                total += entry.stat().st_size
        except OSError:
            # Entry vanished or is unreadable; ignore it and keep scanning.
            continue
    return total


def report_cache_subdir(cache_root, name):
    """Print whether ``cache_root / name`` exists and the size of its subdirectories.

    Only directories directly inside the subdirectory are listed; loose
    files at that level are not reported.

    Args:
        cache_root: Path of the ``~/.cache`` directory.
        name: Name of the library subdirectory to inspect (e.g. ``'torch'``).
    """
    subdir = cache_root / name
    if not subdir.exists():
        print(f"Good: No ~/.cache/{name} directory")
        return
    print(f"\nWARNING: ~/.cache/{name} exists!")
    print("Contents:")
    for item in subdir.iterdir():
        if item.is_dir():
            size_mb = cache_dir_size(item) / 1024 / 1024
            print(f"  {item.name}/: {size_mb:.1f} MB")


home = Path.home()
bad_cache = home / '.cache'

print("\n--- Final Check: ~/.cache Status ---")
if bad_cache.exists():
    print(f"~/.cache exists at: {bad_cache}")
    # Same report for each library that would otherwise default to ~/.cache
    for library in ('huggingface', 'torch'):
        report_cache_subdir(bad_cache, library)
else:
    print("Good: ~/.cache does not exist")

## Summary

If all checks passed:
- Environment variables are set correctly
- Libraries see the correct cache paths
- Downloads should go to the right locations

**Next:** Run Test_02 to verify import order doesn't matter, then Test_03 and Test_04 to actually download models and verify locations.