# 🔍 Google Drive Data Verification for Seoul Heatwave Course

This notebook verifies that the Seoul Heatwave course data has been uploaded to Google Drive with the expected folder structure and that a sample file can be loaded.

## 1. Mount Google Drive

In [None]:
# Drive is only mounted when running in Google Colab; elsewhere this cell just prints a notice
import sys

# Outside Colab there is nothing to mount, so only explain how to proceed;
# inside Colab, attach the user's Drive under /content/drive.
if 'google.colab' not in sys.modules:
    print("⚠️ This notebook is designed for Google Colab")
    print("📝 To test locally, use the original data paths")
else:
    from google.colab import drive
    drive.mount('/content/drive')
    print("✅ Google Drive mounted successfully!")

## 2. Check Data Structure

In [None]:
import os
import glob
from pathlib import Path

# Candidate locations for the course folder, checked in this order
possible_paths = [
    '/content/drive/MyDrive/seoul_heatwave_course',
    '/content/drive/My Drive/seoul_heatwave_course',
    '/content/drive/Shareddrives/seoul_heatwave_course',
    '../drive_upload/seoul_heatwave_course'  # Local test path
]

# DATA_PATH is the first candidate that exists on disk, or None if none do
DATA_PATH = next(
    (candidate for candidate in possible_paths if os.path.exists(candidate)),
    None,
)

if DATA_PATH is not None:
    print(f"✅ Found data at: {DATA_PATH}")

    # Print an indented tree, previewing at most three files per directory
    print("\n📁 Directory structure:")
    preview_limit = 3
    for current_dir, _subdirs, filenames in os.walk(DATA_PATH):
        # Depth below DATA_PATH = number of separators left after stripping it
        depth = current_dir.replace(DATA_PATH, '').count(os.sep)
        dir_pad = '  ' * depth
        child_pad = '  ' * (depth + 1)

        print(f"{dir_pad}{os.path.basename(current_dir)}/")
        for shown in filenames[:preview_limit]:
            print(f"{child_pad}{shown}")

        hidden = len(filenames) - preview_limit
        if hidden > 0:
            print(f"{child_pad}... and {hidden} more files")
else:
    print("❌ Data not found. Please check:")
    print("  1. Data is uploaded to Google Drive")
    print("  2. Folder name is 'seoul_heatwave_course'")
    print("  3. Drive is properly mounted")

## 3. Verify S-DoT Data

In [None]:
# Verify the raw S-DoT sensor CSVs under <DATA_PATH>/data/raw/s-dot.
#
# csv_files is always (re)bound here so that later cells can rely on the name
# existing, and so a re-run never keeps results from a previous Drive layout.
csv_files = []

# Look DATA_PATH up defensively: if the data-discovery cell has not been run
# the name does not exist, and this cell should skip rather than raise NameError.
_sdot_root = globals().get('DATA_PATH')

if _sdot_root:
    sdot_path = os.path.join(_sdot_root, 'data', 'raw', 's-dot')

    if os.path.exists(sdot_path):
        # glob returns matches in arbitrary order; sort once so the listing
        # and any later index into csv_files are deterministic.
        csv_files = sorted(glob.glob(os.path.join(sdot_path, '*.csv')))

        print("📊 S-DoT Data Status:")
        print(f"  • Found {len(csv_files)} CSV files")

        if csv_files:
            # Stat each file once and reuse the sizes for the total and the listing
            # (each stat is a round-trip on a Drive mount).
            sizes_bytes = [os.path.getsize(csv_path) for csv_path in csv_files]
            total_size = sum(sizes_bytes) / (1024**3)  # GB
            print(f"  • Total size: {total_size:.2f} GB")

            print("\n📝 Files found:")
            for csv_path, n_bytes in zip(csv_files[:5], sizes_bytes[:5]):
                size_mb = n_bytes / (1024**2)
                print(f"  ✓ {os.path.basename(csv_path)} ({size_mb:.1f} MB)")

            if len(csv_files) > 5:
                print(f"  ... and {len(csv_files)-5} more files")
        else:
            print("  ❌ No CSV files found")
    else:
        print(f"❌ S-DoT data directory not found at {sdot_path}")

## 4. Verify External Data

In [None]:
if DATA_PATH:
    # Reference data (sensor metadata, SGIS boundaries) lives under data/external
    external_path = os.path.join(DATA_PATH, 'data', 'external')
    sensor_file = os.path.join(external_path, 'sensor_locations.xlsx')
    sgis_path = os.path.join(external_path, 'sgis_boundaries')

    print("📋 External Data Status:")

    # Sensor location workbook: report its size in KB when present
    if not os.path.exists(sensor_file):
        print("  ❌ sensor_locations.xlsx not found")
    else:
        print(f"  ✓ sensor_locations.xlsx ({os.path.getsize(sensor_file) / 1024:.0f} KB)")

    # SGIS boundary archives: list every .zip with its size in MB
    if not os.path.exists(sgis_path):
        print("  ❌ SGIS boundaries not found")
    else:
        zip_files = glob.glob(os.path.join(sgis_path, '*.zip'))
        print(f"\n  📍 SGIS Boundaries ({len(zip_files)} files):")
        for archive in zip_files:
            archive_mb = os.path.getsize(archive) / (1024**2)
            print(f"    ✓ {os.path.basename(archive)} ({archive_mb:.1f} MB)")

## 5. Test Data Loading

In [None]:
import pandas as pd

# Smoke-test that one S-DoT CSV can be parsed by pandas.
#
# df is reset first so that a re-run (or a skipped run) never leaves a stale
# DataFrame behind for a later check to mistake for a successful load.
df = None

# Look the prerequisites up defensively: either name may be missing if the
# earlier cells were skipped, and a NameError here would hide the real problem.
_load_root = globals().get('DATA_PATH')
_load_candidates = globals().get('csv_files') or []

if _load_root and _load_candidates:
    # Try to load a sample file
    sample_file = _load_candidates[0]

    print(f"🔍 Testing data load: {os.path.basename(sample_file)}")

    # UTF-8 first (for renamed files), then the Korean EUC-KR encoding.
    # Only the first 5 rows are read to keep the check fast.
    load_error = None
    for _codec, _label in (('utf-8', 'UTF-8'), ('euc-kr', 'EUC-KR')):
        try:
            df = pd.read_csv(sample_file, encoding=_codec, nrows=5)
        except Exception as exc:
            # Deliberately broad (this is a best-effort diagnostic), but not a
            # bare except: KeyboardInterrupt/SystemExit must still propagate.
            load_error = exc
        else:
            print(f"  ✅ Successfully loaded with {_label} encoding")
            break

    if df is None:
        print(f"  ❌ Failed to load: {load_error}")
    else:
        print(f"\n📊 Data shape: {df.shape}")
        print(f"📋 Columns: {df.columns.tolist()[:5]}...")
        print("\n✅ Data is ready for analysis!")
else:
    print("⚠️ Skipping load test: no data folder or S-DoT CSV files available")

## 6. Verification Summary

In [None]:
# Final pass/fail summary built from the results of the earlier cells.

# Expected dataset shape for the course; adjust here if the dataset changes.
EXPECTED_SDOT_FILES = 16
EXPECTED_SGIS_ARCHIVES = 3


def _drive_is_mounted(mount_point='/content/drive'):
    """Return True when running in Colab and *mount_point* has contents.

    Having the google.colab module loaded only shows that this is a Colab
    runtime, not that Drive was mounted, so the mount point is also checked.
    A missing or unreadable directory counts as "not mounted".
    """
    if 'google.colab' not in sys.modules:
        return False
    try:
        return bool(os.listdir(mount_point))
    except OSError:
        return False


print("=" * 50)
print("📊 GOOGLE DRIVE DATA VERIFICATION SUMMARY")
print("=" * 50)

# Results from earlier cells are looked up by name so that a skipped cell
# shows up as a failed check instead of raising NameError.
_results = globals()
_found_csvs = _results.get('csv_files') or []
_found_zips = _results.get('zip_files') or []
_sensor_path = _results.get('sensor_file')

checklist = {
    "Google Drive Mounted": _drive_is_mounted(),
    "Data Folder Found": _results.get('DATA_PATH') is not None,
    f"S-DoT Data ({EXPECTED_SDOT_FILES} files)": len(_found_csvs) == EXPECTED_SDOT_FILES,
    "Sensor Locations": bool(_sensor_path) and os.path.exists(_sensor_path),
    "SGIS Boundaries": len(_found_zips) == EXPECTED_SGIS_ARCHIVES,
    "Data Loadable": _results.get('df') is not None,
}

for item, status in checklist.items():
    icon = "✅" if status else "❌"
    print(f"{icon} {item}")

if all(checklist.values()):
    print("\n🎉 All checks passed! Data is ready for the course.")
else:
    print("\n⚠️ Some checks failed. Please review the issues above.")

print("\n💡 Next steps:")
print("  1. Share this folder with students (view-only)")
print("  2. Students add shortcut to their Drive")
print("  3. Run Week01 notebook to start the course")