In [3]:
!pip install requests pandas

Collecting requests
  Using cached requests-2.32.4-py3-none-any.whl.metadata (4.9 kB)
Collecting charset_normalizer<4,>=2 (from requests)
  Using cached charset_normalizer-3.4.2-cp311-cp311-macosx_10_9_universal2.whl.metadata (35 kB)
Collecting idna<4,>=2.5 (from requests)
  Using cached idna-3.10-py3-none-any.whl.metadata (10 kB)
Collecting urllib3<3,>=1.21.1 (from requests)
  Downloading urllib3-2.5.0-py3-none-any.whl.metadata (6.5 kB)
Collecting certifi>=2017.4.17 (from requests)
  Using cached certifi-2025.6.15-py3-none-any.whl.metadata (2.4 kB)
Using cached requests-2.32.4-py3-none-any.whl (64 kB)
Using cached charset_normalizer-3.4.2-cp311-cp311-macosx_10_9_universal2.whl (198 kB)
Using cached idna-3.10-py3-none-any.whl (70 kB)
Downloading urllib3-2.5.0-py3-none-any.whl (129 kB)
Using cached certifi-2025.6.15-py3-none-any.whl (157 kB)
Installing collected packages: urllib3, idna, charset_normalizer, certifi, requests
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5

In [None]:
#!/usr/bin/env python3
"""
G2 Cluster script to upload CSV results to Flask API
Run this after your MIP/Bayesian optimization algorithms complete
"""

import requests
import os
import glob
import logging
from pathlib import Path
import time
from typing import List, Dict
import sys

# Configuration
# Base URL of the Flask API that receives the uploads (production host).
API_BASE_URL = "http://cornellschedulingteam.orie.cornell.edu:8000"
# For local testing, point at a local server instead:
# API_BASE_URL = "http://localhost:8000"
# Sent as the X-API-Key header on uploads. Left blank here; supply the real
# key before running (e.g. read it from the UPLOAD_API_KEY environment variable).
API_KEY = ''
RESULTS_DIR = "../results"  # Root directory under which the metrics CSVs are searched
RETRY_ATTEMPTS = 3  # Maximum number of upload attempts
RETRY_DELAY = 5  # Seconds to wait between attempts

# Setup logging
# force=True replaces any handlers already attached to the root logger.
# Without it, basicConfig() silently does nothing when logging was configured
# earlier in the same interpreter (for example by another notebook cell), and
# messages would keep going to the previously configured log file instead of
# upload_results.log.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s: %(message)s",
    handlers=[
        logging.FileHandler("upload_results.log"),
        logging.StreamHandler(sys.stdout),
    ],
    force=True,
)
logger = logging.getLogger(__name__)


def find_csv_files(directory: str) -> List[str]:
    """Collect the metrics CSV files that should be uploaded.

    Looks for files matching ``sp25/metrics/20250614*.csv`` beneath
    ``directory`` and drops any whose path ends with ``_temp.csv``.

    Args:
        directory: Root results directory to search beneath.

    Returns:
        Paths of the matching files, in the order ``glob`` yields them.
    """
    matches = glob.glob(
        os.path.join(directory, "sp25/metrics/20250614*.csv"),
        recursive=True,
    )

    # Temporary/intermediate outputs are never uploaded.
    to_upload = []
    for path in matches:
        if path.endswith("_temp.csv"):
            continue
        to_upload.append(path)

    logger.info(f"Found {len(to_upload)} CSV files to upload")
    return to_upload


def upload_files_to_api(csv_files: List[str]) -> Dict:
    """Send all given CSV files to the upload-metrics endpoint in one request.

    Every file is attached as a ``files`` multipart part with content type
    ``text/csv`` and the API key is sent in the ``X-API-Key`` header.

    Args:
        csv_files: Paths of the CSV files to upload.

    Returns:
        The decoded JSON body when the server answers 200 or 207 (partial
        success); otherwise a dict with a single ``"error"`` key describing
        the HTTP, network, or unexpected failure. No exception escapes.
    """
    # Multipart parts; the file handles inside are closed in ``finally``.
    files = []
    try:
        for path in csv_files:
            part_name = os.path.basename(path)
            files.append(("files", (part_name, open(path, "rb"), "text/csv")))

        headers = {"X-API-Key": API_KEY}
        endpoint = f"{API_BASE_URL}/api/upload-metrics"

        logger.info(f"Uploading {len(csv_files)} files to {endpoint}")

        # 5 minute timeout to leave room for large uploads.
        response = requests.post(
            endpoint,
            files=files,
            headers=headers,
            timeout=300,
        )

        # Anything other than 200 / 207 (partial success) is a failure.
        if response.status_code not in [200, 207]:
            logger.error(f"Upload failed: {response.status_code} - {response.text}")
            return {"error": f"HTTP {response.status_code}: {response.text}"}

        result = response.json()
        logger.info(f"Upload successful: {result}")
        return result

    except requests.exceptions.RequestException as e:
        logger.error(f"Network error during upload: {e}")
        return {"error": f"Network error: {e}"}

    except Exception as e:
        logger.error(f"Unexpected error during upload: {e}")
        return {"error": f"Unexpected error: {e}"}

    finally:
        # Release every handle that was opened, whatever the outcome.
        for _field, (_name, handle, _mime) in files:
            handle.close()


def upload_with_retry(csv_files: List[str]) -> bool:
    """Upload the CSV files, retrying the whole request on failure.

    Makes up to ``RETRY_ATTEMPTS`` attempts, sleeping ``RETRY_DELAY`` seconds
    between them. An attempt counts as successful when the result has no
    ``"error"`` key; per-file errors reported by the server are only logged
    as a warning.

    Args:
        csv_files: Paths of the CSV files to upload.

    Returns:
        True once an attempt succeeds, False if every attempt failed.
    """
    for attempt in range(1, RETRY_ATTEMPTS + 1):
        logger.info(f"Upload attempt {attempt}/{RETRY_ATTEMPTS}")

        result = upload_files_to_api(csv_files)

        if "error" in result:
            logger.error(f"Attempt {attempt} failed: {result['error']}")

            # No point waiting after the final attempt.
            if attempt < RETRY_ATTEMPTS:
                logger.info(f"Retrying in {RETRY_DELAY} seconds...")
                time.sleep(RETRY_DELAY)
            continue

        processed = result.get("processed_files", [])
        reported_errors = result.get("errors", [])

        logger.info(f"Successfully processed {len(processed)} files")
        if reported_errors:
            logger.warning(f"Some files had errors: {reported_errors}")
        return True

    logger.error(f"All {RETRY_ATTEMPTS} attempts failed")
    return False


def check_api_health() -> bool:
    """Probe the upload-status endpoint to see whether the API is reachable.

    Returns:
        True when the endpoint answers HTTP 200 with a JSON body, False on
        any other status code or on any exception (which is logged).
    """
    try:
        response = requests.get(f"{API_BASE_URL}/api/upload-status", timeout=10)
        if response.status_code != 200:
            logger.error(f"API health check failed: {response.status_code}")
            return False

        status = response.json()
        logger.info(f"API health check passed: {status}")
        return True
    except Exception as e:
        logger.error(f"API health check error: {e}")
        return False


def main():
    """Run the metrics upload: validate, discover files, upload.

    Exits with status 1 when the results directory is missing, the API
    health check fails, or the upload fails after all retries. Returns
    normally when there is nothing to upload or the upload succeeds.
    """
    logger.info("Starting CSV upload process")

    # Preconditions: results directory present and API reachable.
    if not os.path.exists(RESULTS_DIR):
        logger.error(f"Results directory not found: {RESULTS_DIR}")
        sys.exit(1)

    if not check_api_health():
        logger.error("API health check failed. Aborting upload.")
        sys.exit(1)

    csv_files = find_csv_files(RESULTS_DIR)
    print(csv_files)
    if not csv_files:
        logger.warning("No CSV files found to upload")
        return

    if not upload_with_retry(csv_files):
        logger.error("Upload process failed")
        sys.exit(1)

    logger.info("Upload process completed successfully")

    # Optional: Archive or cleanup uploaded files
    # archive_uploaded_files(csv_files)


def archive_uploaded_files(csv_files: List[str]):
    """Move uploaded CSVs into the ``uploaded`` folder under ``RESULTS_DIR``.

    The folder is created if needed. Each file keeps its base name and is
    moved with ``os.rename``.

    Args:
        csv_files: Paths of the files to move.
    """
    destination_dir = os.path.join(RESULTS_DIR, "uploaded")
    os.makedirs(destination_dir, exist_ok=True)

    for source in csv_files:
        target = os.path.join(destination_dir, os.path.basename(source))
        os.rename(source, target)
        logger.info(f"Archived {source} to {target}")


if __name__ == "__main__":
    main()


2025-07-04 17:37:59,271 INFO: Starting CSV upload process
2025-07-04 17:37:59,536 INFO: API health check passed: {'database_connected': True, 'max_file_size_mb': 0, 'metrics_count': 4527, 'status': 'healthy', 'timestamp': '2025-07-04T16:37:59.727753', 'upload_folder': ''}
2025-07-04 17:37:59,598 INFO: Found 214 CSV files to upload
['../results/sp25/metrics/20250614_192306i29-b841f75cc2ece8aa01303e36f22146b7.csv', '../results/sp25/metrics/20250614_075849i32-53cb986b35a7d9441e28cddd65901951.csv', '../results/sp25/metrics/20250614_192306i183-098df1d9bd308e5472d31178e543d735.csv', '../results/sp25/metrics/20250614_192306i99-52b5b4fad2a7ae28425bc942d6e3ad86.csv', '../results/sp25/metrics/20250614_192306i153-aba07d40b571cd0947cdb7f0e79ed26a.csv', '../results/sp25/metrics/20250614_075849i37-b9b1e8e4d86b12116402c53de6c33c2b.csv', '../results/sp25/metrics/20250614_192306i119-a0820601e913ecd013f07d57c77640be.csv', '../results/sp25/metrics/20250614_192306i114-27199c46d176e9419702bb9522f617fa.csv'

In [31]:
#!/usr/bin/env python3
"""
G2 Cluster script to upload schedule CSV results to Flask API
Run this after your scheduling algorithms complete
"""

import requests
import os
import glob
import logging
from pathlib import Path
import time
from typing import List, Dict
import sys

# Configuration
# Production endpoint; swap with the active line below to upload to the live site.
# API_BASE_URL = "http://cornellschedulingteam.orie.cornell.edu:8000"
API_BASE_URL = "http://localhost:8000"  # Currently targeting a local server for testing
API_KEY = ''  # Sent as the X-API-Key header; blank here, supply the real key before a production upload
SCHEDULES_DIR = "/Users/adamshafikjovine/Documents/BOScheduling/results/sp25/schedules"  # Directory where the schedule CSVs are generated
SEMESTER = "sp25"  # Semester label sent with the upload as the "semester" form field
RETRY_ATTEMPTS = 3  # Maximum number of upload attempts
RETRY_DELAY = 5  # Seconds to wait between attempts

# Setup logging
# force=True replaces any handlers already attached to the root logger.
# Without it, basicConfig() silently does nothing when logging was configured
# earlier in the same interpreter (for example by another notebook cell), so
# upload_schedules.log would never be written and messages would keep going
# to the previously configured log file.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s: %(message)s",
    handlers=[
        logging.FileHandler("upload_schedules.log"),
        logging.StreamHandler(sys.stdout),
    ],
    force=True,
)
logger = logging.getLogger(__name__)


def find_schedule_csv_files(directory: str) -> List[str]:
    """Collect the schedule CSV files that should be uploaded.

    Matches ``20250614*.csv`` directly inside ``directory`` and skips files
    whose lower-cased base name contains ``_temp``, ``_backup``, ``_test``
    or ``temp_``.

    Args:
        directory: Directory holding the schedule CSVs.

    Returns:
        Paths of the files to upload, in the order ``glob`` yields them.
    """
    excluded_markers = ["_temp", "_backup", "_test", "temp_"]
    candidates = glob.glob(os.path.join(directory, "20250614*.csv"), recursive=False)

    selected = []
    for path in candidates:
        lowered_name = os.path.basename(path).lower()
        if any(marker in lowered_name for marker in excluded_markers):
            continue  # temporary, backup, or test artefact
        selected.append(path)

    logger.info(f"Found {len(selected)} schedule CSV files to upload")
    return selected


def upload_schedules_to_api(csv_files: List[str], semester: str) -> Dict:
    """Send all given schedule CSVs to the upload-schedules endpoint.

    Every file is attached as a ``files`` multipart part with content type
    ``text/csv``; ``semester`` is sent as a form field and the API key in
    the ``X-API-Key`` header. On success the per-file results returned by
    the server are logged.

    Args:
        csv_files: Paths of the schedule CSV files to upload.
        semester: Semester label sent alongside the files.

    Returns:
        The decoded JSON body when the server answers 200 or 207 (partial
        success); otherwise a dict with a single ``"error"`` key describing
        the HTTP, network, or unexpected failure. No exception escapes.
    """
    # Multipart parts; the file handles inside are closed in ``finally``.
    files = []
    try:
        for path in csv_files:
            part_name = os.path.basename(path)
            files.append(("files", (part_name, open(path, "rb"), "text/csv")))

        headers = {"X-API-Key": API_KEY}
        data = {"semester": semester}
        endpoint = f"{API_BASE_URL}/api/upload-schedules"

        logger.info(f"Uploading {len(csv_files)} schedule files to {endpoint}")

        # 5 minute timeout to leave room for large uploads.
        response = requests.post(
            endpoint,
            files=files,
            data=data,
            headers=headers,
            timeout=300,
        )

        # Anything other than 200 / 207 (partial success) is a failure.
        if response.status_code not in [200, 207]:
            logger.error(f"Schedule upload failed: {response.status_code} - {response.text}")
            return {"error": f"HTTP {response.status_code}: {response.text}"}

        result = response.json()
        logger.info(f"Schedule upload successful: {result.get('message', 'Success')}")

        # Report the outcome for each file the server processed.
        for entry in result.get('processed_files', []):
            name = entry.get('filename', 'Unknown')
            failure = entry.get('error')
            if failure:
                logger.warning(f"  {name}: ERROR - {failure}")
                continue
            records = entry.get('records_processed', 0)
            schedule_id = entry.get('schedule_id', 'Unknown')
            logger.info(f"  {name}: SUCCESS - {records} records for schedule {schedule_id}")

        return result

    except requests.exceptions.RequestException as e:
        logger.error(f"Network error during schedule upload: {e}")
        return {"error": f"Network error: {e}"}

    except Exception as e:
        logger.error(f"Unexpected error during schedule upload: {e}")
        return {"error": f"Unexpected error: {e}"}

    finally:
        # Release every handle that was opened, whatever the outcome.
        for _field, (_name, handle, _mime) in files:
            handle.close()


def upload_schedules_with_retry(csv_files: List[str], semester: str) -> bool:
    """Upload the schedule CSVs, retrying the whole request on failure.

    Makes up to ``RETRY_ATTEMPTS`` attempts, sleeping ``RETRY_DELAY`` seconds
    between them. An attempt counts as successful when the result has no
    top-level ``"error"`` key; per-file errors are only logged as warnings.

    Args:
        csv_files: Paths of the schedule CSV files to upload.
        semester: Semester label sent alongside the files.

    Returns:
        True once an attempt succeeds, False if every attempt failed.
    """
    for attempt in range(1, RETRY_ATTEMPTS + 1):
        logger.info(f"Schedule upload attempt {attempt}/{RETRY_ATTEMPTS}")

        result = upload_schedules_to_api(csv_files, semester)

        if "error" in result:
            logger.error(f"Attempt {attempt} failed: {result['error']}")

            # No point waiting after the final attempt.
            if attempt < RETRY_ATTEMPTS:
                logger.info(f"Retrying in {RETRY_DELAY} seconds...")
                time.sleep(RETRY_DELAY)
            continue

        processed = result.get("processed_files", [])
        record_count = result.get("total_records", 0)
        logger.info(f"Successfully processed {len(processed)} files, {record_count} total records")

        # Surface files the server accepted the request for but could not process.
        failed = list(filter(lambda item: item.get('error'), processed))
        if failed:
            logger.warning("Some files had errors:")
            for item in failed:
                logger.warning(f"  {item['filename']}: {item['error']}")

        return True

    logger.error(f"All {RETRY_ATTEMPTS} attempts failed")
    return False


def check_schedule_api_health() -> bool:
    """Probe the schedule-upload-status endpoint to see whether it is reachable.

    On success the status message and the reported number of schedule
    records are logged.

    Returns:
        True when the endpoint answers HTTP 200 with a JSON body, False on
        any other status code or on any exception (which is logged).
    """
    try:
        response = requests.get(f"{API_BASE_URL}/api/schedule-upload-status", timeout=10)
        if response.status_code != 200:
            logger.error(f"Schedule API health check failed: {response.status_code}")
            return False

        status = response.json()
        logger.info(f"Schedule API health check passed: {status.get('message', 'OK')}")
        record_count = status.get('total_schedule_records', 0)
        logger.info(f"Current database contains {record_count} schedule records")
        return True
    except Exception as e:
        logger.error(f"Schedule API health check error: {e}")
        return False


def main():
    """Run the schedule upload: validate, discover files, upload.

    Exits with status 1 when the schedules directory is missing, the API
    health check fails, or the upload fails after all retries. Returns
    normally when there is nothing to upload or the upload succeeds.
    """
    logger.info(f"Starting schedule CSV upload process for semester: {SEMESTER}")

    # Preconditions: schedules directory present and API reachable.
    if not os.path.exists(SCHEDULES_DIR):
        logger.error(f"Schedules directory not found: {SCHEDULES_DIR}")
        sys.exit(1)

    if not check_schedule_api_health():
        logger.error("Schedule API health check failed. Aborting upload.")
        sys.exit(1)

    csv_files = find_schedule_csv_files(SCHEDULES_DIR)
    base_names = [os.path.basename(path) for path in csv_files]
    logger.info(f"Schedule files found: {base_names}")

    if not csv_files:
        logger.warning("No schedule CSV files found to upload")
        return

    if not upload_schedules_with_retry(csv_files, SEMESTER):
        logger.error("Schedule upload process failed")
        sys.exit(1)

    logger.info("Schedule upload process completed successfully")

    # Optional: Archive or cleanup uploaded files
    # archive_uploaded_schedule_files(csv_files)


def archive_uploaded_schedule_files(csv_files: List[str]):
    """Move uploaded schedule CSVs into the ``uploaded`` folder under ``SCHEDULES_DIR``.

    The folder is created if needed. Each file keeps its base name and is
    moved with ``os.rename``.

    Args:
        csv_files: Paths of the files to move.
    """
    destination_dir = os.path.join(SCHEDULES_DIR, "uploaded")
    os.makedirs(destination_dir, exist_ok=True)

    for source in csv_files:
        target = os.path.join(destination_dir, os.path.basename(source))
        os.rename(source, target)
        logger.info(f"Archived {source} to {target}")


if __name__ == "__main__":
    main()

2025-07-04 19:17:58,715 INFO: Starting schedule CSV upload process for semester: sp25


2025-07-04 19:18:08,670 INFO: Schedule API health check passed: Schedule upload service is operational
2025-07-04 19:18:08,676 INFO: Current database contains 3586742 schedule records
2025-07-04 19:18:08,759 INFO: Found 234 schedule CSV files to upload
2025-07-04 19:18:08,760 INFO: Schedule files found: ['20250614_192306i29-b841f75cc2ece8aa01303e36f22146b7.csv', '20250614_075849i32-53cb986b35a7d9441e28cddd65901951.csv', '20250614_192306i183-098df1d9bd308e5472d31178e543d735.csv', '20250614_192306i99-52b5b4fad2a7ae28425bc942d6e3ad86.csv', '20250614_192306i153-aba07d40b571cd0947cdb7f0e79ed26a.csv', '20250614_075849i37-b9b1e8e4d86b12116402c53de6c33c2b.csv', '20250614_192306i119-a0820601e913ecd013f07d57c77640be.csv', '20250614_192306i114-27199c46d176e9419702bb9522f617fa.csv', '20250614_192306i140-9013cd903892a340caa5a43c2972fac9.csv', '20250614_181924i1-942856b0e73d31921e4355bf54b03c4b.csv', '20250614_192306i161-30a1577607ba378054af5a9db9cb07b3.csv', '20250614_192306i24-6c0ed36d9503ed9193b7

2025-07-04 20:07:56,396 INFO: Found 214 CSV files to upload


['../results/sp25/metrics/20250614_192306i29-b841f75cc2ece8aa01303e36f22146b7.csv',
 '../results/sp25/metrics/20250614_075849i32-53cb986b35a7d9441e28cddd65901951.csv',
 '../results/sp25/metrics/20250614_192306i183-098df1d9bd308e5472d31178e543d735.csv',
 '../results/sp25/metrics/20250614_192306i99-52b5b4fad2a7ae28425bc942d6e3ad86.csv',
 '../results/sp25/metrics/20250614_192306i153-aba07d40b571cd0947cdb7f0e79ed26a.csv',
 '../results/sp25/metrics/20250614_075849i37-b9b1e8e4d86b12116402c53de6c33c2b.csv',
 '../results/sp25/metrics/20250614_192306i119-a0820601e913ecd013f07d57c77640be.csv',
 '../results/sp25/metrics/20250614_192306i114-27199c46d176e9419702bb9522f617fa.csv',
 '../results/sp25/metrics/20250614_192306i140-9013cd903892a340caa5a43c2972fac9.csv',
 '../results/sp25/metrics/20250614_192306i161-30a1577607ba378054af5a9db9cb07b3.csv',
 '../results/sp25/metrics/20250614_192306i24-6c0ed36d9503ed9193b727059a1fca4d.csv',
 '../results/sp25/metrics/20250614_192306i9-fc285d76cc39ba548e4ff3963e

In [22]:
#!/usr/bin/env python3
"""
G2 Cluster script to upload image files to Flask API
Run this after your visualization/chart generation algorithms complete
"""
import requests
import os
import glob
import logging
from pathlib import Path
import time
from typing import List, Dict
import sys

# Configuration
API_BASE_URL = "http://localhost:8000"  # Currently targeting a local server for testing
API_KEY = ''  # API key for the upload service; left blank here
IMAGES_DIR = "/Users/adamshafikjovine/Documents/BOScheduling/metrics/plots"  # Directory where the image files are generated
SUBDIRECTORY = "plots"  # Optional subdirectory in static folder (e.g., "charts", "plots", "visualizations")
RETRY_ATTEMPTS = 3  # Maximum number of upload attempts per request
RETRY_DELAY = 5  # Seconds to wait between attempts
BATCH_SIZE = 5  # Number of images to upload per request (to avoid timeouts)

# Image extensions (lower-case, with leading dot) that are searched for
SUPPORTED_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.gif', '.bmp', '.webp', '.tiff'}

# Setup logging
# force=True replaces any handlers already attached to the root logger.
# Without it, basicConfig() silently does nothing when logging was configured
# earlier in the same interpreter (for example by another notebook cell), so
# upload_images.log would never be written and messages would keep going to
# the previously configured log file.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s: %(message)s",
    handlers=[
        logging.FileHandler("upload_images.log"),
        logging.StreamHandler(sys.stdout),
    ],
    force=True,
)
logger = logging.getLogger(__name__)


def clean_filename(filename: str) -> str:
    """Strip the embedded ``.csv`` marker from a plot filename.

    Only the first applicable rule is used, and it is applied to every
    occurrence in the name:

    * ``file.csv_dist.png`` -> ``file_dist.png``
    * ``file.csv.png``      -> ``file.png``

    Names matching neither rule are returned unchanged.

    Args:
        filename: Base name of the image file.

    Returns:
        The cleaned filename.
    """
    # Ordered (marker, replacement) rules; the ``_dist`` form is checked first.
    rewrites = (
        ('.csv_dist.', '_dist.'),
        ('.csv.', '.'),
    )
    for marker, replacement in rewrites:
        if marker in filename:
            return filename.replace(marker, replacement)

    return filename


def find_image_files(directory: str, prefix: str = "20250618") -> List[str]:
    """Find the image files in ``directory`` that should be uploaded.

    A file qualifies when its name starts with ``prefix`` and ends with one
    of ``SUPPORTED_EXTENSIONS``, in either all-lower-case or all-upper-case
    spelling. Files whose lower-cased base name contains ``_temp``,
    ``_backup``, ``_test``, ``temp_`` or ``.tmp`` are skipped.

    Args:
        directory: Directory to search (not recursive).
        prefix: Required start of the file name. It is inserted into a glob
            pattern, so glob metacharacters in it are interpreted as such.

    Returns:
        Sorted list of unique matching paths.
    """
    found = set()

    for ext in SUPPORTED_EXTENSIONS:
        # The same prefix is applied to both spellings of the extension, so
        # upper-case files (e.g. *.PNG) are restricted to the same run as
        # lower-case ones instead of matching everything in the directory.
        for suffix in (ext, ext.upper()):
            pattern = os.path.join(directory, f"{prefix}*{suffix}")
            matches = glob.glob(pattern)
            logger.debug(f'Found files for {suffix}: {len(matches)}')
            found.update(matches)

    # Drop temporary / backup / test artefacts.
    excluded_markers = ("_temp", "_backup", "_test", "temp_", ".tmp")
    filtered_files = sorted(
        path
        for path in found
        if not any(marker in os.path.basename(path).lower() for marker in excluded_markers)
    )

    logger.info(f"Found {len(filtered_files)} image files to upload")
    return filtered_files


def upload_images_to_api(image_files: List[str], subdirectory: str = "") -> Dict:
    """Upload image files to the Flask API with cleaned filenames.

    Every image is attached as a ``files`` multipart part under the name
    produced by ``clean_filename`` and with a MIME type derived from its
    extension. The API key is sent in the ``X-API-Key`` header, as the other
    upload endpoints in this notebook do. On success the per-file results
    returned by the server are logged.

    Args:
        image_files: Paths of the image files to upload.
        subdirectory: Optional target subdirectory, sent as the
            ``subdirectory`` form field when non-empty.

    Returns:
        The decoded JSON body when the server answers 200 or 207 (partial
        success); otherwise a dict with a single ``"error"`` key describing
        the HTTP, network, or unexpected failure. No exception escapes.
    """
    # Extension -> MIME type; anything unlisted falls back to image/jpeg.
    mime_types = {
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.png': 'image/png',
        '.gif': 'image/gif',
        '.bmp': 'image/bmp',
        '.webp': 'image/webp',
        '.tiff': 'image/tiff',
    }

    # Multipart parts; the file handles inside are closed in ``finally``.
    files = []
    filename_mapping = {}  # cleaned filename -> original filename, for logging

    try:
        for image_file in image_files:
            original_filename = os.path.basename(image_file)
            clean_filename_str = clean_filename(original_filename)
            filename_mapping[clean_filename_str] = original_filename

            ext = os.path.splitext(image_file)[1].lower()
            mime_type = mime_types.get(ext, 'image/jpeg')

            # Upload under the cleaned name.
            files.append(
                (
                    "files",
                    (clean_filename_str, open(image_file, "rb"), mime_type),
                )
            )

            if original_filename != clean_filename_str:
                logger.info(f"Filename cleaned: {original_filename} -> {clean_filename_str}")

        # Prepare headers and form data
        headers = {"X-API-Key": API_KEY}
        data = {}
        if subdirectory:
            data["subdirectory"] = subdirectory

        logger.info(f"Uploading {len(image_files)} image files to {API_BASE_URL}/api/upload-images")
        if subdirectory:
            logger.info(f"Target subdirectory: static/{subdirectory}/")

        response = requests.post(
            f"{API_BASE_URL}/api/upload-images",
            files=files,
            data=data,
            headers=headers,
            timeout=300,  # 5 minute timeout for large files
        )

        # Anything other than 200 / 207 (partial success) is a failure.
        if response.status_code not in [200, 207]:
            logger.error(f"Image upload failed: {response.status_code} - {response.text}")
            return {"error": f"HTTP {response.status_code}: {response.text}"}

        result = response.json()
        logger.info(f"Image upload successful: {result.get('message', 'Success')}")

        # Report the outcome for each file the server processed.
        for file_result in result.get('processed_files', []):
            filename = file_result.get('filename', 'Unknown')
            if file_result.get('error'):
                logger.warning(f"  {filename}: ERROR - {file_result['error']}")
                continue

            uploaded_filename = file_result.get('uploaded_filename', 'Unknown')
            web_path = file_result.get('uploaded_path', 'Unknown')
            # "or 0" keeps a null size in the response from raising here,
            # which would turn a completed upload into a reported failure.
            size_mb = (file_result.get('size_bytes') or 0) / (1024 * 1024)
            dimensions = f"{file_result.get('width', '?')}x{file_result.get('height', '?')}"

            # Show original filename in parentheses if it was changed
            original = filename_mapping.get(filename, filename)
            filename_display = f"{filename} (was: {original})" if original != filename else filename

            logger.info(f"  {filename_display}: SUCCESS - {uploaded_filename} ({dimensions}, {size_mb:.1f}MB)")
            logger.info(f"    Web URL: {web_path}")

        return result

    except requests.exceptions.RequestException as e:
        logger.error(f"Network error during image upload: {e}")
        return {"error": f"Network error: {e}"}

    except Exception as e:
        logger.error(f"Unexpected error during image upload: {e}")
        return {"error": f"Unexpected error: {e}"}

    finally:
        # Release every handle that was opened, whatever the outcome.
        for _field, (_name, handle, _mime) in files:
            handle.close()


def upload_images_with_retry(image_files: List[str], subdirectory: str = "") -> bool:
    """Upload the image files, retrying the whole request on failure.

    Makes up to ``RETRY_ATTEMPTS`` attempts, sleeping ``RETRY_DELAY`` seconds
    between them. An attempt counts as successful when the result has no
    top-level ``"error"`` key; per-file errors are only logged as warnings.

    Args:
        image_files: Paths of the image files to upload.
        subdirectory: Optional target subdirectory passed through to the upload.

    Returns:
        True once an attempt succeeds, False if every attempt failed.
    """
    for attempt in range(1, RETRY_ATTEMPTS + 1):
        logger.info(f"Image upload attempt {attempt}/{RETRY_ATTEMPTS}")

        result = upload_images_to_api(image_files, subdirectory)

        if "error" in result:
            logger.error(f"Attempt {attempt} failed: {result['error']}")

            # No point waiting after the final attempt.
            if attempt < RETRY_ATTEMPTS:
                logger.info(f"Retrying in {RETRY_DELAY} seconds...")
                time.sleep(RETRY_DELAY)
            continue

        processed = result.get("processed_files", [])
        megabytes = result.get("total_size_bytes", 0) / (1024 * 1024)
        logger.info(f"Successfully processed {len(processed)} files, {megabytes:.1f}MB total")

        # Surface files the server accepted the request for but could not process.
        failed = list(filter(lambda item: item.get('error'), processed))
        if failed:
            logger.warning("Some files had errors:")
            for item in failed:
                logger.warning(f"  {item['filename']}: {item['error']}")

        return True

    logger.error(f"All {RETRY_ATTEMPTS} attempts failed")
    return False


def upload_in_batches(image_files: List[str], subdirectory: str = "") -> bool:
    """Upload the images in consecutive groups of ``BATCH_SIZE``.

    Each group goes through ``upload_images_with_retry``. A failed group
    does not stop the run; the remaining groups are still attempted. A one
    second pause is inserted between groups.

    Args:
        image_files: Paths of the image files to upload.
        subdirectory: Optional target subdirectory passed through to the upload.

    Returns:
        True only if every batch succeeded.
    """
    total_files = len(image_files)
    total_batches = (total_files + BATCH_SIZE - 1) // BATCH_SIZE  # ceiling division
    outcomes = []

    logger.info(f"Uploading {total_files} files in {total_batches} batches of {BATCH_SIZE}")

    for batch_num, start in enumerate(range(0, total_files, BATCH_SIZE), start=1):
        stop = start + BATCH_SIZE
        batch = image_files[start:stop]

        logger.info(f"Processing batch {batch_num}/{total_batches} ({len(batch)} files)")

        succeeded = upload_images_with_retry(batch, subdirectory)
        outcomes.append(succeeded)

        if succeeded:
            logger.info(f"Batch {batch_num} completed successfully")
        else:
            # Keep going: later batches are independent of this one.
            logger.error(f"Batch {batch_num} failed")

        # Brief pause before the next batch, skipped after the last one.
        if stop < total_files:
            time.sleep(1)

    successful_batches = sum(1 for ok in outcomes if ok)
    logger.info(f"Batch upload complete: {successful_batches}/{total_batches} batches successful")
    return successful_batches == total_batches


def check_image_api_health() -> bool:
    """Probe the image-upload-status endpoint to see whether it is reachable.

    On success the status message and the reported image count and total
    size are logged.

    Returns:
        True when the endpoint answers HTTP 200 and the status could be
        logged, False on any other status code or on any exception (which
        is logged).
    """
    try:
        response = requests.get(f"{API_BASE_URL}/api/image-upload-status", timeout=10)
        if response.status_code != 200:
            logger.error(f"Image API health check failed: {response.status_code}")
            return False

        status = response.json()
        logger.info(f"Image API health check passed: {status.get('message', 'OK')}")
        image_count = status.get('total_images', 0)
        size_mb = status.get('total_size_mb', 0)
        logger.info(f"Current static directory contains {image_count} images ({size_mb:.1f}MB)")
        return True
    except Exception as e:
        logger.error(f"Image API health check error: {e}")
        return False


def list_existing_images(subdirectory: str = "") -> List[str]:
    """Ask the server which image filenames it already holds.

    Args:
        subdirectory: When non-empty, sent as the ``subdirectory`` query
            parameter.

    Returns:
        The ``filename`` of every entry under ``images`` in the response.
        An empty list is returned when the server answers anything other
        than HTTP 200 or when any exception occurs (both are logged as
        warnings).
    """
    try:
        query = {}
        if subdirectory:
            query = {"subdirectory": subdirectory}

        response = requests.get(f"{API_BASE_URL}/api/images", params=query, timeout=10)

        if response.status_code != 200:
            logger.warning(f"Could not list existing images: {response.status_code}")
            return []

        payload = response.json()
        names = []
        for entry in payload.get('images', []):
            names.append(entry['filename'])
        logger.info(f"Found {len(names)} existing images on server")
        return names
    except Exception as e:
        logger.warning(f"Could not list existing images: {e}")
        return []


def filter_new_images(image_files: List[str], existing_images: List[str]) -> List[str]:
    """Drop images whose cleaned filename already exists on the server.

    Args:
        image_files: Local paths of the candidate images.
        existing_images: Filenames already present on the server.

    Returns:
        The paths from ``image_files``, in their original order, whose
        cleaned base name is not in ``existing_images``.
    """
    # Build the lookup once so each membership test is O(1) instead of a
    # scan over the whole server listing.
    existing = set(existing_images)

    new_files = []
    for filepath in image_files:
        original_filename = os.path.basename(filepath)
        cleaned_filename = clean_filename(original_filename)

        if cleaned_filename in existing:
            logger.info(f"Skipping {original_filename} -> {cleaned_filename} (already exists on server)")
            continue
        new_files.append(filepath)

    logger.info(f"Found {len(new_files)} new images to upload (skipped {len(image_files) - len(new_files)} existing)")
    return new_files


def main():
    """Run the image upload: discover files, skip known ones, upload.

    Exits with status 1 when the images directory is missing or the upload
    fails. Returns normally when there is nothing (new) to upload or the
    upload succeeds.
    """
    logger.info("Starting image upload process")
    logger.info(f"Images directory: {IMAGES_DIR}")
    logger.info(f"Target subdirectory: {SUBDIRECTORY or 'static (root)'}")

    if not os.path.exists(IMAGES_DIR):
        logger.error(f"Images directory not found: {IMAGES_DIR}")
        sys.exit(1)

    image_files = find_image_files(IMAGES_DIR)

    # Log at most the first ten names to keep the line readable.
    preview = [os.path.basename(f) for f in image_files[:10]]
    suffix = '...' if len(image_files) > 10 else ''
    logger.info(f"Image files found: {preview}{suffix}")

    if not image_files:
        logger.warning("No image files found to upload")
        return

    # Optional de-duplication against what the server already has.
    existing_images = list_existing_images(SUBDIRECTORY)
    if existing_images:
        image_files = filter_new_images(image_files, existing_images)

        if not image_files:
            logger.info("All images already exist on server. Nothing to upload.")
            return

    # A single request is enough for a small set; otherwise upload in batches.
    if len(image_files) <= BATCH_SIZE:
        uploader = upload_images_with_retry
    else:
        uploader = upload_in_batches

    if not uploader(image_files, SUBDIRECTORY):
        logger.error("Image upload process failed")
        sys.exit(1)

    logger.info("Image upload process completed successfully")


if __name__ == "__main__":
    main()

2025-07-04 23:30:32,523 INFO: Starting image upload process
2025-07-04 23:30:32,525 INFO: Images directory: /Users/adamshafikjovine/Documents/BOScheduling/metrics/plots
2025-07-04 23:30:32,526 INFO: Target subdirectory: plots
2025-07-04 23:30:32,539 INFO: Found 42 image files to upload
2025-07-04 23:30:32,539 INFO: Image files found: ['20250618_084824some_emptyi6-8d0078732dc682ba4635f756b052cbe9.csv.png', '20250618_084824some_emptyi6-8d0078732dc682ba4635f756b052cbe9.csv_dist.png', '20250618_1126422324i127-7c9a292ed4f3032bd2c09b4b7d93eae4.csv.png', '20250618_1126422324i127-7c9a292ed4f3032bd2c09b4b7d93eae4.csv_dist.png', '20250618_1126422324i135-db9101a8620a86349afb78dbd004fb34.csv.png', '20250618_1126422324i135-db9101a8620a86349afb78dbd004fb34.csv_dist.png', '20250618_1126422324i151-dde79de178361831f7a1ac9de0d67af5.csv.png', '20250618_1126422324i151-dde79de178361831f7a1ac9de0d67af5.csv_dist.png', '20250618_1126422324i230-db9942e2e6dd7b9c2286c6abf1f84a78.csv.png', '20250618_1126422324i23

In [None]:
#!/usr/bin/env python3
"""
Script to check files in the backend/app/static/plots directory
"""

import os
import glob
from pathlib import Path
from datetime import datetime
from typing import List, Dict

def check_static_plots_directory(
    plots_dir: str = "../backend/app/static/plots",
    date_prefix: str = "20250614",
) -> None:
    """Print a diagnostic report about the files in a plots directory.

    The report covers existence, access permissions, a split of the
    directory's files into images and other files (with sizes and
    modification times), the images whose name starts with ``date_prefix``
    and an indented tree of the directory.

    Args:
        plots_dir: Directory to inspect. Defaults to the backend static
            plots directory, relative to the current working directory.
        date_prefix: File-name prefix used for the "matching pattern"
            section of the report.

    Returns:
        None. Everything is written to standard output.
    """
    absolute_path = os.path.abspath(plots_dir)

    print("=" * 60)
    print("CHECKING BACKEND STATIC PLOTS DIRECTORY")
    print("=" * 60)
    print(f"Directory path: {plots_dir}")
    print(f"Absolute path: {absolute_path}")
    print()

    # Guard clauses: nothing else can be reported without a real directory.
    if not os.path.exists(plots_dir):
        print("❌ Directory does not exist!")
        return

    if not os.path.isdir(plots_dir):
        print("❌ Path exists but is not a directory!")
        return

    print("✅ Directory exists")

    # Check permissions
    permissions = (
        ("Read", os.R_OK),
        ("Write", os.W_OK),
        ("Execute", os.X_OK),
    )
    print("📁 Directory permissions:")
    for label, mode in permissions:
        granted = os.access(plots_dir, mode)
        print(f"   {label}: {'✅' if granted else '❌'}")
    print()

    try:
        all_files = sorted(os.listdir(plots_dir))

        print(f"📊 Total items in directory: {len(all_files)}")
        print()

        # Split regular files into images and everything else.
        # Sub-directories are part of the total above but are not classified.
        image_extensions = {'.png', '.jpg', '.jpeg', '.gif', '.bmp', '.webp', '.tiff'}
        image_files = []
        other_files = []
        for name in all_files:
            if not os.path.isfile(os.path.join(plots_dir, name)):
                continue
            ext = os.path.splitext(name)[1].lower()
            if ext in image_extensions:
                image_files.append(name)
            else:
                other_files.append(name)

        print(f"🖼️  Image files found: {len(image_files)}")
        print(f"📄 Other files found: {len(other_files)}")
        print()

        # Show image files with details
        if image_files:
            print("IMAGE FILES:")
            print("-" * 50)
            total_size = 0

            for filename in image_files:
                file_path = os.path.join(plots_dir, filename)
                try:
                    stat = os.stat(file_path)
                    size_mb = stat.st_size / (1024 * 1024)
                    modified = datetime.fromtimestamp(stat.st_mtime)
                    total_size += stat.st_size

                    print(f"📸 {filename}")
                    print(f"   Size: {size_mb:.2f} MB")
                    print(f"   Modified: {modified.strftime('%Y-%m-%d %H:%M:%S')}")
                    print()

                except (OSError, OverflowError, ValueError) as e:
                    # A file may vanish or be unreadable between listing and
                    # stat; report it and keep going with the other files.
                    print(f"❌ Error reading {filename}: {e}")

            total_size_mb = total_size / (1024 * 1024)
            print(f"💾 Total image size: {total_size_mb:.2f} MB")
            print()

        # Show other files
        if other_files:
            print("OTHER FILES:")
            print("-" * 50)
            for filename in other_files:
                file_path = os.path.join(plots_dir, filename)
                try:
                    size_kb = os.stat(file_path).st_size / 1024
                    print(f"📄 {filename} ({size_kb:.1f} KB)")
                except OSError as e:
                    print(f"❌ Error reading {filename}: {e}")
            print()

        # Images whose name starts with the requested date prefix
        date_pattern_files = [f for f in image_files if f.startswith(date_prefix)]

        if date_pattern_files:
            print(f"📅 Files matching '{date_prefix}*' pattern: {len(date_pattern_files)}")
            for name in date_pattern_files[:10]:  # Show first 10
                print(f"   - {name}")
            if len(date_pattern_files) > 10:
                print(f"   ... and {len(date_pattern_files) - 10} more")
            print()

        # Directory structure
        print("DIRECTORY STRUCTURE:")
        print("-" * 50)
        for root, _dirs, files in os.walk(plots_dir):
            # Depth is derived from the path relative to plots_dir, so a
            # trailing separator or a repeated path fragment cannot skew it.
            rel = os.path.relpath(root, plots_dir)
            level = 0 if rel == os.curdir else rel.count(os.sep) + 1
            indent = ' ' * 2 * level
            # normpath drops a trailing separator so basename is never empty.
            print(f"{indent}{os.path.basename(os.path.normpath(root))}/")
            subindent = ' ' * 2 * (level + 1)
            for name in files[:5]:  # Show first 5 files per directory
                print(f"{subindent}{name}")
            if len(files) > 5:
                print(f"{subindent}... and {len(files) - 5} more files")

    except PermissionError:
        print("❌ Permission denied reading directory")
    except Exception as e:
        # Best-effort diagnostic: report the problem instead of crashing.
        print(f"❌ Error reading directory: {e}")


def check_docker_mount_compatibility(project_root: str = "..") -> None:
    """Print whether the project layout matches what the Docker mount expects.

    All checks are made relative to ``project_root``: the
    ``backend/app/static/plots`` chain must exist and the root itself should
    contain ``backend``, ``docker-compose.yml`` and ``Dockerfile``.

    Args:
        project_root: Directory expected to be the project root. Defaults to
            the parent of the current working directory, which matches the
            ``../backend`` paths this check has always inspected.

    Returns:
        None. Everything is written to standard output.
    """
    print("=" * 60)
    print("DOCKER MOUNT COMPATIBILITY CHECK")
    print("=" * 60)

    # Check if backend directory exists
    backend_dir = os.path.join(project_root, "backend")
    if not os.path.exists(backend_dir):
        # Report the path that was really checked so the hint is actionable.
        print(f"❌ {backend_dir} directory not found!")
        print(f"   Make sure {os.path.abspath(project_root)} is the project root")
        return

    # Build backend, backend/app, backend/app/static, backend/app/static/plots
    paths_to_check = [backend_dir]
    for part in ("app", "static", "plots"):
        paths_to_check.append(os.path.join(paths_to_check[-1], part))

    for path in paths_to_check:
        if os.path.exists(path):
            print(f"✅ {path}")
        else:
            print(f"❌ {path} - Missing!")

    print()

    # Inspect the same root the paths above were resolved against; sorted so
    # the listing is stable between runs.
    root_entries = sorted(os.listdir(project_root))
    expected_files = ["backend", "docker-compose.yml", "Dockerfile"]

    print(f"Project root ({project_root}) contains:")
    for entry in root_entries:
        marker = "✅" if entry in expected_files else "📄"
        print(f"   {marker} {entry}")

    missing_expected = [f for f in expected_files if f not in root_entries]
    if missing_expected:
        print(f"\n⚠️  Missing expected files: {missing_expected}")
        print(f"   {project_root} might not be the project root directory")


# Entry point: run both diagnostics in sequence, separated by a blank line,
# when this code is executed as the main program.
if __name__ == "__main__":
    check_static_plots_directory()
    print()
    check_docker_mount_compatibility()

CHECKING BACKEND STATIC PLOTS DIRECTORY
Directory path: ../backend/app/static/plots
Absolute path: /Users/adamshafikjovine/Documents/BOScheduling/backend/app/static/plots

✅ Directory exists
📁 Directory permissions:
   Read: ✅
   Write: ✅
   Execute: ✅

📊 Total items in directory: 1292

🖼️  Image files found: 1292
📄 Other files found: 0

IMAGE FILES:
--------------------------------------------------
📸 20250624_041000i10random-acb3bf9268fe61f634aa05e162258c4c.png
   Size: 0.10 MB
   Modified: 2025-07-02 01:24:10

📸 20250624_041000i10random-acb3bf9268fe61f634aa05e162258c4c_dist.png
   Size: 0.06 MB
   Modified: 2025-07-02 01:24:10

📸 20250624_041000i11random-86d59c8115a1802420114e88ce8b3064.png
   Size: 0.10 MB
   Modified: 2025-07-02 01:24:10

📸 20250624_041000i11random-86d59c8115a1802420114e88ce8b3064_dist.png
   Size: 0.06 MB
   Modified: 2025-07-02 01:24:10

📸 20250624_041000i12random-0e8bf1c032aa730093a40668fd3a51e7.png
   Size: 0.10 MB
   Modified: 2025-07-02 01:24:10

📸 20250624_

In [None]:
#!/usr/bin/env python3
"""
Test script for metrics upload functionality
Creates sample metrics CSV files and tests the upload process
"""

import os
import pandas as pd
import tempfile
import shutil
#from metrics.upload import metrics_csv_upload, check_api_health
import logging

# Setup logging
# NOTE: logging.basicConfig() does nothing when the root logger already has
# handlers, e.g. when an earlier notebook cell in the same kernel has already
# configured logging; in that case the earlier configuration stays in effect.
logging.basicConfig(level=logging.INFO)
# Module-level logger shared by every function below.
logger = logging.getLogger(__name__)

def create_sample_metrics_csvs(temp_dir: str, num_files: int = 5) -> str:
    """
    Create sample metrics CSV files for testing

    Each file holds a single row (one schedule). Five distinct sample rows
    are available; when ``num_files`` exceeds that, the rows are reused
    cyclically instead of failing with an IndexError.

    Args:
        temp_dir: Directory to create files in
        num_files: Number of sample files to create

    Returns:
        Path to directory containing sample files
    """
    metrics_dir = os.path.join(temp_dir, "metrics")
    os.makedirs(metrics_dir, exist_ok=True)

    # Sample metrics data matching your schema (one list entry per sample row)
    sample_data = {
        "conflicts": [0, 1, 2, 0, 1],
        "quints": [0, 0, 1, 0, 0],
        "quads": [1, 2, 1, 3, 2],
        "four in five slots": [2, 3, 1, 4, 2],
        "triple in 24h (no gaps)": [3, 2, 4, 1, 3],
        "triple in same day (no gaps)": [1, 1, 2, 0, 1],
        "three in four slots": [4, 3, 5, 2, 4],
        "evening/morning b2b": [0, 1, 0, 2, 1],
        "other b2b": [2, 1, 3, 1, 2],
        "two in three slots": [5, 4, 6, 3, 5],
        "singular late exam": [1, 0, 1, 2, 1],
        "two exams, large gap": [0, 1, 0, 1, 0],
        "avg_max": [0.75, 0.82, 0.68, 0.91, 0.77],
        "lateness": [10, 15, 8, 20, 12],
        "size_cutoff": [100, 150, 120, 180, 130],
        "reserved": [5, 8, 6, 10, 7],
        "num_blocks": [12, 15, 11, 18, 14],
        "alpha": [0.1, 0.15, 0.12, 0.08, 0.13],
        "gamma": [0.2, 0.25, 0.18, 0.22, 0.21],
        "delta": [0.3, 0.28, 0.32, 0.27, 0.29],
        "vega": [0.05, 0.08, 0.06, 0.04, 0.07],
        "theta": [0.15, 0.12, 0.18, 0.14, 0.16],
        "large_block_size": [1.5, 1.8, 1.6, 2.0, 1.7],
        "large_exam_weight": [2.0, 2.5, 2.2, 1.8, 2.3],
        "large_block_weight": [1.2, 1.5, 1.3, 1.0, 1.4],
        "large_size_1": [0.8, 0.9, 0.85, 0.75, 0.88],
        "large_cutoff_freedom": [0.6, 0.7, 0.65, 0.55, 0.68],
        "tradeoff": [0.4, 0.45, 0.42, 0.38, 0.43],
        "flpens": [0.25, 0.3, 0.28, 0.22, 0.27],
    }

    # Every column carries the same number of sample rows.
    num_samples = len(next(iter(sample_data.values())))

    # Create individual CSV files
    for i in range(num_files):
        # Wrap around so any number of files can be generated.
        row = i % num_samples

        # Create single-row DataFrame for this schedule
        row_data = {col: [values[row]] for col, values in sample_data.items()}
        df = pd.DataFrame(row_data)

        # Save to CSV file
        filename = f"schedule_{i+1:03d}.csv"
        filepath = os.path.join(metrics_dir, filename)
        df.to_csv(filepath, index=False)

        logger.info(f"Created sample file: {filename}")

    logger.info(f"Created {num_files} sample metrics CSV files in {metrics_dir}")
    return metrics_dir


def test_api_endpoints(api_url: str = "http://localhost:5000") -> bool:
    """Test API health and endpoints

    Args:
        api_url: Base URL of the API to probe. Defaults to the local
            development address; pass another URL for a different setup.

    Returns:
        True if the health check passed, False otherwise.
    """
    logger.info("Testing API endpoints...")

    # NOTE(review): check_api_health is not imported in this cell (the import
    # from metrics.upload is commented out); it presumably comes from an
    # earlier cell in the same kernel - confirm before running standalone.
    if not check_api_health(api_url):
        logger.error("❌ API health check failed")
        return False

    logger.info("✅ API health check passed")
    return True


def test_complete_workflow():
    """Run the metrics upload workflow end to end as a dry run.

    Sample CSV files are generated in a temporary directory, the API health
    is checked, and the files are then discovered and combined with the
    helpers from ``metrics.upload``. Nothing is uploaded.

    Returns:
        True when every step succeeded, False when the API is unavailable
        or the CSV files could not be combined.
    """
    logger.info("Starting complete workflow test...")

    # The temporary directory and its sample files are removed on exit.
    with tempfile.TemporaryDirectory() as workspace:
        logger.info(f"Using temporary directory: {workspace}")

        # Step 1: generate the sample CSV files
        sample_dir = create_sample_metrics_csvs(workspace, num_files=3)

        # Step 2: the API has to be reachable before going further
        api_ready = test_api_endpoints()
        if not api_ready:
            logger.error("API endpoints not available - skipping upload test")
            return False

        # Step 3: dry run - discover and combine the files without uploading
        logger.info("Testing dry run...")

        # Imported here so the module is only required once this step runs.
        from metrics.upload import find_metrics_csv_files, combine_metrics_csvs

        discovered = find_metrics_csv_files(sample_dir)
        logger.info(f"Found {len(discovered)} CSV files")

        merged = combine_metrics_csvs(discovered, "test_semester")
        if merged is None:
            logger.error("❌ Failed to combine CSV files")
            return False

        logger.info(f"✅ Successfully combined {len(merged)} records")
        logger.info(f"Columns: {list(merged.columns)}")
        logger.info("Sample data:")
        print(merged.head())

        # Step 4 (disabled): the real upload. Once the API is running, call
        # metrics_csv_upload(sample_dir, semester="test_semester",
        # api_url="http://localhost:5000") here and return False if it fails.

    logger.info("✅ Complete workflow test passed!")
    return True


def main():
    """Run the metrics upload test suite and log the overall outcome.

    Any exception raised by the workflow is logged and its traceback is
    printed; it is not re-raised.
    """
    banner = "=" * 60
    for line in (banner, "METRICS UPLOAD TEST SUITE", banner):
        logger.info(line)

    try:
        # Test the complete workflow
        passed = test_complete_workflow()
        if not passed:
            logger.error("💥 Some tests failed!")
        else:
            logger.info("🎉 All tests passed!")

    except Exception as e:
        logger.error(f"💥 Test suite failed with error: {e}")
        import traceback
        traceback.print_exc()


# Entry point: run the test suite when this code is executed as the main program.
if __name__ == "__main__":
    main()

ModuleNotFoundError: No module named 'metrics'