# Project Structure Verification
This notebook will check your current project setup and configuration.

In [22]:
from pathlib import Path
import os


def quick_project_check():
    print("🔍 Quick Project Verification\n")

    # Current directory
    root = Path.cwd()
    project_name = root.name
    print(f"Current directory: {project_name}")

    # Check critical files
    critical_files = ['Environment_Setup.ipynb', 'requirements.txt', 'env_var.env', 'env_template.env', 'APIkeys.json']
    print("\nCritical Files:")
    for file in critical_files:
        if (root / file).exists():
            print(f"✅ {file} found")
        else:
            print(f"❌ {file} missing")

    # Check directories
    directories = ['data', 'scripts', 'notebooks', 'output', 'api_responses', 'models']
    print("\nDirectories:")
    for dir_name in directories:
        dir_path = root / dir_name
        if dir_path.exists():
            contents = list(dir_path.glob('*'))
            print(f"✅ {dir_name}/ exists with {len(contents)} items")
        else:
            print(f"❌ {dir_name}/ missing")

    # Check notebooks location
    print("\nNotebook Locations:")
    root_notebooks = list(root.glob('*.ipynb'))
    notebook_dir_files = list((root / 'notebooks').glob('*.ipynb')) if (root / 'notebooks').exists() else []

    print("Root directory notebooks:")
    for nb in root_notebooks:
        if nb.name == 'Environment_Setup.ipynb':
            print(f"✅ {nb.name} (correct location)")
        else:
            print(f"⚠️ {nb.name} (should be in notebooks/)")

    print("\nNotebooks directory contents:")
    for nb in notebook_dir_files:
        print(f"✅ {nb.name}")


quick_project_check()


🔍 Quick Project Verification

Current directory: Term_Project_Geissinger

Critical Files:
✅ Environment_Setup.ipynb found
✅ requirements.txt found
✅ env_var.env found
✅ env_template.env found
✅ APIkeys.json found

Directories:
✅ data/ exists with 26 items
✅ scripts/ exists with 3 items
✅ notebooks/ exists with 6 items
✅ output/ exists with 19 items
✅ api_responses/ exists with 6 items
✅ models/ exists with 0 items

Notebook Locations:
Root directory notebooks:
✅ Environment_Setup.ipynb (correct location)
⚠️ Project_Verification.ipynb (should be in notebooks/)

Notebooks directory contents:
✅ milestone1_geissinger.ipynb
✅ milestone2_geissinger.ipynb
✅ milestone3_geissinger.ipynb
✅ milestone4_geissinger.ipynb
✅ milestone5_geissinger.ipynb


In [23]:
def quick_project_check():
    print("🔍 Quick Project Verification\n")

    # Current directory
    root = Path.cwd()
    project_name = root.name
    print(f"Current directory: {project_name}")

    # Check critical files
    critical_files = ['Environment_Setup.ipynb', 'requirements.txt', 'env_var.env', 'env_template.env', 'APIkeys.json']
    print("\nCritical Files:")
    for file in critical_files:
        if (root / file).exists():
            print(f"✅ {file} found")
        else:
            print(f"❌ {file} missing")

    # Check directories
    directories = ['data', 'scripts', 'notebooks', 'output', 'api_responses', 'models']
    print("\nDirectories:")
    for dir_name in directories:
        dir_path = root / dir_name
        if dir_path.exists():
            contents = list(dir_path.glob('*'))
            print(f"✅ {dir_name}/ exists with {len(contents)} items")
        else:
            print(f"❌ {dir_name}/ missing")

    # Check notebooks location 
    print("\nNotebook Locations:")
    root_notebooks = list(root.glob('*.ipynb'))
    notebook_dir_files = list((root / 'notebooks').glob('*.ipynb')) if (root / 'notebooks').exists() else []

    allowed_root_notebooks = ['Environment_Setup.ipynb', 'Project_Verification.ipynb']

    print("Root directory notebooks:")
    for nb in root_notebooks:
        if nb.name in allowed_root_notebooks:
            print(f"✅ {nb.name} (correct location)")
        else:
            print(f"⚠️ {nb.name} (should be in notebooks/)")

    print("\nNotebooks directory contents:")
    for nb in notebook_dir_files:
        print(f"✅ {nb.name}")

In [24]:
def check_notebook_paths():
    """
    Check if notebooks are using correct relative paths
    """
    print("🔍 Analyzing Notebook Paths\n")

    notebook_dir = Path('notebooks')
    if not notebook_dir.exists():
        print("❌ notebooks directory not found")
        return

    notebooks = list(notebook_dir.glob('*.ipynb'))
    root_notebooks = list(Path('.').glob('*.ipynb'))

    allowed_root_notebooks = ['Environment_Setup.ipynb', 'Project_Verification.ipynb']

    print("Notebook Location Summary:")
    if root_notebooks:
        print("\nNotebooks in root directory:")
        for nb in root_notebooks:
            if nb.name in allowed_root_notebooks:
                print(f"✅ {nb.name}")
            else:
                print(f"⚠️ {nb.name} (should be moved to notebooks/)")

    print("\nNotebooks in notebooks/ directory:")
    for nb in notebooks:
        print(f"✅ {nb.name}")


In [25]:
import os
from pathlib import Path
import glob
import sys


def verify_project_structure():
    """
    Verify the current project structure and configuration
    """
    print("🔍 Checking Project Structure\n")

    # Get current working directory
    root_dir = Path.cwd()
    print(f"📂 Project Root: {root_dir}\n")

    # Check main directories
    print("Directory Structure:")
    main_dirs = ['data', 'notebooks', 'output']
    for dir_name in main_dirs:
        dir_path = root_dir / dir_name
        if dir_path.exists():
            print(f"✅ {dir_name}/")
            # List contents
            contents = list(dir_path.glob('*'))
            if contents:
                for item in contents:
                    print(f"  └─ {item.name}")
            else:
                print("  └─ (empty)")
        else:
            print(f"❌ {dir_name}/ (missing)")

    # Check configuration files
    print("\nConfiguration Files:")
    config_files = ['requirements.txt', 'env_var.env']
    for file in config_files:
        if os.path.exists(file):
            print(f"✅ {file}")
        else:
            print(f"❌ {file} (missing)")

    # Check notebooks
    print("\nNotebook Files:")
    notebook_files = list(glob.glob('**/*.ipynb', recursive=True))
    if notebook_files:
        for nb in notebook_files:
            print(f"📓 {nb}")
    else:
        print("❌ No notebook files found")

    # Python Environment
    print("\nPython Environment:")
    print(f"Python version: {sys.version.split()[0]}")

    try:
        import pandas as pd
        import numpy as np
        print("✅ Core data science packages (pandas, numpy) are installed")
    except ImportError:
        print("❌ Core packages missing")


verify_project_structure()


🔍 Checking Project Structure

📂 Project Root: C:\Users\geiss\PycharmProjects\Term_Project_Geissinger

Directory Structure:
✅ data/
  └─ 2022_NAICS_Structure_Summary_Table - industry.csv
  └─ 2022_NAICS_Structure_Summary_Table - industry.xlsx
  └─ ai_jobs_pbi_theme.json
  └─ AI_pink.png
  └─ Fastest declining occupations.html.html
  └─ Fastest growing occupations.html.html
  └─ Git.jpg
  └─ Hands.png
  └─ industry_occupation_data.csv
  └─ job_analysis.db
  └─ LinkedIn.jpg
  └─ national_M2015_dl.csv
  └─ national_M2015_dl.xlsx
  └─ national_M2024_dl.csv
  └─ national_M2024_dl.xlsx
  └─ nem-industry-coverage.xlsx
  └─ nem-occupational-coverage.xlsx
  └─ nem-onet-to-soc-crosswalk.csv
  └─ Occupational groups.xlsx
  └─ OIP.jpg
  └─ photo-1738003667850-a2fb736e31b3.avif
  └─ premium_photo-1680608979589-e9349ed066d5.avif
  └─ Screenshot 2025-08-06 164111.png
  └─ Skills.csv
  └─ soc_structure_2018 - occupations.csv
  └─ soc_structure_2018 - occupations.xlsx
✅ notebooks/
  └─ .ipynb_checkpoint

In [26]:
def verify_env_variables():
    """
    Check environment variables setup
    """
    print("🔐 Checking Environment Variables\n")

    try:
        from dotenv import load_dotenv
        load_dotenv(dotenv_path='env_var.env', override=True)

        required_vars = [
            'NAICS_codes_path',
            'SOC_codes_path',
            'ONET_API_USERNAME',
            'ONET_API_PASSWORD',
            'onet_api_key',
            'declining_path',
            'growing_path'
        ]

        print("Environment variables status:")
        for var in required_vars:
            value = os.getenv(var)
            if value:
                print(f"✅ {var} is set")
            else:
                print(f"❌ {var} is not set")


    except ImportError:
        print("❌ python-dotenv package not installed")
    except Exception as e:
        print(f"❌ Error checking environment variables: {str(e)}")

verify_env_variables()

🔐 Checking Environment Variables

Environment variables status:
✅ NAICS_codes_path is set
✅ SOC_codes_path is set
✅ ONET_API_USERNAME is set
✅ ONET_API_PASSWORD is set
✅ onet_api_key is set
✅ declining_path is set
✅ growing_path is set


In [27]:

import os
from pathlib import Path
import glob
import sys


def check_notebook_paths():
    """
    Check if notebooks are using correct relative paths
    """
    print("🔍 Analyzing Notebook Paths\n")

    notebook_dir = Path('notebooks')
    if not notebook_dir.exists():
        print("❌ notebooks directory not found")
        return

    notebooks = list(notebook_dir.glob('*.ipynb'))
    root_notebooks = list(Path('.').glob('*.ipynb'))  # Changed this line to look in root directory

    allowed_root_notebooks = ['Environment_Setup.ipynb', 'Project_Verification.ipynb']

    print("Notebook Location Summary:")
    if root_notebooks:
        print("\nNotebooks in root directory:")
        for nb in root_notebooks:
            if nb.name in allowed_root_notebooks:
                print(f"✅ {nb.name}")
            else:
                print(f"⚠️ {nb.name} (should be moved to notebooks/)")

    print("\nNotebooks in notebooks/ directory:")
    for nb in notebooks:
        print(f"✅ {nb.name}")


# Let's also check the basic directory structure
def verify_basic_structure():
    """
    Verify basic project structure
    """
    print("📂 Checking Basic Project Structure\n")

    # Get current working directory
    root_dir = Path.cwd()
    project_name = root_dir.name
    print(f"Current directory: {project_name}\n")

    # Check main directories
    main_dirs = ['data', 'notebooks', 'output']
    for dir_name in main_dirs:
        dir_path = root_dir / dir_name
        if dir_path.exists():
            print(f"✅ {dir_name}/ exists")
            # List contents
            contents = list(dir_path.glob('*'))
            if contents:
                for item in contents:
                    print(f"  └─ {item.name}")
            else:
                print("  └─ (empty)")
        else:
            print(f"❌ {dir_name}/ missing")


# Run both checks
print("=== Directory Structure ===")
verify_basic_structure()
print("\n=== Notebook Locations ===")
check_notebook_paths()


=== Directory Structure ===
📂 Checking Basic Project Structure

Current directory: Term_Project_Geissinger

✅ data/ exists
  └─ 2022_NAICS_Structure_Summary_Table - industry.csv
  └─ 2022_NAICS_Structure_Summary_Table - industry.xlsx
  └─ ai_jobs_pbi_theme.json
  └─ AI_pink.png
  └─ Fastest declining occupations.html.html
  └─ Fastest growing occupations.html.html
  └─ Git.jpg
  └─ Hands.png
  └─ industry_occupation_data.csv
  └─ job_analysis.db
  └─ LinkedIn.jpg
  └─ national_M2015_dl.csv
  └─ national_M2015_dl.xlsx
  └─ national_M2024_dl.csv
  └─ national_M2024_dl.xlsx
  └─ nem-industry-coverage.xlsx
  └─ nem-occupational-coverage.xlsx
  └─ nem-onet-to-soc-crosswalk.csv
  └─ Occupational groups.xlsx
  └─ OIP.jpg
  └─ photo-1738003667850-a2fb736e31b3.avif
  └─ premium_photo-1680608979589-e9349ed066d5.avif
  └─ Screenshot 2025-08-06 164111.png
  └─ Skills.csv
  └─ soc_structure_2018 - occupations.csv
  └─ soc_structure_2018 - occupations.xlsx
✅ notebooks/ exists
  └─ .ipynb_checkpoints