# Project Environment Setup
This notebook will set up the complete environment for all project notebooks. It handles:
* Package installation
* Directory structure creation
* Environment variables configuration
* Environment validation

## Steps Overview
1. Create project directory structure
2. Install required packages from requirements.txt
3. Set up environment variables
4. Validate the environment

## Environment Validation
Let's verify that everything is set up correctly. The following checks will be performed:
* Essential package imports
* Environment variables configuration
* Directory structure

In [2]:
# Environment Setup and Package Installation
import sys
import subprocess
from pathlib import Path



def create_project_structure():
    """Create the basic project directory structure in the current directory.

    Each expected directory is checked relative to the current working
    directory. Directories that already exist are reported, missing ones are
    created, and a path that exists but is not a directory (for example a
    regular file named ``data``) is reported as a conflict instead of being
    mistaken for an existing directory.
    """
    print("📂 Checking Project Structure\n")

    directories = [
        'data',
        'notebooks',
        'output',
        'scripts',
        'api_responses',
        'models',
    ]

    created_dirs = []
    conflicts = []

    for dir_name in directories:
        path = Path(dir_name)
        if path.is_dir():
            print(f"ℹ️ {dir_name}/ already exists")
        elif path.exists():
            # Something that is not a directory occupies the name; leave it
            # untouched and tell the user rather than claiming success.
            conflicts.append(dir_name)
            print(f"⚠️ {dir_name} exists but is not a directory")
        else:
            path.mkdir(exist_ok=True)
            created_dirs.append(dir_name)
            print(f"✅ Created {dir_name}/")

    # Only claim everything exists when nothing was created and nothing conflicted.
    if not created_dirs and not conflicts:
        print("All directories already exist")


def create_env_files():
    """Write the environment-variable template files if they are missing.

    Both ``env_template.env`` and ``env_var.env`` receive the same placeholder
    content. A file that already exists is left untouched and only reported.
    """
    print("\n📝 Checking Environment Files\n")

    env_template = """# API Credentials
ONET_API_USERNAME=your_username_here
ONET_API_PASSWORD=your_password_here
ONET_API_KEY=your_key_here

# Data Paths
NAICS_codes_path=data/2022_NAICS_Structure_Summary_Table - industry.csv
SOC_codes_path=data/soc_structure_2018 - occupations.csv
declining_path=data/Fastest declining occupations.html.html
growing_path=data/Fastest growing occupations.html.html
"""

    # The template first, then the working copy the user is expected to edit.
    for file_name in ('env_template.env', 'env_var.env'):
        if Path(file_name).exists():
            print(f"ℹ️ {file_name} already exists")
            continue
        with open(file_name, 'w') as handle:
            handle.write(env_template)
        print(f"✅ Created {file_name}")


def install_requirements():
    """Install any required packages that are not already present.

    Each distribution in the hard-coded list is looked up in the installed
    package metadata of the running interpreter. Missing ones are installed
    with ``pip`` invoked through ``sys.executable`` so they land in the same
    environment as this interpreter.

    Raises:
        subprocess.CalledProcessError: If a ``pip install`` command fails.
    """
    # Imported locally so this function is self-contained: the original relied
    # on ``pkg_resources``, which is not imported here and is deprecated in
    # favour of the standard-library ``importlib.metadata``.
    from importlib import metadata

    print("\n📦 Checking Required Packages\n")

    requirements = [
        'pandas',
        'numpy',
        'requests',
        'python-dotenv',
        'beautifulsoup4',
        'jupyter',
        'sqlalchemy',
        'matplotlib',
        'seaborn',
        'thefuzz',
        'duckdb',
    ]

    need_install = []

    for package in requirements:
        try:
            # Only presence is checked; no version constraint is specified.
            metadata.version(package)
        except metadata.PackageNotFoundError:
            need_install.append(package)
            print(f"📥 Installing {package}...")
            subprocess.check_call([sys.executable, "-m", "pip", "install", package])
        else:
            print(f"✅ {package} already installed")

    if not need_install:
        print("\nAll required packages already installed")


def main():
    """Run every setup step in order, then print follow-up instructions."""
    print("🚀 Starting Project Environment Setup\n")

    # Order: directories first, then env files, then package installation.
    setup_steps = (
        create_project_structure,
        create_env_files,
        install_requirements,
    )
    for run_step in setup_steps:
        run_step()

    print("\n✨ Environment Setup Complete!")

    closing_lines = (
        "\nNext Steps:",
        "1. Update env_var.env with your actual credentials and paths",
        "2. Place your data files in the data/ directory",
        "3. Start working with your notebooks in the notebooks/ directory",
    )
    for text in closing_lines:
        print(text)


# Run the setup only when this code is executed directly (as a script or a
# notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()


🚀 Starting Project Environment Setup

📂 Checking Project Structure

ℹ️ data/ already exists
ℹ️ notebooks/ already exists
ℹ️ output/ already exists
ℹ️ scripts/ already exists
ℹ️ api_responses/ already exists
ℹ️ models/ already exists
All directories already exist

📝 Checking Environment Files

ℹ️ env_template.env already exists
ℹ️ env_var.env already exists

📦 Checking Required Packages

✅ pandas already installed
✅ numpy already installed
✅ requests already installed
✅ python-dotenv already installed
✅ beautifulsoup4 already installed
✅ jupyter already installed
✅ sqlalchemy already installed
✅ matplotlib already installed
✅ seaborn already installed
✅ thefuzz already installed
✅ duckdb already installed

All required packages already installed

✨ Environment Setup Complete!

Next Steps:
1. Update env_var.env with your actual credentials and paths
2. Place your data files in the data/ directory
3. Start working with your notebooks in the notebooks/ directory
