In [None]:
import os
import time
import requests
from datetime import datetime, timedelta
import calendar
import netrc
from http.cookiejar import CookieJar
from urllib.parse import urlparse
from dotenv import load_dotenv

# Define the download directory.
# The EARTHDATA_DOWNLOAD_DIR environment variable, when set to a non-empty
# value, overrides the default below so the script can run on another machine
# without editing the source.  Only the process environment is consulted here.
download_dir = os.getenv("EARTHDATA_DOWNLOAD_DIR") or (
    r"C:\Users\ssara\Desktop\Bootcamp\Data_Analytics\Final_Project\NASA_Dataset\downloaded_files"
)
os.makedirs(download_dir, exist_ok=True)

# NASA Earthdata authentication setup
def setup_earthdata_auth():
    """Set up authentication for NASA Earthdata.

    Credentials are read from the ``EARTHDATA_USERNAME`` and
    ``EARTHDATA_PASSWORD`` environment variables.  If either one is missing,
    the file ``earthdata.env`` is loaded with ``load_dotenv`` and both
    variables are read again.

    Side effect: when ``~/.netrc`` does not exist it is created with entries
    for ``urs.earthdata.nasa.gov`` and ``opendap.earthdata.nasa.gov``.  An
    existing ``.netrc`` is never modified.

    Returns:
        requests.Session: a session carrying the username/password as its
        ``auth`` tuple and an empty cookie jar.

    Raises:
        ValueError: if the username or password cannot be found.
    """
    # First try to get credentials from environment variables
    username = os.getenv("EARTHDATA_USERNAME")
    password = os.getenv("EARTHDATA_PASSWORD")

    # If not found, load from the earthdata.env file
    if not username or not password:
        load_dotenv(dotenv_path="earthdata.env")
        username = os.getenv("EARTHDATA_USERNAME")
        password = os.getenv("EARTHDATA_PASSWORD")

    if not username or not password:
        raise ValueError("Earthdata credentials not found. Please set environment variables or create an earthdata.env file.")

    # Create a session with a cookie jar
    session = requests.Session()
    session.cookies = CookieJar()

    # Configure session with authentication
    session.auth = (username, password)

    # Create a .netrc file if it doesn't exist
    netrc_file = os.path.join(os.path.expanduser("~"), ".netrc")
    try:
        # O_EXCL makes "create only when absent" a single atomic step, and
        # passing the mode to os.open means the file holding the password is
        # owner-only from the moment it exists (no window between creating
        # it and restricting it afterwards).
        fd = os.open(netrc_file, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o600)
    except FileExistsError:
        # Leave a pre-existing .netrc exactly as the user wrote it.
        pass
    else:
        with os.fdopen(fd, "w") as f:
            f.write(f"machine urs.earthdata.nasa.gov login {username} password {password}\n")
            f.write(f"machine opendap.earthdata.nasa.gov login {username} password {password}\n")
        print(f"Created .netrc file at {netrc_file}")

    return session

# Function to download a single file with OAuth authentication
def download_file(url, output_file, session, retries=5, delay=10):
    """Download ``url`` to ``output_file`` using an authenticated session.

    The response body is streamed to ``output_file + ".part"`` and moved into
    place only after the transfer completes.  That way ``output_file`` either
    does not exist or is complete, so a caller that skips existing files
    never mistakes a truncated download for a finished one.

    Args:
        url: address of the file to fetch.
        output_file: destination path on disk.
        session: object with a ``requests.Session``-compatible ``get`` method.
        retries: maximum number of attempts.
        delay: seconds to wait between attempts.

    Returns:
        bool: ``True`` once the file has been saved, ``False`` if every
        attempt failed (or if ``retries`` is zero or negative).
    """
    partial_file = f"{output_file}.part"

    for attempt in range(retries):
        try:
            print(f"Downloading {url}")

            # stream=True avoids holding the whole file in memory at once.
            with session.get(url, timeout=60, allow_redirects=True, stream=True) as response:
                response.raise_for_status()

                with open(partial_file, "wb") as f:
                    for chunk in response.iter_content(chunk_size=1024 * 1024):
                        f.write(chunk)

            # Publish the finished file in one step.
            os.replace(partial_file, output_file)

            print(f"Downloaded and saved to {output_file}")
            return True
        except requests.exceptions.RequestException as e:
            print(f"Error downloading {url}: {e}")

            # Discard whatever was written before the transfer broke.
            try:
                os.remove(partial_file)
            except FileNotFoundError:
                pass

            if attempt < retries - 1:
                print(f"Retrying in {delay} seconds... (Attempt {attempt + 2})")
                time.sleep(delay)
            else:
                print(f"Failed to download after {retries} attempts.")

    # Reached after the last failed attempt, or immediately when retries <= 0.
    return False

# Function to get valid days in a month
def get_valid_days(year, month):
    """Return every day of the given month as a two-digit string.

    ``year`` and ``month`` may be ints or numeric strings; both are passed
    through ``int`` before the calendar lookup.  The result runs from
    ``"01"`` up to the last day of that month.
    """
    last_day = calendar.monthrange(int(year), int(month))[1]
    day_labels = []
    for day_number in range(1, last_day + 1):
        day_labels.append(str(day_number).zfill(2))
    return day_labels

# Authenticate once; the same session is reused for every request below
session = setup_earthdata_auth()

# Period to fetch: every calendar month of each listed year
years = ["2019"]
months = [str(i).zfill(2) for i in range(1, 13)]  # "01" .. "12"

# OpenDAP granule endpoint for the CYGNSS microplastic collection
base_url = "https://opendap.earthdata.nasa.gov/collections/C2893924134-POCLOUD/granules"

# Walk year -> month -> day and fetch one file per day
for year in years:
    for month in months:
        valid_days = get_valid_days(year, month)

        for day in valid_days:
            # Part of the name shared by the remote and the local file
            datestamp = f"{year}{month}{day}"
            granule_stem = f"cyg.ddmi.s{datestamp}-120000-e{datestamp}-120000.l3.grid-microplastic.a32.d33"

            # Remote name carries the '.dap' marker used in the OpenDAP link
            file_name = f"{granule_stem}.dap.nc4"
            file_url = f"{base_url}/{file_name}"

            # Local copy is stored without the '.dap' part
            local_file_name = f"{granule_stem}.nc4"
            output_file = os.path.join(download_dir, local_file_name)

            if not os.path.exists(output_file):
                # Fetch the file; a failure is reported and the loop carries on
                success = download_file(file_url, output_file, session)

                if not success:
                    print(f"Moving on to next file after failure: {file_name}")

                # Short pause between requests to avoid overwhelming the server
                time.sleep(2)
            else:
                # Already on disk from an earlier run: nothing to do
                print(f"File already exists: {output_file}")