In [5]:
import os
import gzip
import shutil
import requests
import rasterio
import numpy as np
import pandas as pd
from rasterio.windows import from_bounds

In [6]:
def get_auth_token(username, password):
    auth_url = 'https://eogauth.mines.edu/auth/realms/master/protocol/openid-connect/token'
    auth_data = {
        'client_id': 'eogdata_oidc',
        'client_secret': '2677ad81-521b-4869-8480-6d05b9e57d48',
        'username': username,
        'password': password,
        'grant_type': 'password'
    }
    response = requests.post(auth_url, data=auth_data)
    if response.status_code == 200:
        return response.json()['access_token']
    else:
        raise Exception(f"Failed to authenticate: {response.text}")

In [7]:
def download_and_process_file(file_url, roi_bounds, access_token, month, aggregated_data):
    headers = {"Authorization": f"Bearer {access_token}"}
    temp_dir = "temp_tif"

    if not os.path.exists(temp_dir):
        os.makedirs(temp_dir)

    compressed_file = os.path.join(temp_dir, "file.gz")  # Use .gz extension for compressed files

    try:
        # Download the .gz file
        print(f"Downloading file: {file_url}")
        response = requests.get(file_url, headers=headers, stream=True)
        if response.status_code != 200:
            print(f"Failed to download file. Status code: {response.status_code}")
            return aggregated_data

        with open(compressed_file, 'wb') as f:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                f.write(chunk)
        print("Download complete")

        # Decompress the .gz file
        print("Extracting .gz file...")
        with gzip.open(compressed_file, 'rb') as f_in:
            extracted_file = os.path.join(temp_dir, 'file.tif')  # Output file for .gz extraction
            with open(extracted_file, 'wb') as f_out:
                shutil.copyfileobj(f_in, f_out)
        print("Extraction complete")

        # Process the extracted .tif file
        tif_files = [os.path.join(temp_dir, f) for f in os.listdir(temp_dir) if f.endswith('.tif')]
        if not tif_files:
            print("No .tif file found in the extracted contents.")
            return aggregated_data

        local_tif = tif_files[0]

        # Process the .tif file
        print(f"Processing file {local_tif}...")
        with rasterio.open(local_tif) as src:
            window = from_bounds(*roi_bounds, src.transform)
            window = rasterio.windows.Window(
                col_off=int(window.col_off),
                row_off=int(window.row_off),
                width=int(window.width),
                height=int(window.height))

            data = src.read(1, window=window)
            print(f"Data shape: {data.shape}")

            if data.shape[0] == 0:
                print(f"No data within the specified bounds: {roi_bounds}")
                return aggregated_data

            rows, cols = np.indices(data.shape)

            xs, ys = rasterio.transform.xy(
                src.transform,
                rows.flatten() + window.row_off,
                cols.flatten() + window.col_off,
                offset='center')

            chunk_df = pd.DataFrame({
                'longitude': xs,
                'latitude': ys,
                f'radiance_month_{month}': data.flatten()})

            print(f"Before applying mask: {chunk_df.shape[0]} rows")

            mask = (
                (chunk_df['longitude'] >= roi_bounds[0]) &
                (chunk_df['longitude'] <= roi_bounds[2]) &
                (chunk_df['latitude'] >= roi_bounds[1]) &
                (chunk_df['latitude'] <= roi_bounds[3]))
            chunk_df = chunk_df[mask]

            print(f"After applying mask: {chunk_df.shape[0]} rows")

            if not chunk_df.empty:
                if aggregated_data.empty:
                    aggregated_data = chunk_df
                else:
                    aggregated_data = pd.merge(
                        aggregated_data,
                        chunk_df,
                        on=['longitude', 'latitude'],
                        how='outer')

        print("Processing complete")
        return aggregated_data

    finally:
        if os.path.exists(compressed_file):
            os.remove(compressed_file)
        if os.path.exists(temp_dir):
            shutil.rmtree(temp_dir)

In [8]:
username = "omarbassam55555@gmail.com" 
password = "dontknow_5"  

urls = {
    "1": "https://eogdata.mines.edu/nighttime_light/monthly_notile/v10/2020/202001/vcmcfg/SVDNB_npp_20200101-20200131_global_vcmcfg_v10_c202002111500.avg_rade9h.tif.gz"
    }

roi_bounds = (-73.9855, -33.7500, -34.7939, 5.2718) # Accurate ROI bounds for Brazil
output_file = r"E:\UNI\Fall_2024\CSCI322 Data Analysis\Project\repo\Data_Extracting\Data\Brazil_Viirs.csv"

try:
    access_token = get_auth_token(username, password)
    aggregated_data = pd.DataFrame()

    for month, url in urls.items():
        print(f"Processing data for {month}")
        aggregated_data = download_and_process_file(url, roi_bounds, access_token, month, aggregated_data)

        # Save final data to CSV
    if not aggregated_data.empty:
        aggregated_data.to_csv(output_file, index=False)
        print(f"Data saved to {output_file}")
    else:
        print("No data available within the specified bounds.")
except Exception as e:
    print(f"Error: {e}")

Processing data for 1
Downloading file: https://eogdata.mines.edu/nighttime_light/monthly_notile/v10/2020/202001/vcmcfg/SVDNB_npp_20200101-20200131_global_vcmcfg_v10_c202002111500.avg_rade9h.tif.gz
Download complete
Extracting .gz file...
Extraction complete
Processing file temp_tif\file.tif...
Data shape: (9365, 9405)
Before applying mask: 88077825 rows
After applying mask: 88068460 rows
Processing complete
Data saved to E:\UNI\Fall_2024\CSCI322 Data Analysis\Project\repo\Data_Extracting\Data\Brazil_Viirs.csv
