# GOES historical data download with respect to labeled AR data

This notebook downloads historical GridSat-GOES NetCDF files from NOAA NCEI and plots them.

In [None]:
import os

# Folder that holds the NetCDF files to list
download_dir = 'labeled_AR'

# Full path of every .nc file in that folder, in lexicographic order
files = sorted(
    os.path.join(download_dir, entry)
    for entry in os.listdir(download_dir)
    if entry.endswith('.nc')
)
files

In [None]:
import os
import requests
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime, timedelta

# Directory to save the downloaded files.
# An empty string means "the current working directory": os.path.join("", name)
# is just `name`, which is then opened relative to the working directory.
save_dir = ""
# os.makedirs("") raises FileNotFoundError, so only create the directory when
# an actual path has been configured.
if save_dir:
    os.makedirs(save_dir, exist_ok=True)

# Base URL; generate_urls() appends "/<year>/<month>/<filename>" to it
base_url = "https://www.ncei.noaa.gov/data/gridsat-goes/access/goes"

# First and last timestamps of the download range (both inclusive)
start_date = datetime(1996, 9, 26, 0, 0)
end_date = datetime(1996, 10, 5, 23, 0)

# Function to generate URLs based on date range
def generate_urls(start_date, end_date):
    """Build one download URL per hour between two datetimes.

    Args:
        start_date: datetime of the first file to include.
        end_date: datetime bound of the range (inclusive).

    Returns:
        A list of URL strings for start_date, start_date + 1h, ... up to and
        including end_date. The list is empty when end_date precedes
        start_date.
    """
    one_hour = timedelta(hours=1)
    # Number of hourly steps that fit in [start_date, end_date]; this is zero
    # or negative (hence an empty range) when end_date is before start_date.
    step_count = (end_date - start_date) // one_hour + 1

    result = []
    for offset in range(step_count):
        stamp = start_date + offset * one_hour
        hour_tag = stamp.strftime("%Y.%m.%d.%H00")
        name = f"GridSat-GOES.goes08.{hour_tag}.v01.nc"
        month = stamp.strftime('%m')
        result.append(f"{base_url}/{stamp.year}/{month}/{name}")
    return result

# Function to download a file
def download_file(url):
    """Download one file from `url` into `save_dir`.

    The file is stored under the last path component of the URL. The outcome
    is reported with print(); nothing is returned. HTTP errors, network
    errors and file-system errors are reported instead of raised, so one bad
    URL does not abort a batch of downloads.

    Args:
        url: Full URL of the file to fetch.
    """
    filename = os.path.join(save_dir, os.path.basename(url))
    # Stream into a temporary name first so an interrupted transfer never
    # leaves a truncated file that looks like a finished download.
    partial_name = filename + ".part"
    try:
        # The context manager releases the connection even on errors; the
        # timeout keeps a stalled server from blocking a worker forever.
        with requests.get(url, stream=True, timeout=60) as response:
            if response.status_code != 200:
                print(f"Failed to download {url}")
                return
            with open(partial_name, 'wb') as file:
                for chunk in response.iter_content(chunk_size=8192):
                    file.write(chunk)
        # Atomically move the finished file into place.
        os.replace(partial_name, filename)
    except (requests.RequestException, OSError) as error:
        print(f"Failed to download {url}: {error}")
        # Best-effort cleanup of the incomplete file, if one was created.
        try:
            os.remove(partial_name)
        except FileNotFoundError:
            pass
    else:
        print(f"Downloaded {filename}")

# Generate the list of URLs to download
urls = generate_urls(start_date, end_date)

# Use ThreadPoolExecutor to download files concurrently.
# submit() is used instead of map() so each task's future is kept: the
# results of executor.map() were never consumed, which silently discarded any
# exception raised inside a worker thread.
with ThreadPoolExecutor(max_workers=5) as executor:
    futures = [(url, executor.submit(download_file, url)) for url in urls]

# Leaving the `with` block waits for every task, so all futures are done here.
# download_file() prints its own message for non-200 responses; only
# exceptions that escaped it are counted below.
failed = 0
for url, future in futures:
    error = future.exception()
    if error is not None:
        failed += 1
        print(f"Failed to download {url}: {error!r}")

if failed:
    print(f"Finished, but {failed} download task(s) raised an error.")
else:
    print("All files downloaded.")


In [None]:
# Names of all entries in the current working directory
# (os.listdir() with no argument lists '.')
files = os.listdir()
files

In [None]:
import os

# Directory where files are stored.
# An empty string stands for the current working directory.
download_dir = ''

# Sorted paths of all .nc files in that directory.
# os.listdir('') raises FileNotFoundError, so fall back to '.' for the scan;
# os.path.join('', name) is just `name`, so the collected paths stay relative
# to the working directory.
files = sorted(
    os.path.join(download_dir, file_name)
    for file_name in os.listdir(download_dir or os.curdir)
    if file_name.endswith('.nc')
)
files


In [None]:
!pip install cartopy

In [None]:
import xarray as xr
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import os

# Directory containing the NetCDF files.
# An empty string stands for the current working directory.
data_dir = ''
# List of NetCDF files to process. os.listdir('') raises FileNotFoundError, so
# fall back to '.'; sorting gives a deterministic processing order, since
# os.listdir() returns entries in arbitrary order.
nc_files = sorted(f for f in os.listdir(data_dir or os.curdir) if f.endswith('.nc'))

# Latitude/longitude box (degrees) used to crop the data before plotting.
# NOTE: this box is far larger than the continental US, and 100 is not a valid
# latitude (the maximum is 90) - presumably it just means "no northern limit".
lat_bounds = [10, 100]  # [south, north]
lon_bounds = [-180, -40]  # [west, east]

# Function to process and plot data from a single file
def process_and_plot(nc_file):
    """Plot the 'ch3' variable of one NetCDF file, cropped to the lat/lon box.

    The crop box comes from the module-level `lat_bounds` and `lon_bounds`.
    The figure is displayed with plt.show(); nothing is returned.

    Args:
        nc_file: Name of a NetCDF file located in `data_dir`.
    """
    # Open the file with a context manager so it is closed once the plot has
    # been drawn (the dataset was previously never closed). All plotting stays
    # inside the block because the file must still be open while data is read.
    with xr.open_dataset(os.path.join(data_dir, nc_file)) as dataset:
        # Select the variable to plot
        variable = dataset['ch3']

        # Crop the data to the configured latitude/longitude box
        variable_us = variable.sel(
            lat=slice(lat_bounds[0], lat_bounds[1]),
            lon=slice(lon_bounds[0], lon_bounds[1]),
        )

        # Create a figure and a map axis using the Mercator projection
        plt.figure(figsize=(10, 6))
        ax = plt.axes(projection=ccrs.Mercator())

        # transform=PlateCarree declares the data coordinates as regular
        # latitude/longitude so they are reprojected onto the Mercator axis
        variable_us.plot(ax=ax, transform=ccrs.PlateCarree(), cmap='viridis')

        # Add coastlines and labelled gridlines
        ax.coastlines()
        ax.gridlines(draw_labels=True)

        # Set the title
        plt.title(f'Variable from {nc_file} (Cropped to US)')

        # Show the plot
        plt.show()

# Draw one figure for each entry of nc_files, in list order
for current_file in nc_files:
    process_and_plot(current_file)
