In [None]:

import numpy as np
import netCDF4 as nc
import xarray as xr


In [None]:
ls RemoteData/RemoteSensingData/NOAA-DATA/ -lahr

In [None]:
import os
import shutil
import requests

def download_data(url, temp_dir, destination_dir, timeout=60):
    """Download one file over HTTP and place it in ``destination_dir``.

    The payload is streamed into ``temp_dir`` first and only moved to its
    final location after the transfer has finished, so a failed download
    never leaves a partial file in ``destination_dir``.

    Parameters
    ----------
    url : str
        Address of the file. The text after the last ``/`` becomes the
        local file name.
    temp_dir : str
        Staging directory; created if it does not exist.
    destination_dir : str
        Final directory; created if it does not exist.
    timeout : float or tuple, optional
        Forwarded to ``requests.get`` so a stalled connection cannot hang
        the notebook indefinitely.

    Raises
    ------
    ValueError
        If ``url`` ends with ``/`` and therefore carries no file name.
    requests.HTTPError
        If the server answers with an error status (via ``raise_for_status``).
    """
    # Extract filename from the URL
    filename = url.split("/")[-1]
    if not filename:
        raise ValueError(f"Cannot derive a file name from URL: {url!r}")

    # Both directories must exist before anything is written or moved
    os.makedirs(temp_dir, exist_ok=True)
    os.makedirs(destination_dir, exist_ok=True)

    temp_file = os.path.join(temp_dir, filename)
    destination_file = os.path.join(destination_dir, filename)

    try:
        # The context manager releases the connection even on errors
        with requests.get(url, stream=True, timeout=timeout) as response:
            # Without this check an HTML error page would be stored as the data file
            response.raise_for_status()
            # Have urllib3 undo any gzip/deflate transfer encoding while streaming
            response.raw.decode_content = True
            with open(temp_file, "wb") as f:
                shutil.copyfileobj(response.raw, f)
    except BaseException:
        # Drop the partial download before propagating the error
        if os.path.exists(temp_file):
            os.remove(temp_file)
        raise

    # Move the completed file to the destination directory
    shutil.move(temp_file, destination_file)

    print(f"File downloaded and moved to: {destination_file}")

# Staging directory for the in-flight download and final storage location
temp_dir = "tmp"
destination_dir = "RemoteData/RemoteSensingData/NOAA-DATA"

# Single GRIDSAT-B1 NetCDF file to fetch
url = "https://www.ncei.noaa.gov/data/geostationary-ir-channel-brightness-temperature-gridsat-b1/access/2019/GRIDSAT-B1.2019.01.22.15.v02r01.nc"

# Fetch the file; arguments are passed by name to make the call self-describing
download_data(url=url, temp_dir=temp_dir, destination_dir=destination_dir)


In [None]:
# Fetch the 2019 directory listing and show its HTML line by line.
# The timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() stops an error page from being mistaken for the listing.
response = requests.get(
    "https://www.ncei.noaa.gov/data/geostationary-ir-channel-brightness-temperature-gridsat-b1/access/2019",
    timeout=60,
)
response.raise_for_status()
response.text.split("\n")

In [22]:
import os
import requests
from urllib.parse import urljoin
from bs4 import BeautifulSoup

def explore_and_download_data(base_url, start_year, end_year, temp_dir, destination_dir,
                              timeout=60, skip_existing=False):
    """Download every ``.nc`` file linked from the per-year listing pages.

    For each year in ``start_year..end_year`` (inclusive) the page
    ``<base_url>/<year>/`` is fetched, every ``<a href>`` ending in ``.nc``
    is streamed into ``temp_dir`` and then moved to
    ``<destination_dir>/<year>/``.

    Parameters
    ----------
    base_url : str
        URL of the directory that contains the per-year sub-directories.
        A missing trailing slash is tolerated.
    start_year, end_year : int
        First and last year to process (both inclusive).
    temp_dir : str
        Staging directory; created if it does not exist.
    destination_dir : str
        Root of the local archive; one sub-directory per year is created.
    timeout : float or tuple, optional
        Forwarded to every ``requests.get`` call.
    skip_existing : bool, optional
        When true, files already present in the destination are not
        downloaded again. Defaults to ``False`` (always re-download).

    Raises
    ------
    requests.HTTPError
        If a listing page or a file request returns an error status.
    """
    # Create temporary directory if it doesn't exist
    os.makedirs(temp_dir, exist_ok=True)

    # urljoin() replaces the last path segment unless the base ends with "/"
    root_url = base_url if base_url.endswith("/") else base_url + "/"

    for year in range(start_year, end_year + 1):
        # Trailing slash matters: relative hrefs on the listing page must
        # resolve inside the year directory, not next to it.
        year_url = urljoin(root_url, f"{year}/")

        # Create the destination directory for the specific year
        year_destination_dir = os.path.join(destination_dir, str(year))
        os.makedirs(year_destination_dir, exist_ok=True)

        # Get the HTML content of the year URL
        listing = requests.get(year_url, timeout=timeout)
        listing.raise_for_status()
        soup = BeautifulSoup(listing.content, "html.parser")

        for link in soup.find_all("a"):
            href = link.get("href")
            # Anchors without an href yield None; everything that is not a
            # NetCDF file (parent link, sort links, ...) is ignored.
            if not href or not href.endswith(".nc"):
                continue

            file_url = urljoin(year_url, href)
            # Keep only the last path component so an href containing
            # directories cannot escape the temp/destination folders.
            file_name = href.rsplit("/", 1)[-1]
            temp_file = os.path.join(temp_dir, file_name)
            destination_file = os.path.join(year_destination_dir, file_name)

            if skip_existing and os.path.exists(destination_file):
                continue

            try:
                # The context manager releases the connection even on errors
                with requests.get(file_url, stream=True, timeout=timeout) as response:
                    # Without this check an HTML error page would be stored as data
                    response.raise_for_status()
                    response.raw.decode_content = True
                    with open(temp_file, "wb") as f:
                        shutil.copyfileobj(response.raw, f)
            except BaseException:
                # Drop the partial download before propagating the error
                if os.path.exists(temp_file):
                    os.remove(temp_file)
                raise

            # Move the completed file to the destination directory
            shutil.move(temp_file, destination_file)

            print(f"File downloaded and moved to: {destination_file}")

# Set the base URL, start year, end year, temporary directory, and destination directory
base_url = "https://www.ncei.noaa.gov/data/geostationary-ir-channel-brightness-temperature-gridsat-b1/access/"
start_year = 2019
end_year = 2023
temp_dir = "/tmp"
destination_dir = "RemoteData/RemoteSensingData/NOAA-DATA"

# Call the function to explore and download the data
explore_and_download_data(base_url, start_year, end_year, temp_dir, destination_dir)


File downloaded and moved to: RemoteData/RemoteSensingData/NOAA-DATA/2020/GRIDSAT-B1.2020.07.25.03.v02r01.nc
File downloaded and moved to: RemoteData/RemoteSensingData/NOAA-DATA/2020/GRIDSAT-B1.2020.07.25.06.v02r01.nc
File downloaded and moved to: RemoteData/RemoteSensingData/NOAA-DATA/2020/GRIDSAT-B1.2020.07.25.09.v02r01.nc
File downloaded and moved to: RemoteData/RemoteSensingData/NOAA-DATA/2020/GRIDSAT-B1.2020.07.25.12.v02r01.nc
File downloaded and moved to: RemoteData/RemoteSensingData/NOAA-DATA/2020/GRIDSAT-B1.2020.07.25.15.v02r01.nc
File downloaded and moved to: RemoteData/RemoteSensingData/NOAA-DATA/2020/GRIDSAT-B1.2020.07.25.18.v02r01.nc
File downloaded and moved to: RemoteData/RemoteSensingData/NOAA-DATA/2020/GRIDSAT-B1.2020.07.25.21.v02r01.nc
File downloaded and moved to: RemoteData/RemoteSensingData/NOAA-DATA/2020/GRIDSAT-B1.2020.07.26.00.v02r01.nc
File downloaded and moved to: RemoteData/RemoteSensingData/NOAA-DATA/2020/GRIDSAT-B1.2020.07.26.03.v02r01.nc
File downloaded and

In [None]:
import os
import xarray as xr
import pandas as pd

# Set the base directory where the files are stored
base_dir = "RemoteData/RemoteSensingData/NOAA-DATA"

# Set the start and end years
start_year = 2019
end_year = 2023

# Create an empty list to store one DataFrame per NetCDF file
dfs = []

# NOTE(review): every file is converted to a full DataFrame and kept in
# memory; for a large archive consider subsetting each dataset (variables,
# region) before calling to_dataframe() — confirm against available RAM.

# Iterate over the years
for year in range(start_year, end_year + 1):
    year_dir = os.path.join(base_dir, str(year))
    if not os.path.isdir(year_dir):
        print(f"No directory for {year}: {year_dir}")
        continue

    # The download step stores one file per time step, named like
    # GRIDSAT-B1.<year>.<MM>.<DD>.<HH>.v02r01.nc, so match on the prefix
    # instead of expecting a single GRIDSAT-B1.<year>.nc file.
    # Sorting the zero-padded names keeps the files in chronological order.
    file_names = sorted(
        name
        for name in os.listdir(year_dir)
        if name.startswith(f"GRIDSAT-B1.{year}.") and name.endswith(".nc")
    )

    for file_name in file_names:
        file_path = os.path.join(year_dir, file_name)

        # The context manager closes the underlying file handle once the
        # values have been copied into the DataFrame.
        with xr.open_dataset(file_path) as data:
            df = data.to_dataframe()

        dfs.append(df)

# pd.concat([]) fails with an unhelpful message; say what is actually missing
if not dfs:
    raise FileNotFoundError(
        f"No GRIDSAT-B1 NetCDF files found under {base_dir} for {start_year}-{end_year}"
    )

# Concatenate the per-file DataFrames into a single DataFrame
combined_df = pd.concat(dfs)

# Preview the combined DataFrame (last expression -> rich notebook display)
combined_df.head()


In [None]:
ls /thesis/Thesis_Work/Data/RemoteData/RemoteSensingData/NOAA-DATA/