In [1]:
import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore', SyntaxWarning)

from satpy.scene import Scene
from satpy.utils import debug_on

from datetime import datetime

from glob import glob

# Get current date

In [3]:
# Get the current UTC time
now = datetime.utcnow()

# Extract year, month, day, and hour
year = now.year
month = now.month
day = now.day
hour = now.hour

# Convert the current date to Julian day
julian_day = now.timetuple().tm_yday

#print the current date
print(julian_day)

165


In [8]:
# Construct the S3 prefix
prefix = f'ABI-L1b-RadF/{year}/{julian_day:03d}/{hour:02d}/'

print(prefix)

ABI-L1b-RadF/2024/165/14/


## Check if its available in s3 bucket

In [9]:
import boto3
from botocore import UNSIGNED
from botocore.client import Config

# Initialize a session using anonymous access
s3_client = boto3.client('s3', config=Config(signature_version=UNSIGNED))

# Define the bucket name and prefix
bucket_name = 'noaa-goes16'
Prefix = prefix # Adjust based on your date and time requirements

# List objects within the specified prefix
response = s3_client.list_objects_v2(Bucket=bucket_name, Prefix=Prefix)

# Check if there are any contents in the response
if 'Contents' in response:
    for obj in response['Contents']:
        print(obj['Key'])
else:
    print("No files found in the specified path.")


ABI-L1b-RadF/2024/165/14/OR_ABI-L1b-RadF-M6C01_G16_s20241651400209_e20241651409517_c20241651409556.nc
ABI-L1b-RadF/2024/165/14/OR_ABI-L1b-RadF-M6C01_G16_s20241651410209_e20241651419517_c20241651419546.nc
ABI-L1b-RadF/2024/165/14/OR_ABI-L1b-RadF-M6C02_G16_s20241651400209_e20241651409517_c20241651409544.nc
ABI-L1b-RadF/2024/165/14/OR_ABI-L1b-RadF-M6C02_G16_s20241651410209_e20241651419517_c20241651419558.nc
ABI-L1b-RadF/2024/165/14/OR_ABI-L1b-RadF-M6C03_G16_s20241651400209_e20241651409517_c20241651409566.nc
ABI-L1b-RadF/2024/165/14/OR_ABI-L1b-RadF-M6C03_G16_s20241651410209_e20241651419517_c20241651419563.nc
ABI-L1b-RadF/2024/165/14/OR_ABI-L1b-RadF-M6C04_G16_s20241651400209_e20241651409517_c20241651409537.nc
ABI-L1b-RadF/2024/165/14/OR_ABI-L1b-RadF-M6C04_G16_s20241651410209_e20241651419517_c20241651419535.nc
ABI-L1b-RadF/2024/165/14/OR_ABI-L1b-RadF-M6C05_G16_s20241651400209_e20241651409517_c20241651409562.nc
ABI-L1b-RadF/2024/165/14/OR_ABI-L1b-RadF-M6C05_G16_s20241651410209_e20241651419517

# Now let's see if we can see how many channels are there and composite variables without downloading the data 

#

To determine the available channels and composite variables in an S3 bucket for NOAA GOES data without downloading the data, you can list the objects in the bucket and parse their filenames. This approach leverages the naming convention of the files to identify different channels and composites.

In [17]:
# first step would be to setup a S3 client using boto3 package for anonymous access to the NOAA GOES S3 bucket.

import boto3
from botocore import UNSIGNED
from botocore.client import Config
from collections import defaultdict

# Initialize the S3 client with anonymous access
s3_client = boto3.client('s3', config=Config(signature_version=UNSIGNED))

In [18]:
s3_client?

[0;31mType:[0m        S3
[0;31mString form:[0m <botocore.client.S3 object at 0x7f146616e710>
[0;31mFile:[0m        /srv/conda/envs/notebook/lib/python3.11/site-packages/botocore/client.py
[0;31mDocstring:[0m   <no docstring>

In [59]:
# Now as you have created a client to connect the s3 bucket, we need to specify which s3 bucket we want to acess

# Define the bucket name and prefix
bucket_name = 'noaa-goes16'
prefix = f'ABI-L1b-RadF/{year}/{julian_day:03d}/{hour:02d}/' # Adjust based on your date and time requirements

# List objects within the specified prefix to get file names
response = s3_client.list_objects_v2(Bucket = bucket_name, Prefix = prefix)

# Initialize dictionaries to store channels and composites
channels = defaultdict(int)
composites = defaultdict(int)



In [60]:
response.keys()

dict_keys(['ResponseMetadata', 'IsTruncated', 'Contents', 'Name', 'Prefix', 'MaxKeys', 'EncodingType', 'KeyCount'])

In [61]:
response['Contents']

[{'Key': 'ABI-L1b-RadF/2024/165/14/OR_ABI-L1b-RadF-M6C01_G16_s20241651400209_e20241651409517_c20241651409556.nc',
  'LastModified': datetime.datetime(2024, 6, 13, 14, 10, 25, tzinfo=tzlocal()),
  'ETag': '"1c1b4497b42a0a2f9bc1d9cbe0a976d0-8"',
  'Size': 66120187,
  'StorageClass': 'STANDARD'},
 {'Key': 'ABI-L1b-RadF/2024/165/14/OR_ABI-L1b-RadF-M6C01_G16_s20241651410209_e20241651419517_c20241651419546.nc',
  'LastModified': datetime.datetime(2024, 6, 13, 14, 20, 30, tzinfo=tzlocal()),
  'ETag': '"c3315e8cf88406ac47aa7d523a467794-9"',
  'Size': 67747901,
  'StorageClass': 'STANDARD'},
 {'Key': 'ABI-L1b-RadF/2024/165/14/OR_ABI-L1b-RadF-M6C01_G16_s20241651420209_e20241651429517_c20241651429565.nc',
  'LastModified': datetime.datetime(2024, 6, 13, 14, 30, 25, tzinfo=tzlocal()),
  'ETag': '"df251e3326646c4737c80472337ebcd8-9"',
  'Size': 69450852,
  'StorageClass': 'STANDARD'},
 {'Key': 'ABI-L1b-RadF/2024/165/14/OR_ABI-L1b-RadF-M6C01_G16_s20241651430209_e20241651439517_c20241651439564.nc',
 

In [62]:
# Process the list of files
if 'Contents' in response:
    for obj in response['Contents']:
        key = obj['Key']
        filename = key.split('/')[-1]
        parts = filename.split('_')
        
        if len(parts) > 2:
            channel = parts[1][-3:]  # This extracts the channel information (e.g., RadC02)
            channels[channel] += 1
        

# Print the available channels and their counts
print("Available channels and their counts:")
for channel, count in channels.items():
    print(f"{channel}: {count}")


Available channels and their counts:
C01: 6
C02: 6
C03: 6
C04: 6
C05: 6
C06: 6
C07: 6
C08: 6
C09: 6
C10: 6
C11: 6
C12: 6
C13: 6
C14: 6
C15: 6
C16: 6


# If you run the above code again and again you would see that it keeps getting updated!!
### So how to get the most updated data This is called real time access of data



In [10]:
# import os
# import s3fs

# # Initialize the S3 file system with anonymous access
# s3 = s3fs.S3FileSystem(anon=True)

# # Define the bucket name and prefix
# bucket_name = 'noaa-goes16'
# prefix = 'ABI-L1b-RadF/2024/165/00'  # Adjust based on your date and time requirements

# # List files in the specified S3 bucket and prefix
# s3_files = s3.glob(f's3://{bucket_name}/{prefix}/*.nc')

# # Ensure the local directory exists
# local_dir = '/tmp/goes_data'
# os.makedirs(local_dir, exist_ok=True)

# # Download files locally
# local_files = []
# for s3_file in s3_files[0:3]:
#     local_path = os.path.join(local_dir, os.path.basename(s3_file))
#     s3.get(s3_file, local_path)
#     local_files.append(local_path)

# # Print the local files to verify
# print("Downloaded files:")
# for local_file in local_files:
#     print(local_file)

# # Check if local_files list is empty
# if not local_files:
#     raise ValueError("No files were downloaded. Please check the S3 path and ensure it contains data.")


In [11]:
# from satpy import Scene

# # Use Satpy to read and process the downloaded files
# scn = Scene(filenames=local_files, reader='abi_l1b')

# # Load desired datasets (e.g., true color)
# scn.load(['true_color'])

# # Save the result to a file or display it
# output_path = os.path.join(local_dir, 'true_color_full_disk.png')
# scn.save_datasets(filename=output_path, datasets=['true_color'])

# print(f"True color image saved at: {output_path}")