In [None]:
'''
This notebook processes GFS GRIB files obtained from the NCAR archive.
It uses the pygrib library, but you can use xarray as well. xarray also provides the to_dataframe method, which is very straightforward but can be time-consuming.

It is meant to be used in Google Colaboratory, where pygrib works somewhat better.
'''

In [None]:
#If needed:
#!pip install pygrib

In [8]:
#function to clean folder you are about to use
import os, shutil

def clean_content(folder):
    """Delete everything inside ``folder`` while keeping the folder itself.

    Regular files and symbolic links are unlinked; sub-directories are removed
    recursively. A failure on a single entry is reported with ``print`` and
    does not stop the remaining entries from being processed.

    Args:
        folder: Path of the directory to empty. When the path does not exist
            or is not a directory there is nothing to clean: a short notice is
            printed and the function returns instead of raising.

    Returns:
        None.
    """
    # A fresh runtime usually has no scratch folder yet; treat that as "already clean"
    # rather than letting os.listdir raise FileNotFoundError.
    if not os.path.isdir(folder):
        print('Nothing to clean: %s is not an existing directory.' % folder)
        return

    for filename in os.listdir(folder):
        file_path = os.path.join(folder, filename)
        try:
            # Symlinks are checked before directories so that a link pointing at a
            # directory is unlinked instead of having its target removed.
            if os.path.isfile(file_path) or os.path.islink(file_path):
                os.unlink(file_path)
            elif os.path.isdir(file_path):
                shutil.rmtree(file_path)
        except Exception as e:
            # Best effort: report and continue with the next entry.
            print('Failed to delete %s. Reason: %s' % (file_path, e))

In [10]:
# Empty the extraction scratch folder before processing; the processing cell reads
# every file it finds there. NOTE: this deletes everything inside the folder.
clean_content('/content/temp_dir/')

In [None]:
# Read every GRIB message (including latitude and longitude information) from the
# tar archives in `tar_files_directory` and collect them in the DataFrame `df`.
import os
import shutil
import tarfile
import pandas as pd
import pygrib

# Replace this with the actual path to your tar files directory
tar_files_directory = '/content/'
# Temporary directory to extract files
temp_dir = '/content/temp_dir/'

# Column-wise store: every GRIB message appends one entry to each list.
# NOTE(review): 'param_value', 'latitude' and 'longitude' presumably hold whole
# arrays per message rather than scalars - confirm against the pygrib docs.
data = {
    'level': [],
    'param_name': [],
    'param_value': [],
    'filename': [],
    'forecast_step': [],
    'Date': [],
    'time': [],
    'latitude': [],  # Add latitude
    'longitude': []  # Add longitude
}

# Use the "data" extraction filter when the running Python offers it: it refuses
# archive members that would be written outside the destination directory.
extract_kwargs = {'filter': 'data'} if hasattr(tarfile, 'data_filter') else {}


def _reset_temp_dir():
    """Leave `temp_dir` existing and empty (files and sub-directories removed)."""
    if os.path.isdir(temp_dir):
        shutil.rmtree(temp_dir)
    os.makedirs(temp_dir, exist_ok=True)


# Iterate through each tar file in the directory (sorted so the row order is reproducible)
for tar_file in sorted(os.listdir(tar_files_directory)):
    if not tar_file.endswith(".tar"):
        continue
    tar_file_path = os.path.join(tar_files_directory, tar_file)

    # Start from an empty scratch directory so leftovers from an earlier (possibly
    # aborted) run are never attributed to this archive.
    _reset_temp_dir()
    try:
        # Step 1: Extract all files from the tar archive into the temporary directory
        with tarfile.open(tar_file_path, 'r') as tar:
            tar.extractall(path=temp_dir, **extract_kwargs)

        # Step 2: Use pygrib.open to open each grib file and extract information.
        # os.walk is used because an archive may unpack into sub-directories.
        grib_files = sorted(
            os.path.join(root, name)
            for root, _dirs, names in os.walk(temp_dir)
            for name in names
        )

        for grib_file in grib_files:
            with pygrib.open(grib_file) as grbs:
                for grb in grbs:
                    data['level'].append(grb['level'])
                    data['param_name'].append(grb['name'])
                    data['param_value'].append(grb.values)
                    data['filename'].append(os.path.basename(tar_file))
                    data['forecast_step'].append(grb['step'])
                    data['Date'].append(grb.validityDate)
                    data['time'].append(grb.validityTime)
                    data['latitude'].append(grb['latitudes'])
                    data['longitude'].append(grb['longitudes'])
    finally:
        # Step 3: Clear the temporary directory, even when reading a file failed
        _reset_temp_dir()

# Step 4: Concatenate all the data into one DataFrame
df = pd.DataFrame(data)

# Display the concatenated DataFrame
print(df)


In [None]:
# The 'time' column holds values such as 0, 600, 1800 (hours and minutes without
# leading zeros). This cell pads them to four characters and builds a 'datetime' column.

# Coerce to numbers first, then render as zero-padded 4-character strings
padded_time = pd.to_numeric(df['time'], errors='coerce').astype(str).str.zfill(4)
df['time'] = padded_time

# Join the 'Date' and padded 'time' strings into a single timestamp string
date_text = df['Date'].astype(str)
df['datetime_str'] = date_text + padded_time

# Parse the combined string into a real datetime column
df['datetime'] = pd.to_datetime(df['datetime_str'], format='%Y%m%d%H%M')
