In [1]:
import netCDF4 as nc
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Explore one variable of a NetCDF-4 file: print the file summary, list the
# variables, then plot (and export to CSV) the chosen variable when it is 1-D or 2-D.

# Specify your file name (change this to your actual file name)
file_name = "MERRA2_400.tavg1_2d_flx_Nx.20240901.nc4"

# Select a specific variable to explore (change 'variable_name' to a real variable)
# Example: variable = dataset.variables['Temperature']
variable_name = 'Temperature'  # Replace with an actual variable from your dataset

# Open the .nc4 file read-only. The context manager closes the file even if
# reading, plotting or the CSV export raises part-way through the cell.
with nc.Dataset(file_name, 'r') as dataset:
    # Print basic information about the file
    print("File information:")
    print(dataset)

    # List all variables available in the dataset
    print("\nVariables available in the dataset:")
    print(dataset.variables.keys())

    # Check if the variable exists in the dataset
    if variable_name in dataset.variables:
        # `[:]` reads the whole variable into memory as an array
        variable_data = dataset.variables[variable_name][:]
        print(f"\nData for variable '{variable_name}':")
        print(variable_data)

        # A DataFrame is only built for 1-D and 2-D data; `df` stays None for any
        # other rank so the export step below can tell there is nothing to save.
        df = None

        if variable_data.ndim == 1:
            # 1-D data: single-column frame and a line plot
            df = pd.DataFrame(variable_data, columns=[variable_name])

            plt.figure(figsize=(10, 6))
            plt.plot(df[variable_name], label=variable_name)
            plt.title(f"Line Plot of {variable_name}")
            plt.xlabel("Index")
            plt.ylabel(variable_name)
            plt.legend()
            plt.grid()
            plt.show()

        elif variable_data.ndim == 2:
            # 2-D data: one frame cell per grid cell and a heatmap
            df = pd.DataFrame(variable_data)

            plt.figure(figsize=(10, 6))
            sns.heatmap(df, cmap="viridis", cbar=True)
            plt.title(f"Heatmap of {variable_name}")
            plt.xlabel("Column Index")
            plt.ylabel("Row Index")
            plt.show()

        else:
            print(f"Variable '{variable_name}' has {variable_data.ndim} dimensions. Visualization may not be straightforward.")

        # Save the DataFrame to CSV (optional) -- only when one was actually built;
        # without this guard a variable of any other rank would hit an undefined `df`.
        if df is not None:
            csv_file_name = f"{variable_name}.csv"
            df.to_csv(csv_file_name, index=False)
            print(f"\nVariable '{variable_name}' has been saved to {csv_file_name}")
        else:
            print(f"\nVariable '{variable_name}' was not saved to CSV because it is not 1D or 2D.")

    else:
        print(f"Variable '{variable_name}' not found in the dataset. Please check the variable name and try again.")


File information:
<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4 data model, file format HDF5):
    History: Original file generated: Wed Sep 11 20:40:43 2024 GMT
    Comment: GMAO filename: d5124_m2_jan10.tavg1_2d_flx_Nx.20240901.nc4
    Filename: MERRA2_400.tavg1_2d_flx_Nx.20240901.nc4
    Conventions: CF-1
    Institution: NASA Global Modeling and Assimilation Office
    References: http://gmao.gsfc.nasa.gov
    Format: NetCDF-4/HDF-5
    SpatialCoverage: global
    VersionID: 5.12.4
    TemporalRange: 1980-01-01 -> 2016-12-31
    identifier_product_doi_authority: http://dx.doi.org/
    ShortName: M2T1NXFLX
    GranuleID: MERRA2_400.tavg1_2d_flx_Nx.20240901.nc4
    ProductionDateTime: Original file generated: Wed Sep 11 20:40:43 2024 GMT
    LongName: MERRA2 tavg1_2d_flx_Nx: 2d,1-Hourly,Time-Averaged,Single-Level,Assimilation,Surface Flux Diagnostics
    Title: MERRA2 tavg1_2d_flx_Nx: 2d,1-Hourly,Time-Averaged,Single-Level,Assimilation,Surface Flux Diagnostics
    Southernmo

In [7]:
import netCDF4 as nc

# Open the NetCDF file read-only. The context manager closes the file as soon as
# the block ends, so no handle is left open in the kernel.
with nc.Dataset('MERRA2_20240828testcopy.nc4', 'r') as dataset:
    variable = dataset.variables['time']

    # Print the variable's summary (type, attributes, shape), then the tuple of
    # dimension names it is defined on
    print(variable)
    print(variable.dimensions)




<class 'netCDF4._netCDF4.Variable'>
int32 time(time)
    long_name: time
    units: minutes since 2024-08-28 00:30:00
    time_increment: 10000
    begin_date: 20240828
    begin_time: 3000
    vmax: 1000000000000000.0
    vmin: -1000000000000000.0
    valid_range: [-1.e+15  1.e+15]
unlimited dimensions: 
current shape = (24,)
filling off
('time',)
The first dimension is time.


In [12]:
import netCDF4 as nc

# Report the dimensions and shape of one variable and say which dimension comes first.

# Open the NetCDF file
file_path = 'MERRA2_20240828testcopy.nc4'  # Replace with your file path

# Specify the variable name
var_name = 'time'  # Replace with the variable name you want to analyze

# The context manager closes the dataset when done, even if a lookup below raises.
with nc.Dataset(file_path, 'r') as dataset:
    # Check if the variable exists and examine its structure
    if var_name in dataset.variables:
        variable = dataset.variables[var_name]
        print(f"Variable '{var_name}' details:")
        print(f"Dimensions: {variable.dimensions}")
        print(f"Shape: {variable.shape}")

        # Verify if 'time' is the first dimension. A scalar variable has an empty
        # dimensions tuple, so guard before indexing element 0.
        if not variable.dimensions:
            print("The variable is a scalar and has no dimensions.")
        elif variable.dimensions[0] == 'time':
            print("The first dimension is 'time'.")
        else:
            print(f"The first dimension is '{variable.dimensions[0]}'.")
    else:
        # Report a missing variable instead of finishing silently
        print(f"Variable '{var_name}' not found in the dataset.")


Variable 'time' details:
Dimensions: ('time',)
Shape: (24,)
The first dimension is 'time'.


In [14]:
import netCDF4 as nc

# Inputs for this cell: which file to inspect and which variable's layout to report
file_path = 'MERRA2_20240828testcopy.nc4'  # Replace with your file path
var_name = 'PRECTOT'  # Replace with your variable name

# Open the file read-only
dataset = nc.Dataset(file_path, 'r')

# Handle the missing-variable case first, then the normal path
if var_name not in dataset.variables:
    print(f"Variable '{var_name}' not found in the dataset.")
else:
    # Fetch the variable object and show its full tuple of dimension names
    variable = dataset.variables[var_name]
    print(f"Dimensions of variable '{var_name}': {variable.dimensions}")

# Release the file handle
dataset.close()


Dimensions of variable 'PRECTOT': ('time', 'lat', 'lon')
