In [27]:
!pip install netCDF4



In [31]:
!pip install cftime



In [32]:
import cftime

# Report which cftime release the kernel actually imported.
installed_version = cftime.__version__
print(f"cftime version: {installed_version}")


cftime version: 1.6.4.post1


In [40]:
import xarray as xr
import cftime
import netCDF4

# Open the dataset with time decoding switched off (decode_times=False), so
# the 'time' coordinate keeps its raw numeric values and is decoded by hand below.
ds = xr.open_dataset('air.mon.1981-2010.ltm.nc', decode_times=False)

# Collect the raw time values together with the metadata needed to decode them.
time_coord = ds['time']
time_values = time_coord.values
time_units = time_coord.attrs['units']  # e.g., "days since 1800-01-01 00:00:0.0"
# Fall back to the 'standard' calendar when the attribute is absent.
calendar = time_coord.attrs.get('calendar', 'standard')

# Decode the numeric values into date objects using the units and calendar.
dates = netCDF4.num2date(time_values, units=time_units, calendar=calendar)

# Keep a plain Python list of the decoded dates for later cells.
human_readable_dates = dates.tolist()

# Show at most the first 12 decoded dates as YYYY-MM-DD.
first_dates = human_readable_dates[:12]
for date in first_dates:
    print(date.strftime('%Y-%m-%d'))


0001-01-01
0001-02-01
0001-03-01
0001-04-01
0001-05-01
0001-06-01
0001-07-01
0001-08-01
0001-09-01
0001-10-01
0001-11-01
0001-12-01


In [47]:
# Select the 'air' variable from the dataset.
air_da = ds['air']

# Descriptive labels for the columns produced from the DataArray.
column_labels = {
    'time': 'Date',
    'level': 'Pressure_Level_hPa',
    'lat': 'Latitude',
    'lon': 'Longitude',
    'air': 'Air_Temperature',
}

# Convert to a DataFrame, move the index into regular columns, and apply the
# descriptive labels in a single chain (no in-place mutation).
df = air_da.to_dataframe().reset_index().rename(columns=column_labels)

# Preview the first rows.
print(df.head())

       Date  Pressure_Level_hPa  Latitude  Longitude  Air_Temperature
0 -657073.0              1000.0      90.0        0.0       -28.217329
1 -657073.0              1000.0      90.0        2.5       -28.217329
2 -657073.0              1000.0      90.0        5.0       -28.217329
3 -657073.0              1000.0      90.0        7.5       -28.217329
4 -657073.0              1000.0      90.0       10.0       -28.217329
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2144448 entries, 0 to 2144447
Data columns (total 5 columns):
 #   Column              Dtype  
---  ------              -----  
 0   Date                float64
 1   Pressure_Level_hPa  float32
 2   Latitude            float32
 3   Longitude           float32
 4   Air_Temperature     float32
dtypes: float32(4), float64(1)
memory usage: 49.1 MB
None


In [46]:
# Report per-column memory usage before casting.
print(df.memory_usage(deep=True))

# Cast each numeric column to float32, one column at a time.
for column in ('Pressure_Level_hPa', 'Latitude', 'Longitude', 'Air_Temperature'):
    df[column] = df[column].astype('float32')

# Report per-column memory usage again so the two listings can be compared.
print(df.memory_usage(deep=True))


Index                      128
Date                  17155584
Pressure_Level_hPa     8577792
Latitude               8577792
Longitude              8577792
Air_Temperature        8577792
dtype: int64
Index                      128
Date                  17155584
Pressure_Level_hPa     8577792
Latitude               8577792
Longitude              8577792
Air_Temperature        8577792
dtype: int64


In [49]:
# Apply the descriptive column labels. Keys that are not present among the
# current columns are ignored by rename, so a frame that already carries
# these labels is left unchanged.
column_labels = {
    'time': 'Date',
    'level': 'Pressure_Level_hPa',
    'lat': 'Latitude',
    'lon': 'Longitude',
    'air': 'Air_Temperature',
}
df = df.rename(columns=column_labels)
print(df.head())

       Date  Pressure_Level_hPa  Latitude  Longitude  Air_Temperature
0 -657073.0              1000.0      90.0        0.0       -28.217329
1 -657073.0              1000.0      90.0        2.5       -28.217329
2 -657073.0              1000.0      90.0        5.0       -28.217329
3 -657073.0              1000.0      90.0        7.5       -28.217329
4 -657073.0              1000.0      90.0       10.0       -28.217329


In [50]:
# Step 7: Calculate spatial points per time
# Dimension lengths come from ds.sizes, the mapping from dimension name to
# length. Using ds.dims as such a mapping is deprecated in xarray.
num_levels = ds.sizes['level']     # 17 in the recorded run
num_lats = ds.sizes['lat']         # 73 in the recorded run
num_lons = ds.sizes['lon']         # 144 in the recorded run
spatial_points_per_time = num_levels * num_lats * num_lons  # 17 * 73 * 144 = 178,704

# Step 8: Number of time points
num_times = ds.sizes['time']       # 12 in the recorded run

# Step 9: Expected DataFrame length
# One row per (time, level, lat, lon) combination: 12 * 178,704 = 2,144,448.
expected_length = num_times * spatial_points_per_time
actual_length = len(df)
print(f"Expected DataFrame length: {expected_length}")
print(f"Actual DataFrame length: {actual_length}")


Expected DataFrame length: 2144448
Actual DataFrame length: 2144448


  num_levels = ds.dims['level']      # 17
  num_lats = ds.dims['lat']          # 73
  num_lons = ds.dims['lon']          # 144
  num_times = ds.dims['time']        # 12


In [57]:
import numpy as np

# Step 10: Replace 'Date' column
# Only overwrite the column when the row count matches the expected size;
# otherwise report the mismatch and leave the frame untouched.
lengths_agree = expected_length == actual_length
if not lengths_agree:
    print('ERROR: expected and actual length do not match')
else:
    # Repeat each decoded date once per spatial point.
    # NOTE(review): this assumes the rows are ordered with time as the
    # slowest-varying dimension - confirm against the DataFrame's row order.
    repeated_dates = np.repeat(human_readable_dates, spatial_points_per_time)
    df['Date'] = repeated_dates
    print(df.head())

                  Date  Pressure_Level_hPa  Latitude  Longitude  \
0  0001-01-01 00:00:00              1000.0      90.0        0.0   
1  0001-01-01 00:00:00              1000.0      90.0        2.5   
2  0001-01-01 00:00:00              1000.0      90.0        5.0   
3  0001-01-01 00:00:00              1000.0      90.0        7.5   
4  0001-01-01 00:00:00              1000.0      90.0       10.0   

   Air_Temperature  
0       -28.217329  
1       -28.217329  
2       -28.217329  
3       -28.217329  
4       -28.217329  


In [69]:
# Step 11: Store the 'Date' column as strings so it can be compared
# against plain text values.
date_as_text = df['Date'].astype(str)
df['Date'] = date_as_text

# Show the rows whose date string equals the first decoded timestamp.
is_first_date = df['Date'] == '0001-01-01 00:00:00'
print(df[is_first_date])

                       Date  Pressure_Level_hPa  Latitude  Longitude  \
0       0001-01-01 00:00:00              1000.0      90.0        0.0   
1       0001-01-01 00:00:00              1000.0      90.0        2.5   
2       0001-01-01 00:00:00              1000.0      90.0        5.0   
3       0001-01-01 00:00:00              1000.0      90.0        7.5   
4       0001-01-01 00:00:00              1000.0      90.0       10.0   
...                     ...                 ...       ...        ...   
178699  0001-01-01 00:00:00                10.0     -90.0      347.5   
178700  0001-01-01 00:00:00                10.0     -90.0      350.0   
178701  0001-01-01 00:00:00                10.0     -90.0      352.5   
178702  0001-01-01 00:00:00                10.0     -90.0      355.0   
178703  0001-01-01 00:00:00                10.0     -90.0      357.5   

        Air_Temperature  
0            -28.217329  
1            -28.217329  
2            -28.217329  
3            -28.217329  
4    