This worked (final version).

In [2]:
import os
import glob
import xarray as xr
import pandas as pd
from datetime import datetime

def fix_monthly_time_coords(
    input_pattern="monthly_averages/monthly_avg_*.nc",
    output_file="merged_OSTIA_monthly_2015_2025.nc",
):
    """Re-stamp each monthly file to the first day of its month and merge them.

    The year and month are read from the file name (third underscore-separated
    field, e.g. ``monthly_avg_201505.nc`` -> 2015-05-01). Whatever ``time``
    coordinate a file already carries is discarded and replaced by that
    timestamp; the files are then concatenated along ``time``, sorted, and
    written to ``output_file`` with zlib compression on every data variable.

    Args:
        input_pattern: Glob pattern selecting the monthly NetCDF files.
        output_file: Path of the merged NetCDF file to write.

    Returns:
        ``output_file`` on success, or ``False`` if no files match, a file name
        cannot be parsed, a file cannot be processed, or the merge/write fails.
        Failures are reported via ``print`` rather than raised.
    """
    monthly_files = sorted(glob.glob(input_pattern))
    print(f"📁 Found {len(monthly_files)} monthly files")

    if not monthly_files:
        print("❌ No monthly files found!")
        return False

    # Parse every file name before opening anything, so a badly named file is
    # reported the same way as any other failure and no handles are left open.
    stamped_files = []
    for file in monthly_files:
        filename = os.path.basename(file)
        try:
            # monthly_avg_201505.nc -> 201505
            year_month = filename.split('_')[2].replace('.nc', '')
            year = int(year_month[:4])
            month = int(year_month[4:6])
            # First day of the month; raises ValueError for an invalid month
            new_time = pd.Timestamp(year, month, 1)
        except (IndexError, ValueError) as e:
            print(f"❌ Error processing {file}: {e}")
            return False
        stamped_files.append((file, filename, year_month, new_time))

    # Handles exactly as returned by open_dataset. These are what get closed in
    # the ``finally`` below; datasets derived through squeeze/expand_dims may
    # not carry the underlying file closer, so closing only those is not enough.
    opened = []
    datasets = []

    try:
        for file, filename, year_month, new_time in stamped_files:
            print(f"Processing {filename}: {year_month} -> {new_time.strftime('%Y-%m-%d')}")

            try:
                source = xr.open_dataset(file)
                opened.append(source)

                # Remove the existing time dimension/coordinate completely
                if 'time' in source.dims:
                    ds = source.squeeze('time', drop=True)
                elif 'time' in source.coords:
                    # Scalar time coordinate without a dimension
                    ds = source.drop_vars('time')
                else:
                    ds = source

                # Add the new length-1 time dimension and its coordinate value
                ds = ds.expand_dims('time')
                ds['time'] = [new_time]

                # Set time attributes (avoid 'calendar' which conflicts with encoding)
                ds.time.attrs = {
                    'long_name': 'time',
                    'standard_name': 'time'
                }

                datasets.append(ds)

            except Exception as e:
                print(f"❌ Error processing {file}: {e}")
                return False

        print(f"\n🔗 Concatenating {len(datasets)} datasets...")

        try:
            # Concatenate along time, then sort so the order never depends on
            # the order in which the files were globbed
            merged_ds = xr.concat(datasets, dim='time')
            merged_ds = merged_ds.sortby('time')

            print("✅ Concatenation successful!")
            print(f"📊 Final dimensions: {merged_ds.dims}")
            print(f"🕐 Time range: {merged_ds.time.min().values} to {merged_ds.time.max().values}")

            print(f"💾 Saving to {output_file}...")

            # Explicit calendar for time, compression for every data variable
            encoding = {
                'time': {'calendar': 'gregorian'}
            }
            for var in merged_ds.data_vars:
                encoding[var] = {
                    'zlib': True,
                    'complevel': 6,
                    'shuffle': True
                }

            merged_ds.to_netcdf(output_file, encoding=encoding)
            merged_ds.close()

        except Exception as e:
            print(f"❌ Merge failed: {e}")
            return False

        return output_file

    finally:
        # Runs on success and on every failure path above
        for source in opened:
            try:
                source.close()
            except Exception as e:
                print(f"⚠️ Could not close dataset: {e}")

def inspect_original_time_coords():
    """Print the time coordinate of the first five monthly files.

    Files are taken from ``monthly_averages/monthly_avg_*.nc`` relative to the
    current working directory, in sorted order. For each one the ``time``
    DataArray, its values, attributes and dtype are printed. A file that
    cannot be opened or has no ``time`` is reported and skipped; nothing is
    raised and nothing is returned.
    """
    monthly_files = sorted(glob.glob("monthly_averages/monthly_avg_*.nc"))

    print("🔍 INSPECTING ORIGINAL TIME COORDINATES")
    print("=" * 50)

    if not monthly_files:
        print("❌ No monthly files found!")

    for i, file in enumerate(monthly_files[:5], start=1):  # Check first 5 files
        print(f"\n📄 File {i}: {os.path.basename(file)}")

        try:
            # Context manager closes the file even if one of the prints raises
            with xr.open_dataset(file) as ds:
                print(f"   Time dimension: {ds.time}")
                print(f"   Time values: {ds.time.values}")
                print(f"   Time attrs: {ds.time.attrs}")
                print(f"   Time dtype: {ds.time.dtype}")

        except Exception as e:
            print(f"   ❌ Error: {e}")

    print("\n" + "=" * 50)

# Main execution
current_dir = os.getcwd()
data_dir = os.path.join(current_dir, "OSTIA_clipped")

print("🔧 FIXING TIME COORDINATES AND MERGING")
print("=" * 45)

# Both helper functions glob with paths relative to the data directory,
# so the script works from inside it.
os.chdir(data_dir)

try:
    # First, let's see what's wrong with the current time coords
    response = input("Inspect original time coordinates first? (y/n): ")
    if response.strip().lower() == 'y':
        inspect_original_time_coords()

    # Now fix and merge
    response = input("\nProceed with fixing time coordinates and merging? (y/n): ")
    if response.strip().lower() != 'y':
        print("Cancelled")
        # SystemExit instead of the site-provided exit(): it needs no import and
        # still lets the ``finally`` below restore the working directory.
        raise SystemExit(0)

    start_time = datetime.now()

    result = fix_monthly_time_coords()

    end_time = datetime.now()
    duration = end_time - start_time

    if result:
        # Size in GiB of the file the merge just wrote
        file_size = os.path.getsize(result) / (1024**3)
        print("\n✅ SUCCESS!")
        print(f"⏱️  Time: {duration}")
        print(f"📁 File: {result}")
        print(f"📊 Size: {file_size:.2f} GB")

        # Final verification: reopen the written file and summarise it
        print("\n🔍 FINAL VERIFICATION:")
        try:
            # Context manager closes the file even if a summary line fails
            with xr.open_dataset(result) as test_ds:
                print(f"   Time steps: {len(test_ds.time)}")
                print(f"   Date range: {test_ds.time.min().values} to {test_ds.time.max().values}")
                print(f"   Variables: {list(test_ds.data_vars.keys())}")
                print(f"   Dimensions: {test_ds.dims}")

                # Show first few time values
                print(f"   First 5 times: {test_ds.time[:5].values}")
                print(f"   Last 5 times: {test_ds.time[-5:].values}")

            print("✅ File is valid and accessible!")

        except Exception as e:
            print(f"❌ Verification failed: {e}")

    else:
        print(f"\n❌ FAILED after {duration}")

finally:
    # Always return to the starting directory: on success, cancel, or error
    os.chdir(current_dir)

🔧 FIXING TIME COORDINATES AND MERGING


Inspect original time coordinates first? (y/n):  y


🔍 INSPECTING ORIGINAL TIME COORDINATES

📄 File 1: monthly_avg_201505.nc
   Time dimension: <xarray.DataArray 'time' (time: 1)> Size: 8B
array(['2015-05-15T12:00:00.000000000'], dtype='datetime64[ns]')
Coordinates:
  * time     (time) datetime64[ns] 8B 2015-05-15T12:00:00
   Time values: ['2015-05-15T12:00:00.000000000']
   Time attrs: {}
   Time dtype: datetime64[ns]

📄 File 2: monthly_avg_201506.nc
   Time dimension: <xarray.DataArray 'time' (time: 1)> Size: 8B
array(['2015-06-15T12:00:00.000000000'], dtype='datetime64[ns]')
Coordinates:
  * time     (time) datetime64[ns] 8B 2015-06-15T12:00:00
   Time values: ['2015-06-15T12:00:00.000000000']
   Time attrs: {}
   Time dtype: datetime64[ns]

📄 File 3: monthly_avg_201507.nc
   Time dimension: <xarray.DataArray 'time' (time: 1)> Size: 8B
array(['2015-07-15T12:00:00.000000000'], dtype='datetime64[ns]')
Coordinates:
  * time     (time) datetime64[ns] 8B 2015-07-15T12:00:00
   Time values: ['2015-07-15T12:00:00.000000000']
   Time attrs: {


Proceed with fixing time coordinates and merging? (y/n):  y


📁 Found 121 monthly files
Processing monthly_avg_201505.nc: 201505 -> 2015-05-01
Processing monthly_avg_201506.nc: 201506 -> 2015-06-01
Processing monthly_avg_201507.nc: 201507 -> 2015-07-01
Processing monthly_avg_201508.nc: 201508 -> 2015-08-01
Processing monthly_avg_201509.nc: 201509 -> 2015-09-01
Processing monthly_avg_201510.nc: 201510 -> 2015-10-01
Processing monthly_avg_201511.nc: 201511 -> 2015-11-01
Processing monthly_avg_201512.nc: 201512 -> 2015-12-01
Processing monthly_avg_201601.nc: 201601 -> 2016-01-01
Processing monthly_avg_201602.nc: 201602 -> 2016-02-01
Processing monthly_avg_201603.nc: 201603 -> 2016-03-01
Processing monthly_avg_201604.nc: 201604 -> 2016-04-01
Processing monthly_avg_201605.nc: 201605 -> 2016-05-01
Processing monthly_avg_201606.nc: 201606 -> 2016-06-01
Processing monthly_avg_201607.nc: 201607 -> 2016-07-01
Processing monthly_avg_201608.nc: 201608 -> 2016-08-01
Processing monthly_avg_201609.nc: 201609 -> 2016-09-01
Processing monthly_avg_201610.nc: 20161