## Analysis and Plots

#### Libraries

In [1]:
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import matplotlib.path as mpath
import matplotlib.patches as patches
import matplotlib.cm as cm
import xarray as xr
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import matplotlib.ticker as mticker
import cartopy.mpl.gridliner as gridliner
import warnings
import os
warnings.simplefilter('ignore')
warnings.filterwarnings("ignore", message="All-NaN slice encountered")
%matplotlib inline
import time

In [2]:
# Purpose of this cell: for every month in the year range, open all source files
# matching that month, recode the 'CT' variable, reduce it to a monthly mean and
# finally write the concatenated monthly means to a single NetCDF file.

start_time = time.time()  # Wall-clock start, used for the runtime report at the end

print('Starting the Data Processing...')
print('=====================================')

# Main directory where the source data files are stored
main_dir = "/storage2/tkyeimiah/depot.cmc.ec.gc.ca/"

# Destination of the merged monthly-mean file (single definition so the
# clean-up step and the save step can never point at different files)
output_path = '/storage2/tkyeimiah/Obs/CIS/CIS_year/CT_all_combined.nc'

# Number codes found in the data and, position by position, the value each
# code is replaced with
listnum = [-9.0, 1.0, 2.0, 10.0, 20.0, 30.0, 40.0, 50.0, 55.0, 60.0, 70.0, 80.0, 90.0, 91.0, 92.0, 98.0]
replace = [np.nan, 0.05, 0.075, 0.1, 0.2, 0.3, 0.4, 0.5, 0.55, 0.6, 0.7, 0.8, 0.9, 0.95, 1, 0]
assert len(listnum) == len(replace), "every code in listnum needs exactly one replacement value"

# One monthly-mean DataArray is appended per processed month
CT_monthly_data = []

# (year, month) pairs for which no input file was found
missing_months = []

# Loop over the specified year range
for yyyy in range(1990, 2021):
    print(f"Year {yyyy}: Data Processing Starts")

    # Loop over all months in a year
    for mm in range(1, 13):
        # File search pattern for the current year and month
        pattern = f"{main_dir}*_{yyyy}{mm:02d}*.nc"

        # All files that match the pattern for the current month
        file_paths = glob.glob(pattern)

        # xr.open_mfdataset raises a bare OSError on an empty list, which does
        # not say which month is missing. Skip such months explicitly instead.
        # (print, not warnings.warn: warnings are globally silenced in this notebook)
        if not file_paths:
            print(f"Year {yyyy}, month {mm:02d}: no files match '{pattern}' - month skipped")
            missing_months.append((yyyy, mm))
            continue

        # Open and combine all data files for the current month
        ds = xr.open_mfdataset(file_paths)
        CT = ds['CT']  # Sea ice concentration variable (per the original notes)

        # Replace each coded figure with its actual value. The passes run in
        # list order; no replacement value equals a code handled in a later
        # pass, so already-recoded cells are not recoded again.
        for code, value in zip(listnum, replace):
            CT = CT.where(CT != code, value)

        # Reduce the time steps of this month to a single mean, ignoring NaNs
        CT_resampled = CT.resample(time='1M').mean('time', skipna=True)

        # Keep the monthly mean for the final merge
        CT_monthly_data.append(CT_resampled)

    print(f'Year {yyyy}: Data Processing Completed')
    print('=====================================')

if missing_months:
    print(f'Months skipped because no input file was found: {len(missing_months)}')

# xr.concat cannot concatenate an empty list; fail with an actionable message
if not CT_monthly_data:
    raise FileNotFoundError(f"No input files found under '{main_dir}'; nothing to merge")

print('Merging Data Across all Years...')
# Concatenate all monthly average data along the 'time' dimension
CT_all_combined = xr.concat(CT_monthly_data, dim='time')
print('Data Merging Completed')

# Remove a previous output file, but only if one actually exists, so the
# message below is only printed when something was really removed
if os.path.exists(output_path):
    print('Removing any Existing Output File...')
    os.remove(output_path)
    print('Existing Output File Removed')

# Save the concatenated data to a NetCDF file
CT_all_combined.to_netcdf(output_path)
print('Data Saved to NetCDF Format')

# Calculate and display the total execution time
end_time = time.time()
elapsed_time = end_time - start_time
hours, remainder = divmod(elapsed_time, 3600)
minutes, seconds = divmod(remainder, 60)

print(f"Data Processing Time: {int(hours)} hours, {int(minutes)} minutes, {int(seconds)} seconds")

Starting the Data Processing...
Year 1990: Data Processing Starts
Year 1990: Data Processing Completed
Year 1991: Data Processing Starts
Year 1991: Data Processing Completed
Year 1992: Data Processing Starts
Year 1992: Data Processing Completed
Year 1993: Data Processing Starts
Year 1993: Data Processing Completed
Year 1994: Data Processing Starts
Year 1994: Data Processing Completed
Year 1995: Data Processing Starts
Year 1995: Data Processing Completed
Year 1996: Data Processing Starts
Year 1996: Data Processing Completed
Year 1997: Data Processing Starts
Year 1997: Data Processing Completed
Year 1998: Data Processing Starts
Year 1998: Data Processing Completed
Year 1999: Data Processing Starts
Year 1999: Data Processing Completed
Year 2000: Data Processing Starts
Year 2000: Data Processing Completed
Year 2001: Data Processing Starts
Year 2001: Data Processing Completed
Year 2002: Data Processing Starts
Year 2002: Data Processing Completed
Year 2003: Data Processing Starts
Year 2003: 

In [4]:
# Toy example: build a tiny (time, lat, lon) array containing NaNs and take its
# monthly mean with skipna=True, so the displayed result shows how NaNs are treated.

# Two consecutive daily time steps starting on 2023-01-01
time_coords = pd.date_range('2023-01-01', periods=2, freq='D')

# Evenly spaced grid: 4 longitudes and 3 latitudes
lon_coords = np.linspace(-180, 180, num=4)
lat_coords = np.linspace(-90, 90, num=3)

# Start from an all-ones field and punch in the NaN cells, indexed (time, lat, lon)
data = np.ones((len(time_coords), len(lat_coords), len(lon_coords)))
data[0, 0, 3] = np.nan        # day 1: first latitude row, last longitude
data[1, 0, [0, 3]] = np.nan   # day 2: first latitude row, first and last longitude
data[1, 1, 0] = np.nan        # day 2: second latitude row, first longitude

# Wrap the values in an xarray DataArray with labelled coordinates
ds = xr.DataArray(
    data,
    coords={'time': time_coords, 'lon': lon_coords, 'lat': lat_coords},
    dims=('time', 'lat', 'lon'),
)

# Monthly mean along 'time', skipping NaNs
resampled_dataa = ds.resample(time='1M').mean('time', skipna=True)

# Display the result to inspect where NaN values remain
resampled_dataa