In [30]:
# Utils functions and globals 

import xarray as xr
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats
import cartopy.crs as ccrs
from shapely.geometry import box
import rioxarray
from rasterio.features import geometry_mask
from scipy.stats import linregress

# Open the biodiversity priority areas based on Zhao et al. 2020 (https://www.sciencedirect.com/science/article/abs/pii/S0006320719312182?via%3Dihub)
masked_data = rioxarray.open_rasterio('masked_top_30_percent_over_water.tif')

# Set the CRS for masked_data if it's not already set
if 'crs' not in masked_data.attrs:
    masked_data.rio.write_crs('EPSG:4326', inplace=True)

# Load SST dataset and EEZ shapefile
seas_shapefile_path = '../Data/World_Seas_IHO_v3/World_Seas_IHO_v3.shp'
SEAS_DF = gpd.read_file(seas_shapefile_path)

# Calculate linear trend and p-value for each grid point
def calculate_trend_and_significance(x):
    if np.isnan(x).all():
        return np.nan, np.nan, np.nan
    else:
        slope, intercept, _, p_value, _ = stats.linregress(range(len(x)), x)
        return slope, intercept, p_value

# Calculate the trend and significance of the trend at each pixel in an xarray dataset
def calculate_trend_df(climate_df):
    df_mean = climate_df.groupby('time.year').mean()
    
    # Apply the trend and p-value calculation to the entire dataset
    results = xr.apply_ufunc(
        calculate_trend_and_significance,
        df_mean,
        input_core_dims=[['year']],
        vectorize=True,
        output_core_dims=[[], [], []],
        output_dtypes=[float, float, float]
    )
    
    # Extract the trend and p-value into separate DataArrays
    trends_da = xr.DataArray(results[0], coords=df_mean.isel(year=0).coords, name='trend')
    pvalues_da = xr.DataArray(results[2], coords=df_mean.isel(year=0).coords, name='p_value')
    
    # Create a significance mask where p-value < 0.05
    significant_da = xr.DataArray((pvalues_da < 0.05), coords=pvalues_da.coords, name='significant')
    
    # Combine trend, p-value, and significance mask into a single dataset
    trend_significance_ds = xr.Dataset({
        'trend': trends_da,
        'p_value': pvalues_da,
        'significant': significant_da
    })
    
    # Set the CRS for the trends dataset to match the EEZ CRS
    trend_significance_ds = trend_significance_ds.rio.write_crs("epsg:4326")
    return trend_significance_ds

# Calculate area-weighted trend, significance for each sea/ocean area
def area_trend(trend_significance_ds, SEAS_DF=SEAS_DF):
    # Iterate over each sea/ocean area and calculate the area-weighted trend and significant area percentage
    area_weighted_trends = []
    
    # Check if 'lat' and 'lon' are in the dataset, otherwise check for 'latitude' and 'longitude'
    if 'lat' in trend_significance_ds.dims and 'lon' in trend_significance_ds.dims:
        trend_significance_ds = trend_significance_ds.rename({'lat': 'y', 'lon': 'x'})
    elif 'latitude' in trend_significance_ds.dims and 'longitude' in trend_significance_ds.dims:
        trend_significance_ds = trend_significance_ds.rename({'latitude': 'y', 'longitude': 'x'})


    # Interpolate biodiversity priority areas to the same resolution as the climate data
    masked_data_interp = masked_data.interp(
        x=trend_significance_ds['x'],
        y=trend_significance_ds['y'],
        method='nearest'
    )

    # Calculate the area for each grid cell (assumes lat/lon grid)
    lat = trend_significance_ds['y'].values
    lon = trend_significance_ds['x'].values
    
    # Calculate grid cell area using Haversine formula or by approximation
    lat_rad = np.deg2rad(lat)
    lon_rad = np.deg2rad(lon)
    
    # Earth radius in kilometers
    R = 6371
    dlat = np.gradient(lat_rad)
    dlon = np.gradient(lon_rad)
    
    # Approximate area calculation
    cell_areas = (R**2 * np.outer(np.sin(dlat), dlon)) * np.cos(lat_rad[:, None])
    
    for i, row in SEAS_DF.iterrows():
        try:
            region_name = row['NAME']
            area = row['area']
            geom = row['geometry']
    
            # Mask SST trends with the sea geometry
            masked_trends = trend_significance_ds['trend'].rio.clip([geom], drop=True)
            masked_significance = trend_significance_ds['significant'].rio.clip([geom], drop=True)
    
            # Clip cell_areas to the same extent as masked_trends
            cell_areas_clipped = xr.DataArray(
                cell_areas, 
                dims=['y', 'x'], 
                coords={'y': trend_significance_ds['y'], 'x': trend_significance_ds['x']}
            )
            
            # Set CRS for cell_areas_clipped to match the CRS of trend_significance_ds
            cell_areas_clipped = cell_areas_clipped.rio.write_crs('EPSG:4326')
    
            # Clip cell_areas to the same geometry
            cell_areas_clipped = cell_areas_clipped.rio.clip([geom], drop=True)
        
            # Compute the area-weighted trend
            weighted_trend = (masked_trends * cell_areas_clipped).sum(dim=('y', 'x')) / cell_areas_clipped.sum()
    
            # Compute the total area that is significant
            significant_masked_areas = (masked_significance * cell_areas_clipped).where(masked_significance, 0)
            total_significant_area = significant_masked_areas.sum(dim=('y', 'x')).item()
    
            # Calculate the percentage of the area that is significant
            total_area = cell_areas_clipped.sum()
            significant_area_percent = (total_significant_area / total_area) * 100
    
            # Calculate the area for biodiversity based on the mask
            area_biodiversity = ((masked_significance * cell_areas_clipped) * masked_data_interp).sum(dim=['x', 'y']).values
    
            # Store the result
            area_weighted_trends.append({
                'Region_Name': region_name,
                'geometry': geom,
                'Weighted_Trend': weighted_trend.item(),
                'Sea_Area': total_area.values,
                'Significant_Area': total_significant_area,
                'Significant_Area_Percent': significant_area_percent.values,
                'Biodiversity_Area': area_biodiversity[0]
            })
        except Exception as e:
            print(e)

    # Convert the results to a GeoDataFrame for easy viewing
    area_weighted_trends_gdf = gpd.GeoDataFrame(area_weighted_trends, crs=SEAS_DF.crs)
    return area_weighted_trends_gdf


# Temperature

## Figure 1

In [33]:
ocean_data = pd.read_csv("../Data/GISTEMP_SST.csv") # Data downloaded from GISS Surface Temperature Analysis (v4)
gmst_data = pd.read_csv("../Data/GMST_GISTEMP4.csv") # Data downloaded from GISS Surface Temperature Analysis (v4)

temp_data = ocean_data.merge(gmst_data,on='Year')

# Calculate the mean of the 'No_Smoothing' column for the period 1880-1900
base_period = temp_data[(temp_data['Year'] >= 1880) & (temp_data['Year'] <= 1900)]
mean_sst_base_period = base_period['Ocean_Annual'].mean()
mean_gmst_base_period = base_period['No_Smoothing'].mean()

# Update the 'No_Smoothing' column to be anomalies relative to the period 1880-1900
temp_data['Ocean_Annual'] = temp_data['Ocean_Annual'] - mean_sst_base_period
temp_data['GMST_Annual'] = temp_data['No_Smoothing'] - mean_gmst_base_period

# Perform linear regression to find the slope and intercept
slope, intercept, _, _, _ = linregress(temp_data['Year'], temp_data['Ocean_Annual'])

# Calculate the trend line (y = mx + b) for each time point
temp_data['ocean_trend'] = intercept + slope * temp_data['Year']

# Perform linear regression to find the slope and intercept
slope, intercept, _, _, _ = linregress(temp_data['Year'], temp_data['GMST_Annual'])

# Calculate the trend line (y = mx + b) for each time point
temp_data['gmst_trend'] = intercept + slope * temp_data['Year']
temp_data['paris_goal'] = 1.5

temp_data[['Year','Ocean_Annual','ocean_trend','GMST_Annual','gmst_trend','paris_goal']].to_csv("../Data/mitigate_climate_change_1_temperature.csv")

# Display the updated DataFrame
temp_data[['Year','Ocean_Annual','ocean_trend','GMST_Annual','gmst_trend','paris_goal']]


Unnamed: 0,Year,Ocean_Annual,ocean_trend,GMST_Annual,gmst_trend,paris_goal
0,1880,0.080476,-0.271844,0.038571,-0.280867,1.5
1,1881,0.140476,-0.265884,0.128571,-0.272933,1.5
2,1882,0.130476,-0.259924,0.108571,-0.264999,1.5
3,1883,0.070476,-0.253964,0.038571,-0.257065,1.5
4,1884,-0.019524,-0.248004,-0.061429,-0.249131,1.5
...,...,...,...,...,...,...
139,2019,0.810476,0.556595,1.198571,0.821968,1.5
140,2020,0.800476,0.562555,1.228571,0.829902,1.5
141,2021,0.690476,0.568515,1.068571,0.837836,1.5
142,2022,0.740476,0.574475,1.108571,0.845771,1.5


## Figure 3

In [35]:
temp_df = xr.open_dataset("~/Downloads/OceanSODA_ETHZ-v2023.OCADS.01_1982-2022.nc")

trend_significance_ds = calculate_trend_df(temp_df['temperature'])

area_df = area_trend(trend_significance_ds)

# Save the GeoDataFrame to a GeoJSON file
area_df.to_csv("../Data/mitigate_climate_change_3_temperature.csv")


No data found in bounds. Data variable: trend
No data found in bounds. Data variable: trend
No data found in bounds. Data variable: trend


# Salinity

## Figure 1

In [52]:
import xarray as xr
import pandas as pd

salt_df = xr.open_dataset("~/Downloads/OceanSODA_ETHZ-v2023.OCADS.01_1982-2022.nc")
salt_df = salt_df['salinity'].mean(dim=['lat','lon']).resample(time='Y').mean()

final_subset = salt_df.sel(time=slice('1994-01-01', None))

# Create a pandas DataFrame with these columns
df = pd.DataFrame({
    'time': final_subset['time'].values,
    'salinity': final_subset.values,
})

# Convert 'time' to datetime
df['time'] = pd.to_datetime(df['time'])

# Convert datetime to a numerical value for linear regression (using ordinal format)
df['time_ordinal'] = df['time'].map(pd.Timestamp.toordinal)

# Perform linear regression to find the slope and intercept
slope, intercept, _, _, _ = linregress(df['time_ordinal'], df['salinity'])

# Calculate the trend line (y = mx + b) for each time point
df['linear_trend'] = intercept + slope * df['time_ordinal']

df[['time','salinity','linear_trend']].to_csv("../Data/mitigate_climate_change_1_salinity.csv")

# Display the updated DataFrame
df[['time','salinity','linear_trend']].head()

  index_grouper = pd.Grouper(


Unnamed: 0,time,salinity,linear_trend
0,1994-12-31,33.936993,33.933002
1,1995-12-31,33.936226,33.933941
2,1996-12-31,33.936901,33.934882
3,1997-12-31,33.936794,33.93582
4,1998-12-31,33.934608,33.936759


## Figure 4

In [53]:
salt_df = xr.open_dataset("~/Downloads/OceanSODA_ETHZ-v2023.OCADS.01_1982-2022.nc")

trend_significance_ds = calculate_trend_df(salt_df['salinity'])

area_df = area_trend(trend_significance_ds)

# Save the GeoDataFrame to a GeoJSON file
area_df.to_csv("../Data/mitigate_climate_change_4_salinity.csv")


No data found in bounds. Data variable: trend
No data found in bounds. Data variable: trend
No data found in bounds. Data variable: trend


# Acidity

## Figure 1

In [2]:
import xarray as xr
import pandas as pd

acid_df = xr.open_dataset("~/Downloads/OceanSODA_ETHZ-v2023.OCADS.01_1982-2022.nc")
acid_df = acid_df['ph_total'].mean(dim=['lat','lon']).resample(time='Y').mean()

# Create a pandas DataFrame with these columns
df = pd.DataFrame({
    'time': acid_df['time'].values,
    'ph_total': acid_df.values,
})

# Convert 'time' to datetime
df['time'] = pd.to_datetime(df['time'])

# Convert datetime to a numerical value for linear regression (using ordinal format)
df['time_ordinal'] = df['time'].map(pd.Timestamp.toordinal)

# Perform linear regression to find the slope and intercept
slope, intercept, _, _, _ = linregress(df['time_ordinal'], df['ph_total'])

# Calculate the trend line (y = mx + b) for each time point
df['linear_trend'] = intercept + slope * df['time_ordinal']

df[['time','ph_total','linear_trend']].to_csv("../Data/mitigate_climate_change_1_pH.csv")

# Display the updated DataFrame
df[['time','ph_total','linear_trend']]

  index_grouper = pd.Grouper(


Unnamed: 0,time,ph_total,linear_trend
0,1982-12-31,8.131546,8.131885
1,1983-12-31,8.129746,8.130157
2,1984-12-31,8.126836,8.128425
3,1985-12-31,8.125478,8.126697
4,1986-12-31,8.124204,8.124968
5,1987-12-31,8.122771,8.12324
6,1988-12-31,8.119001,8.121508
7,1989-12-31,8.117866,8.11978
8,1990-12-31,8.116906,8.118052
9,1991-12-31,8.115874,8.116324


## Figure 3

In [4]:
acid_df = xr.open_dataset("~/Downloads/OceanSODA_ETHZ-v2023.OCADS.01_1982-2022.nc")

trend_significance_ds = calculate_trend_df(acid_df['ph_total'])

area_df = area_trend(trend_significance_ds)

# Save the GeoDataFrame to a GeoJSON file
area_df.to_csv("../Data/mitigate_climate_change_3_pH.csv")


No data found in bounds. Data variable: trend
No data found in bounds. Data variable: trend
No data found in bounds. Data variable: trend


# Sea Level Rise

## Figure 1

In [24]:
import pandas as pd
import matplotlib.pyplot as plt
import io
import requests
import numpy as np
from scipy import stats
from datetime import datetime, timedelta

# Load the data from the file, obtain unique hyperlink from https://sealevel.nasa.gov/
url = 'https://deotb6e7tfubr.cloudfront.net/s3-edaf5da92e0ce48fb61175c28b67e95d/podaac-ops-cumulus-protected.s3.us-west-2.amazonaws.com/MERGED_TP_J1_OSTM_OST_GMSL_ASCII_V51/GMSL_TPJAOS_5.1.txt?A-userid=ps4813&Expires=1728253818&Signature=K3xewYQ~8a6Nl7xIoZ2wmsqgO4DR7~33lQLuhlH7uKH65PXRuAbddjFtW6riKHrOu8En20JKpku-56oBaaNG7asukhyorbhtOtDYKoFE5AiNe5gbaLN8bINf1RHym25W6vnBl76izBTI6FFy3CGCd74TpRRLwButl~M42cR1xANQ8SNa2A1xxTdBYgdnC5QkySZztz04VSgzqprotyDV88wq8MZ1PduSOALV-8PSgAUXX9Y74xvQNFMMcvuMpByfl7oXdRWmNIXRgRPUj5KnTch27wiuHwRWcAGry-vOwh9uShM6g0~drgwX2JFxntbqZobfTTkMcd2uFLWs3qq3pg__&Key-Pair-Id=K2ECMKQ3SIJ8HS'
# Fetch the content
response = requests.get(url)
content = response.text

# Split the content into lines
lines = content.split('\n')

# Find the index of the line containing "Header_End"
header_end_index = next(i for i, line in enumerate(lines) if "Header_End" in line)

# Read the data, skipping the header rows
raw_data = pd.read_csv(io.StringIO('\n'.join(lines[header_end_index + 1:])), 
                       sep='\s+', 
                       header=None)

# Create a new DataFrame with 'date' and 'SLR' columns
df = pd.DataFrame({
    'date': raw_data[2],
    'SLR': raw_data[5] - raw_data[5].iloc[0]  # Shifting SLR so that the first value is 0
})

# Function to convert fractional year to datetime (year, month, day only)
def fractional_year_to_datetime(year):
    year_int = int(year)  # Extract the integer part
    remainder = year - year_int  # Get the fractional part
    beginning_of_year = datetime(year_int, 1, 1)
    days_in_year = (datetime(year_int + 1, 1, 1) - beginning_of_year).days
    return (beginning_of_year + timedelta(days=remainder * days_in_year)).date()

# Convert the fractional years in 'date' column to datetime (year-month-day)
df['date'] = df['date'].apply(fractional_year_to_datetime)

# Extract the year from the 'date' column and create a new 'year' column
df['year'] = df['date'].apply(lambda x: x.year)

# Group by the 'year' column and calculate the mean for the 'SLR' column
df_grouped = df.groupby('year').mean(numeric_only=True)

# Fit a linear trend
slope, intercept, r_value, p_value, std_err = stats.linregress(df_grouped.index, df_grouped['SLR'])

# Add the linear trend to the DataFrame
df_grouped['linear_trend'] = slope * df_grouped.index + intercept

# Save the grouped data to a CSV file
df_grouped.to_csv("../Data/mitigate_climate_change_1_SLR.csv")

# Display the first few rows of the grouped DataFrame
df_grouped.head()


Unnamed: 0_level_0,SLR,linear_trend
year,Unnamed: 1_level_1,Unnamed: 2_level_1
1993,4.838919,1.617056
1994,9.133514,4.672681
1995,12.806757,7.728305
1996,13.429722,10.783929
1997,17.778108,13.839553


## Figure 3

In [31]:
# Copernicus Climate Change Service, Climate Data Store, (2018): Sea level daily gridded data from satellite observations for the global ocean from 1993 to present. Copernicus Climate Change Service (C3S) Climate Data Store (CDS)
SLR_df = xr.open_mfdataset("../Data/dataset-satellite-sea-level-global-dc7f92ea-2d3e-4fc6-b767-836a5b8c0bff/*.nc")

trend_significance_ds = calculate_trend_df(SLR_df['sla'].load())

area_df = area_trend(trend_significance_ds)

# Save the GeoDataFrame to a GeoJSON file
area_df.to_csv("../Data/mitigate_climate_change_3_SLR.csv")

No data found in bounds. Data variable: trend
No data found in bounds. Data variable: trend
No data found in bounds. Data variable: trend
No data found in bounds. Data variable: trend
No data found in bounds. Data variable: trend
No data found in bounds. Data variable: trend
No data found in bounds. Data variable: trend
No data found in bounds. Data variable: trend
No data found in bounds. Data variable: trend
No data found in bounds. Data variable: trend
No data found in bounds. Data variable: trend
No data found in bounds. Data variable: trend
No data found in bounds. Data variable: trend
No data found in bounds. Data variable: trend
No data found in bounds. Data variable: trend
No data found in bounds. Data variable: trend
No data found in bounds. Data variable: trend
No data found in bounds. Data variable: trend
No data found in bounds. Data variable: trend
No data found in bounds. Data variable: trend
No data found in bounds. Data variable: trend
No data found in bounds. Data vari

# Sea Ice

## Figure 1

In [23]:
import pandas as pd
import requests
from io import StringIO

# Base URLs for the NSIDC Sea Ice Index monthly data (North and South)
base_urls = {
    "north": "https://noaadata.apps.nsidc.org/NOAA/G02135/north/monthly/data/",
    "south": "https://noaadata.apps.nsidc.org/NOAA/G02135/south/monthly/data/"
}

# List of file names for North and South
file_names = {
    "north": [f"N_{month:02d}_extent_v3.0.csv" for month in range(1, 13)],
    "south": [f"S_{month:02d}_extent_v3.0.csv" for month in range(1, 13)]
}

# Function to download and load a single file
def download_and_load(base_url, file_name):
    url = base_url + file_name
    response = requests.get(url)
    if response.status_code == 200:
        data = StringIO(response.text)
        df = pd.read_csv(data)
        df['mo'] = int(file_name.split('_')[1])  # Extract month from filename
        return df
    else:
        print(f"Failed to download {file_name}")
        return None

# Download and load all files for North and South
dataframes = {}
for region in base_urls:
    dataframes[region] = [download_and_load(base_urls[region], file) for file in file_names[region]]

# Remove any None values (failed downloads) and concatenate dataframes
for region in dataframes:
    dataframes[region] = [df for df in dataframes[region] if df is not None]
    dataframes[region] = pd.concat(dataframes[region], ignore_index=True)
    dataframes[region] = dataframes[region].sort_values(['year', 'mo']).reset_index(drop=True)

# Add north and south data together for corresponding year-month pairs
combined_data = pd.merge(
    dataframes['north'], 
    dataframes['south'], 
    on=['year', 'mo'], 
    suffixes=('_north', '_south')
)

# Calculate total extent (this assumes 'extent' column exists in both north and south data)
combined_data['total_extent'] = combined_data[' extent_north'] + combined_data[' extent_south']

combined_data = combined_data.query("` extent_north` != -9999")

# Calculate the annual average for extent_north and extent_south
annual_avg = combined_data.groupby('year').mean(numeric_only=True)[[' extent_north', ' extent_south']]

# Calculate the linear trend for extent_north
slope_north, intercept_north, r_value_north, p_value_north, std_err_north = stats.linregress(
    annual_avg.index, annual_avg[' extent_north']
)

# Calculate the linear trend for extent_south
slope_south, intercept_south, r_value_south, p_value_south, std_err_south = stats.linregress(
    annual_avg.index, annual_avg[' extent_south']
)

# Add the linear trend values as new columns to the DataFrame
annual_avg['linear_trend_north'] = slope_north * annual_avg.index + intercept_north
annual_avg['linear_trend_south'] = slope_south * annual_avg.index + intercept_south

annual_avg.to_csv("../Data/mitigate_climate_change_1_sea_ice.csv")

# Display the first few rows of the annual averages with trends
annual_avg.head()

Unnamed: 0_level_0,extent_north,extent_south,linear_trend_north,linear_trend_south
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1978,12.66,13.15,12.509701,11.82765
1979,12.35,11.655833,12.459839,11.814673
1980,12.348333,11.205833,12.409976,11.801696
1981,12.146667,11.386667,12.360113,11.788719
1982,12.4675,11.595,12.310251,11.775742
