In [1]:
import os

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pwlf
import sklearn
import sklearn.linear_model
import sklearn.metrics
from matplotlib.colors import LogNorm
from scipy.stats import binned_statistic_2d, gaussian_kde
from shapely.geometry import Point
from tqdm import tqdm

In [2]:
# Load the all-region parquet file and keep only the rows whose 'width' is greater than 10
filename = 'ClimateRegionDivision/HUC_Parquet/allRegion_large_T273check_over60.parquet'
final_large_data_warm_over60 = (
    pd.read_parquet(filename)
    .loc[lambda frame: frame['width'] > 10]
)

In [3]:
# Largest 'width' value left after the > 10 filter
final_large_data_warm_over60.loc[:, 'width'].max()

np.float64(19190.61289939818)

In [None]:
# Load the GLOW / GLOW-S points and keep only the ones on land, i.e. points that are
# outside every large-lake polygon AND inside a coastline (land) polygon.

# Paths to the shapefiles
lakes_shapefile_path = 'ClimateRegionDivision/HUC_Parquet/Large lakes 5km2 and Coastlunes/Large Hydrolakes 5km2.shp'
coastline_shapefile_path = "ClimateRegionDivision/HUC_Parquet/Large lakes 5km2 and Coastlunes/GSHHS_f_L1.shp"

# Read the parquet parts XS_GLOW_GLOWS_1 ... XS_GLOW_GLOWS_8
points_dataframes = []
for i in tqdm(range(1, 9)):
    file_path = f"ClimateRegionDivision/HUC_Parquet/XS_GLOW_GLOWS_{i}.parquet"
    points_dataframes.append(pd.read_parquet(file_path))

# Stack the parts into a single DataFrame with a fresh 0..n-1 index
points_df = pd.concat(points_dataframes, ignore_index=True)

# Build the point geometries in one vectorised call instead of one shapely Point per row
points_gdf = gpd.GeoDataFrame(
    points_df,
    geometry=gpd.points_from_xy(points_df['lon'], points_df['lat']),
    crs="EPSG:4326",  # Assuming the coordinate system is WGS84
)

# Load both shapefiles and reproject them to the CRS of the points
lakes_gdf = gpd.read_file(lakes_shapefile_path).to_crs(points_gdf.crs)
coastline_gdf = gpd.read_file(coastline_shapefile_path).to_crs(points_gdf.crs)

# Printed only once the parquet parts AND the shapefiles have really been read
print('Loaded shapefile and parquet files')

# Points that fall inside a lake polygon
points_within_lakes = gpd.sjoin(points_gdf, lakes_gdf, predicate='within')

# Everything else is outside the lakes; matching is done on the row index of points_gdf
points_outside_lakes = points_gdf[~points_gdf.index.isin(points_within_lakes.index)]

# Of those, keep the points that fall inside a coastline polygon (land).
# The inner spatial join also attaches the coastline attributes to each point.
points_within_coastline = gpd.sjoin(points_outside_lakes, coastline_gdf, predicate='within')

# Region = second character of 'riverID', as an integer.
# .assign returns a new frame, so the join result itself is left untouched.
points_on_land = points_within_coastline.assign(
    region=points_within_coastline['riverID'].str[1].astype(int)
)

# Report how many points survived both spatial filters
points_on_land_count = points_on_land.shape[0]

print(f"Points on land (inside coastline features): {points_on_land_count}")

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:11<00:00,  1.43s/it]


In [None]:
# Filter the final_large_data_warm_over60 based on COMIDs for which both GLOW and GLOW-S data are available.
# COMID is the run of digits between 'R' and 'XS' in 'ID_unique'.
# expand=False makes str.extract return a Series rather than a one-column DataFrame.
comid_text = points_on_land['ID_unique'].str.extract(r'R(\d+)XS', expand=False)

# Fail with a readable message instead of the opaque "cannot convert float NaN to integer"
unmatched_count = int(comid_text.isna().sum())
if unmatched_count:
    raise ValueError(
        f"{unmatched_count} 'ID_unique' value(s) do not match 'R<digits>XS'; cannot derive COMID"
    )

points_on_land['COMID'] = comid_text
COMID_we_have = {int(comid) for comid in comid_text.unique()}

# .copy() makes the result an independent frame, so adding columns to it later
# does not raise SettingWithCopyWarning
final_large_data_warm_over60_inCOMID = final_large_data_warm_over60[
    final_large_data_warm_over60['COMID'].isin(COMID_we_have)
].copy()


In [None]:
# Inspect the COMID-filtered frame (displayed as the cell's last expression)
final_large_data_warm_over60_inCOMID

In [None]:
# Rename date_x to date_YMD so the points use the same date column name as
# final_large_data_warm_over60_inCOMID
points_on_land = points_on_land.rename(columns={'date_x': 'date_YMD'})

# COMID was extracted from 'ID_unique' as text; store it as int
points_on_land['COMID'] = points_on_land['COMID'].astype(int)

# Merge key = riverID + date_YMD (string concatenation).
# .assign builds a new frame instead of writing a column into a boolean-mask slice of
# final_large_data_warm_over60, which avoids SettingWithCopyWarning and keeps the cell re-runnable.
final_large_data_warm_over60_inCOMID = final_large_data_warm_over60_inCOMID.assign(
    ID_date=final_large_data_warm_over60_inCOMID['riverID']
    + final_large_data_warm_over60_inCOMID['date_YMD']
)

# NOTE(review): the merge on 'ID_date' also needs that column on points_on_land, but no
# visible cell creates it there -- presumably it comes from the XS_GLOW_GLOWS parquet files; confirm.

In [None]:
# Inner-join the on-land points with four columns of the COMID-filtered table, keyed on 'ID_date'.
# 'COMID' is present on both sides, so pandas suffixes it as COMID_x / COMID_y in the result.
right_columns = ['COMID', 'date_YMD', 'ID_date', 'hot_enough']
final_large_data_warm_over60_inCOMID_land = pd.merge(
    left=points_on_land,
    right=final_large_data_warm_over60_inCOMID.loc[:, right_columns],
    how='inner',  # Change to 'left' or 'right' if needed
    on='ID_date',
)

In [None]:
# Inspect the merged frame (displayed as the cell's last expression)
final_large_data_warm_over60_inCOMID_land

In [None]:
# Keep only rows with width_x > 10 (GLOW-S) and width_y > 30 (GLOW); everything else is dropped
final_large_data_warm_over60_inCOMID_land = final_large_data_warm_over60_inCOMID_land.loc[
    lambda merged: merged['width_x'].gt(10) & merged['width_y'].gt(30)
]

In [None]:
# Hexbin density plot of GLOW-S width (x) against GLOW width (y) on linear axes,
# with the least-squares regression line (red) and the 1:1 line (black).
plt.rcParams['font.family'] = 'sans-serif'
plt.rcParams['font.size'] = 14

# Plot data
x = final_large_data_warm_over60_inCOMID_land['width_x'].values
y = final_large_data_warm_over60_inCOMID_land['width_y'].values
axis_max = 7000  # upper limit shared by both axes and by the two reference lines

fig, ax = plt.subplots()
hb = ax.hexbin(x, y, gridsize=100, cmap='viridis', bins='log', alpha=1)

# Perform linear regression on the original x, y data
model = sklearn.linear_model.LinearRegression()
model.fit(x.reshape(-1, 1), y.reshape(-1, 1))
slope = model.coef_[0][0]
intercept = model.intercept_[0]
r2 = sklearn.metrics.r2_score(y.reshape(-1, 1), intercept + slope * x.reshape(-1, 1))

# Two end points are enough to draw the straight regression line across the plotted range
x_regression_points = np.array([0, axis_max])
y_regression_points = slope * x_regression_points + intercept
ax.plot(x_regression_points, y_regression_points, color='red', linewidth=1.5)

# Put the sign in front of the magnitude so a negative intercept reads "- 3.20", not "+ -3.20"
intercept_sign = '+' if intercept >= 0 else '-'
ax.set_title(
    f'y = {slope:.2f}x {intercept_sign} {abs(intercept):.2f} (R$^2$ = {r2:.2f})',
    fontsize=14, verticalalignment='top', color='black', fontweight='bold',
)

cb = fig.colorbar(hb, ax=ax, pad=0.02)

# Axis labels
ax.set_xlabel('GLOW-S width (m)', fontweight='bold')
ax.set_ylabel('GLOW width (m)', fontweight='bold')

# Same numeric limits on both axes, 1:1 reference line, and grid
ax.set_xlim(0, axis_max)
ax.set_ylim(0, axis_max)
ax.plot([0, axis_max], [0, axis_max], color='black')
ax.grid(True, which='both', linestyle='--', linewidth=0.7, color='lightgray')

# Save and show
fig.tight_layout()
fig.savefig('/N/lustre/project/proj-212/abhinav/River_Width_analysis/RiverWidthAnalysis/Final_Figures/GLOW_vs_GLOWS_linear.tiff', dpi=600)
plt.show()

In [None]:
# Hexbin density plot of log10(GLOW-S width) against log10(GLOW width),
# with the least-squares regression line (red) fitted in log10 space and the 1:1 line (black).
plt.rcParams['font.family'] = 'sans-serif'
plt.rcParams['font.size'] = 14

# Plot data
x = final_large_data_warm_over60_inCOMID_land['width_x'].values
y = final_large_data_warm_over60_inCOMID_land['width_y'].values

x_log = np.log10(x)
y_log = np.log10(y)
log_min, log_max = 1, 4  # plotted range in log10 units

fig, ax = plt.subplots()
hb = ax.hexbin(x_log, y_log, gridsize=100, cmap='viridis', bins='log', alpha=1)

# Perform linear regression on the log10-transformed data
model = sklearn.linear_model.LinearRegression()
model.fit(x_log.reshape(-1, 1), y_log.reshape(-1, 1))
slope = model.coef_[0][0]
intercept = model.intercept_[0]
r2 = sklearn.metrics.r2_score(y_log.reshape(-1, 1), intercept + slope * x_log.reshape(-1, 1))

# The axes are in log10 units, so the line end points are too
# (previously [0, 7000], a leftover from the linear-scale cell)
x_regression_points = np.array([log_min, log_max])
y_regression_points = slope * x_regression_points + intercept
ax.plot(x_regression_points, y_regression_points, color='red', linewidth=1.5)

# x and y in the title are the plotted log10 values.
# Put the sign in front of the magnitude so a negative intercept reads "- 0.12", not "+ -0.12".
intercept_sign = '+' if intercept >= 0 else '-'
ax.set_title(
    f'y = {slope:.2f}x {intercept_sign} {abs(intercept):.2f} (R$^2$ = {r2:.2f})',
    fontsize=14, verticalalignment='top', color='black', fontweight='bold',
)

cb = fig.colorbar(hb, ax=ax, pad=0.02)

# Axis labels
ax.set_xlabel('log10(GLOW-S width (m))', fontweight='bold')
ax.set_ylabel('log10(GLOW width (m))', fontweight='bold')

# Same numeric limits on both axes, 1:1 reference line, and grid
ax.set_xlim(log_min, log_max)
ax.set_ylim(log_min, log_max)
ax.plot([0, 4], [0, 4], color='black')
ax.grid(True, which='both', linestyle='--', linewidth=0.7, color='lightgray')

# Save and show; tight_layout keeps the axis labels inside the saved image,
# matching the linear-scale figure
fig.tight_layout()
fig.savefig('/N/lustre/project/proj-212/abhinav/River_Width_analysis/RiverWidthAnalysis/Final_Figures/GLOW_vs_GLOWS_log10.tiff', dpi=600)
plt.show()

In [None]:
# One hexbin panel per latitude band, each with its own regression line (red) and the
# 1:1 line (black). All panels share a single colour scale so the common colour bar is
# valid for every panel.

# Latitude bands, each treated as the half-open interval [low, high)
lat_ranges = [(60, 90), (30, 60), (0, 30), (-30, 0), (-60, -30)]
range_labels = ['60 to 90', '30 to 60', '0 to 30', '-30 to 0', '-60 to -30']

# Create a 2x3 grid for subplots (5 plots)
plt.rcParams['font.family'] = 'sans-serif'
plt.rcParams['font.size'] = 14
fig, axes = plt.subplots(2, 3, figsize=(13, 8))
axes = axes.ravel()  # Flatten to easily iterate

# Set global x and y axis limits
x_lim = (0, 7000)
y_lim = (0, 7000)

hexbins = []  # one hexbin collection per panel that actually has data

# Iterate over latitude ranges and plot
for i, lat_range in enumerate(lat_ranges):
    ax = axes[i]

    # Filter data within the current latitude range
    in_band = (
        (final_large_data_warm_over60_inCOMID_land['lat'] >= lat_range[0])
        & (final_large_data_warm_over60_inCOMID_land['lat'] < lat_range[1])
    )
    filtered_data = final_large_data_warm_over60_inCOMID_land[in_band]

    x_widths = filtered_data['width_x'].values
    y_widths = filtered_data['width_y'].values

    if len(x_widths) >= 2:
        # Perform linear regression on filtered data
        model = sklearn.linear_model.LinearRegression()
        model.fit(x_widths.reshape(-1, 1), y_widths.reshape(-1, 1))
        slope = model.coef_[0][0]
        intercept = model.intercept_[0]
        r2 = sklearn.metrics.r2_score(y_widths.reshape(-1, 1), intercept + slope * x_widths.reshape(-1, 1))

        x_reg_line_points = np.array([0, 7000])
        y_reg_line_points = slope * x_reg_line_points + intercept

        # Density of observations plus the 1:1 reference line
        hb = ax.hexbin(x_widths, y_widths, gridsize=100, cmap='viridis', bins='log')
        hexbins.append(hb)
        ax.plot([0, 7000], [0, 7000], color='black')

        # Sign-aware equation, used for both the line label and the text drawn in the
        # panel, so a negative intercept reads "- 3.20" rather than "+ -3.20"
        intercept_sign = '+' if intercept > 0 else '-'
        equation = f'y = {slope:.2f}x {intercept_sign} {abs(intercept):.2f}\n$R^2$ = {r2:.2f}'

        # Plot the regression line
        ax.plot(x_reg_line_points, y_reg_line_points, color='red', linewidth=1,
                label=f'Regression: {equation}')

        # Add regression equation as text within the plot
        ax.text(0.05, 0.95, equation,
                transform=ax.transAxes, fontsize=11, verticalalignment='top', color='black')
    else:
        # A regression cannot be fitted on fewer than two points; mark the panel instead of crashing
        ax.text(0.5, 0.5, 'No data', transform=ax.transAxes, fontsize=11,
                horizontalalignment='center', verticalalignment='center', color='black')

    # Add title and set aspect
    ax.set_title(f'Latitude Range: {range_labels[i]}', fontsize=13, fontweight='bold')
    ax.set_xlim(x_lim)
    ax.set_ylim(y_lim)
    ax.set_aspect('equal', adjustable='box')
    ax.grid(True, which='both', linestyle='--', linewidth=0.7, color='lightgray')

    # Smaller tick labels so they fit the small panels
    ax.tick_params(axis='both', which='major', labelsize=10)

# Shared axis labels for the whole figure
fig.text(0.08, 0.5, 'GLOW width (m)', va='center', rotation='vertical', fontweight='bold')
fig.text(0.5, 0.04, 'GLOW-S width (m)', ha='center', fontweight='bold')

# Hide the unused subplot (6th plot in 2x3 grid)
axes[-1].axis('off')

if hexbins:
    # Each hexbin autoscaled its own log colour range; force one common range so that
    # equal colours mean equal counts in every panel and the colour bar applies to all.
    count_max = max(float(panel_hb.get_array().max()) for panel_hb in hexbins)
    for panel_hb in hexbins:
        # the upper limit is kept above the lower one so the log scale stays valid
        panel_hb.set_clim(vmin=1, vmax=max(count_max, 2))

    # Add a common color bar for all plots
    cbar_ax = fig.add_axes([0.92, 0.15, 0.02, 0.7])  # Position of color bar
    cb = fig.colorbar(hexbins[0], cax=cbar_ax)
    cb.set_label('Number of Observations (log scale)')

# Adjust layout for spacing between subplots
plt.subplots_adjust(wspace=0.2, hspace=0.3)

# Save and show the plots
plt.savefig('/N/lustre/project/proj-212/abhinav/River_Width_analysis/RiverWidthAnalysis/Final_Figures/GLOW_vs_GLOWS_latitude.tiff', dpi=600)
plt.show()