<a href="https://colab.research.google.com/github/IzaakGagnon/SSC_GliderProject/blob/main/Preliminary_Data_Visualization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install pykrige

Collecting pykrige
  Downloading PyKrige-1.7.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.1 kB)
Downloading PyKrige-1.7.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (979 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 979.6/979.6 kB 7.4 MB/s eta 0:00:00
Installing collected packages: pykrige
Successfully installed pykrige-1.7.2


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pykrige.ok import OrdinaryKriging
import gc

# List of dataset file paths
file_names = [
    "/content/cabot_20220603_155_delayed_corrected_v4.csv",
    "/content/fundy_20190604_99_delayed_corrected_v4.csv",
    "/content/sambro_20210705_134_delayed_corrected_v4.csv",
    "/content/scotia_20180720_87_delayed_corrected_v4.csv",
    "/content/scotia_20181113_95_delayed_corrected_v4.csv"
]

# Columns every later step relies on: position plus the oxygen measurement.
# Kept in one place so the numeric coercion and the NaN filter cannot drift apart.
required_columns = [
    'latitude',
    'longitude',
    'micromoles_of_oxygen_per_unit_mass_in_sea_water',
]

# Read and concatenate the datasets
dfs = [pd.read_csv(f) for f in file_names]
data = pd.concat(dfs, ignore_index=True)

# Fail early, naming the offending columns, if the file layout is not what
# the rest of the cell expects.
missing_columns = [c for c in required_columns if c not in data.columns]
if missing_columns:
    raise KeyError(f"Input files are missing required columns: {missing_columns}")

# Randomly sample a fraction of the rows to further reduce memory usage.
# The fixed random_state makes the selection repeatable between runs.
sample_frac = 0.02  # 2% of the rows; increase (e.g. 0.05) if memory allows
data_sampled = data.sample(frac=sample_frac, random_state=42)

# Ensure key columns are numeric (unparseable entries become NaN) and drop
# any rows that are missing one of them.
for col in required_columns:
    data_sampled[col] = pd.to_numeric(data_sampled[col], errors='coerce')
data_sampled = data_sampled.dropna(subset=required_columns)

# Without this guard an empty sample only shows up later as an opaque
# "zero-size array" error when the grid extent is computed.
if data_sampled.empty:
    raise ValueError(
        "No rows with valid latitude, longitude and oxygen values remain after "
        "sampling; increase sample_frac or check the input files."
    )

# Extract the key columns as float32 arrays to reduce memory usage.
lat = data_sampled['latitude'].values.astype(np.float32)
lon = data_sampled['longitude'].values.astype(np.float32)
oxygen = data_sampled['micromoles_of_oxygen_per_unit_mass_in_sea_water'].values.astype(np.float32)

# Free memory from the full dataset if not needed further
del data, dfs
gc.collect()

# Build a coarse regular grid spanning the bounding box of the sampled
# observations; grid_resolution is the number of nodes along each axis.
grid_resolution = 20
grid_lon, grid_lat = (
    np.linspace(axis.min(), axis.max(), grid_resolution) for axis in (lon, lat)
)
grid_lon_mesh, grid_lat_mesh = np.meshgrid(grid_lon, grid_lat)

# Ordinary Kriging configuration: linear variogram, no console output and no
# diagnostic variogram plot.
# NOTE(review): longitude/latitude are passed in degrees with PyKrige's default
# coordinate handling - confirm whether a geographic or projected treatment
# is wanted for this study area.
kriging_options = dict(
    variogram_model='linear',
    verbose=False,
    enable_plotting=False,
)
OK = OrdinaryKriging(lon, lat, oxygen, **kriging_options)

# Evaluate the kriging model on the grid nodes. z is the gridded estimate;
# ss is the second output returned alongside it (presumably the kriging
# variance - confirm against the PyKrige documentation).
z, ss = OK.execute('grid', grid_lon, grid_lat)

# Plot the interpolated oxygen field overlaid with the sampled data points.
fig, ax = plt.subplots(figsize=(8, 6))
contour = ax.contourf(grid_lon_mesh, grid_lat_mesh, z, cmap='viridis', levels=50)

# Reuse the contour's colormap and normalisation for the markers. Otherwise the
# scatter auto-scales to the range of the observations, and its colours do not
# correspond to the colour bar, which is built from the contour set.
ax.scatter(lon, lat, c=oxygen, cmap=contour.cmap, norm=contour.norm,
           edgecolor='k', s=20, label='Glider Observations (Sampled)')

fig.colorbar(contour, ax=ax, label='Oxygen Concentration (µmol/kg)')
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
ax.set_title('Kriging Interpolation with Reduced Data and Grid Resolution')
ax.legend()
plt.show()