# Demo I: Dataset Module

- Time for complete conversion of 2 months of data on an exclusive GPU: ~1 hr

dates:
  start: 2020-12-31T00:00
  end: 2021-02-01T23:00
  frequency: 6h

input:
  xarray-zarr:
    url: "gcp_era5_subset.zarr"
    param: [2m_temperature,
    10m_u_component_of_wind,
    geopotential,
    10m_v_component_of_wind,
    surface_pressure]


### Import Datasets Module's Create method

In [None]:
# Import the dataset-creation command by name (instead of a wildcard) so it is
# obvious where `Create` comes from and no unrelated names leak into the notebook.
from anemoi.datasets.commands.create import Create
import os, argparse

# Folder, relative to the current working directory, holding the dataset recipe YAML files.
main_ds_config_fldrname = "datasets_configs"

### Convert Zarr to an Anemoi-Formatted Zarr

In [None]:
# Convert the locally stored ERA5 zarr sample (originally from GCP) into an
# anemoi-formatted zarr store.
ds_config_fn = "local-gcp-sample-zarr.yaml"       # recipe file inside the dataset config folder
save2fn = "anemoi-local-gcp-sample-zarr.zarr"     # path handed to the command as `path`

# Arguments for the Create command; the recipe path is resolved against the
# current working directory.
args = argparse.Namespace(
    config="/".join([os.getcwd(), main_ds_config_fldrname, ds_config_fn]),
    path=save2fn,
    overwrite=True,
    test="",
    threads=0,
    processes=0,
    command="",
)
Create().run(args)

In [None]:
# Convert the zarr store referenced by the S3 (https URL) recipe into an
# anemoi-formatted zarr store.
ds_config_fn = "s3-httpsurl-sample-zarr.yaml"       # recipe file inside the dataset config folder
save2fn = "anemoi-s3-httpsurl-sample-zarr.zarr"     # path handed to the command as `path`

# Arguments for the Create command; the recipe path is resolved against the
# current working directory.
args = argparse.Namespace(
    config="/".join([os.getcwd(), main_ds_config_fldrname, ds_config_fn]),
    path=save2fn,
    overwrite=True,
    test="",
    threads=0,
    processes=0,
    command="",
)
Create().run(args)

In [None]:
# Convert the zarr sample referenced by the GCP (gs URL) recipe into an
# anemoi-formatted zarr store.
ds_config_fn = "gcp-gsurl-sample-zarr.yaml"       # recipe file inside the dataset config folder
save2fn = "anemoi-gcp-gsurl-sample-zarr.zarr"     # path handed to the command as `path`

# Arguments for the Create command; the recipe path is resolved against the
# current working directory.
args = argparse.Namespace(
    config="/".join([os.getcwd(), main_ds_config_fldrname, ds_config_fn]),
    path=save2fn,
    overwrite=True,
    test="",
    threads=0,
    processes=0,
    command="",
)
Create().run(args)

# Demo II: Graph Module

### Import Graphs Module's Create & Inspect methods

In [None]:
# Import the graph commands by name (instead of wildcards) so it is obvious
# which module `Create` and `Inspect` come from.
# Note: this `Create` is the graphs command and rebinds any `Create` imported earlier.
from anemoi.graphs.commands.create import Create
from anemoi.graphs.commands.inspect import Inspect
import os, argparse

# Ask Hydra to print complete stack traces instead of abbreviated errors.
os.environ["HYDRA_FULL_ERROR"] = "1"

# Folder, relative to the current working directory, holding the graph recipe YAML files.
main_graphs_config_fldrname = "graphs_configs"


### Generate Graph of a Zarr

In [None]:
# Generate the graph of the local gcp zarr sample from the "hex" recipe.
graphs_config_fn = "local_gcp_encoder_processor_decoder_connect_bw_hiddens_hex_recipe.yaml"
# Recipe-specific output name so this graph does not share a file with graphs
# built from other recipes (with overwrite=False a shared path cannot hold
# more than one of them).
save2fn = "anemoi-local-gcp-sample-zarr-graph-hex.pt"

# Arguments for the graphs Create command; the recipe path is resolved against
# the current working directory.
args = argparse.Namespace(config=f"{os.getcwd()}/{main_graphs_config_fldrname}/{graphs_config_fn}",
                          save_path=save2fn,
                          overwrite=False,
                          description='')
Create().run(args)

In [None]:
# Generate the graph of the local gcp zarr sample from the "tri" recipe.
graphs_config_fn = "local_gcp_encoder_processor_decoder_connect_bw_hiddens_tri_recipe.yaml"
# Recipe-specific output name so this graph does not share a file with graphs
# built from other recipes (with overwrite=False a shared path cannot hold
# more than one of them).
save2fn = "anemoi-local-gcp-sample-zarr-graph-tri.pt"

# Arguments for the graphs Create command; the recipe path is resolved against
# the current working directory.
args = argparse.Namespace(config=f"{os.getcwd()}/{main_graphs_config_fldrname}/{graphs_config_fn}",
                          save_path=save2fn,
                          overwrite=False,
                          description='')
Create().run(args)

In [None]:
# Build the graph of the local gcp zarr sample from the base
# encoder/processor/decoder recipe.
graphs_config_fn = "local_gcp_encoder_processor_decoder_connect_bw_hiddens_recipe.yaml"
save2fn = "anemoi-local-gcp-sample-zarr-graph.pt"   # file the graph is saved to

# Arguments for the graphs Create command; the recipe path is resolved against
# the current working directory.
args = argparse.Namespace(
    config="/".join([os.getcwd(), main_graphs_config_fldrname, graphs_config_fn]),
    save_path=save2fn,
    overwrite=False,
    description="",
)
Create().run(args)

In [None]:
# Build the graph of the s3 zarr from its encoder/processor/decoder recipe.
graphs_config_fn = "s3_encoder_processor_decoder_connect_bw_hiddens_recipe.yaml"
save2fn = "anemoi-s3-httpsurl-sample-zarr-graph.pt"   # file the graph is saved to

# Arguments for the graphs Create command; the recipe path is resolved against
# the current working directory.
args = argparse.Namespace(
    config="/".join([os.getcwd(), main_graphs_config_fldrname, graphs_config_fn]),
    save_path=save2fn,
    overwrite=False,
    description="",
)
Create().run(args)

### Inspect Graph

In [None]:
# Run the Inspect command on the saved graph of the gcp zarr sample.
saved_graph_fn = "anemoi-local-gcp-sample-zarr-graph.pt"   # graph file to read

# `path` is the graph file; `output_path` is handed to the command as the
# destination for what it produces.
args = argparse.Namespace(
    path=saved_graph_fn,
    output_path="anemoi-local-gcp-sample-zarr-graph-output-plots",
    description="",
)
Inspect().run(args)


In [None]:
# Run the Inspect command on the saved graph of the s3 zarr.
saved_graph_fn = "anemoi-s3-httpsurl-sample-zarr-graph.pt"   # graph file to read

# `path` is the graph file; `output_path` is handed to the command as the
# destination for what it produces.
args = argparse.Namespace(
    path=saved_graph_fn,
    output_path="anemoi-s3-httpsurl-sample-zarr-graph-output-plots",
    description="",
)
Inspect().run(args)


# Demo III: Training Module

Establish the following configuration files for training a GraphTransformer model:

1) __Hardware Configuration Files:__

- /anemoi/training/config/hardware/path/example.yaml
- /anemoi/training/config/hardware/files/example.yaml
- /anemoi/training/config/hardware/example.yaml
    
2) __Data Configuration File:__    
    
- /anemoi/training/config/data/zarr.yaml
    
3) __Dataloader Configuration File:__    
        
- /anemoi/training/config/dataloader/native_grid.yaml
    
4) __Training Configuration File:__    
    
- /anemoi/training/config/training/default.yaml
    
5) __Graph Configuration Files:__    
        
- /anemoi/training/config/graph/multi_scale.yaml
- /anemoi/training/config/graph/encoder_decoder_only.yaml
    
6) __Model Configuration File:__    
            
- /anemoi/training/config/model/graphtransformer.yaml
    
7) __Diagnostic Configuration File:__    
        
- /anemoi/training/config/diagnostics/eval_rollout.yaml

### Generate training configuration files

### Import Training Module's Config & Train methods

In [None]:
# Import the training commands by name (instead of wildcards) so it is obvious
# which module `Train` and `ConfigGenerator` come from.
from anemoi.training.commands.train import Train
from anemoi.training.commands.config import ConfigGenerator
import os, argparse

# Ask Hydra to print complete stack traces instead of abbreviated errors.
os.environ["HYDRA_FULL_ERROR"] = "1"

### Train Model of Interest

In [None]:
# Train model of interest
# Argument namespace handed to the training command; `command` is left as an
# empty string here.
args = argparse.Namespace(command="")

# Debug training module
#args = argparse.Namespace(command="--config-name=debug")

# Run the Train command with the namespace built above.
Train().run(args)


In [None]:
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import numpy as np

# Parallel lists: list_center[i] is the (x, y, z) center and list_radius[i]
# the radius of the i-th sphere passed to plt_sphere.
list_center = [(0,0,0)]
list_radius = [1]

def plt_sphere(list_center, list_radius, fig=None):
    """Draw one translucent sphere per (center, radius) pair on a single 3D axes.

    Parameters
    ----------
    list_center : sequence of (x, y, z)
        Center of each sphere.
    list_radius : sequence of float
        Radius of each sphere, paired with ``list_center`` by position.
    fig : matplotlib figure, optional
        Figure to draw into. Defaults to the current pyplot figure
        (``plt.gcf()``), so callers that create a figure right before the
        call keep working without passing it.

    Returns
    -------
    None
    """
    if fig is None:
        fig = plt.gcf()

    # One shared axes for all spheres; creating an axes per sphere would stack
    # independent subplots on top of each other.
    ax = fig.add_subplot(projection='3d')

    # Angular grid of the sphere parametrisation (50 x 50 samples).
    u, v = np.mgrid[0:2*np.pi:50j, 0:np.pi:50j]
    unit_x = np.cos(u)*np.sin(v)
    unit_y = np.sin(u)*np.sin(v)
    unit_z = np.cos(v)

    for c, r in zip(list_center, list_radius):
        # Scale by the radius and translate TO the center (center + r * unit);
        # subtracting the center would place the sphere at -center.
        ax.plot_surface(c[0] + r*unit_x, c[1] + r*unit_y, c[2] + r*unit_z,
                        color=np.random.choice(['g','b']),
                        alpha=0.5*np.random.random()+0.5)
# Create a new figure, then draw the configured sphere(s) into it.
fig = plt.figure()
plt_sphere(list_center, list_radius) 

In [None]:
import pickle

# Load the pickled object from the working directory and display it.
# NOTE: pickle.load can execute arbitrary code embedded in the file; only load
# pickles that come from a trusted source.
with open('longlat_reshaped_radians.pkl', 'rb') as f:
    longlat_reshaped_radians = pickle.load(f)
longlat_reshaped_radians

In [None]:
# Length of the loaded object along its first dimension.
len(longlat_reshaped_radians)

In [None]:
import torch
# dim=0 makes torch.unique compare whole rows; return_inverse=True additionally
# returns, for every input row, the index of its entry in `unique_rows`.
unique_rows, indices = torch.unique(longlat_reshaped_radians, dim=0, return_inverse=True)

print(unique_rows)
print(indices)

In [None]:
import torch

def detect_duplicate_rows(tensor):
    """Return the indices of every row of a 2D tensor that occurs more than once.

    All members of a repeated group are reported, including the first
    occurrence; the result is a 1D index tensor in ascending order.
    """
    # Label each row with the id of the distinct row it equals.
    _, group_of_row = torch.unique(tensor, dim=0, return_inverse=True)

    # Size of every group, then looked up per row: a row is a duplicate when
    # its group holds more than one member.
    group_sizes = torch.bincount(group_of_row)
    row_is_repeated = group_sizes[group_of_row] > 1

    return torch.nonzero(row_is_repeated, as_tuple=True)[0]

# Example usage:
# tensor = torch.tensor([
#     [1, 2],
#     [3, 4],
#     [1, 2],
#     [5, 6],
# ])
# Run the duplicate check on the loaded data rather than on the toy example above.
tensor=longlat_reshaped_radians

duplicate_indices = detect_duplicate_rows(tensor)
print("Duplicate row indices:", duplicate_indices)

In [None]:
import pickle

# Load the two pickled objects from the working directory.
# NOTE: pickle.load can execute arbitrary code embedded in the file; only load
# pickles that come from a trusted source.
with open('longitudes_radians.pkl', 'rb') as f:
    longitudes = pickle.load(f)
    
with open('latitudes_radians.pkl', 'rb') as f:
    latitudes = pickle.load(f)


In [None]:
# Length of `longitudes` along its first dimension.
len(longitudes)

In [None]:
import pickle

# Load the two pickled objects from the working directory. These rebind the
# `longitudes` / `latitudes` names used by the following cells.
# NOTE: pickle.load can execute arbitrary code embedded in the file; only load
# pickles that come from a trusted source.
with open('longitudes.pkl', 'rb') as f:
    # Deserialize the file contents; binding the bare `pickle` module here
    # would make every later use of `longitudes` fail.
    longitudes = pickle.load(f)

with open('latitudes.pkl', 'rb') as f:
    latitudes = pickle.load(f)


In [None]:
# Length of `longitudes` along its first dimension.
len(longitudes)

In [None]:
# Length of `latitudes` along its first dimension.
len(latitudes)

In [None]:
import torch

# Distinct values contained in `latitudes` (no `dim` given, so individual
# elements are compared rather than rows).
unique_values = torch.unique(latitudes)

print(unique_values)

In [None]:
# Length of `latitudes` along its first dimension, for comparison with the
# unique values printed by the previous cell.
len(latitudes)

In [None]:
import torch
# Distinct latitude values together with how many times each one occurs.
unique_values, counts = torch.unique(latitudes, return_counts=True)

# A value is a duplicate only when it occurs more than once. Every count is at
# least 1, so testing `counts > 0` would select all values.
duplicates = unique_values[counts > 1]

print(duplicates)

In [None]:
# Number of values selected into `duplicates`.
len(duplicates)

In [1]:
import pickle

# Load the pickled object from the working directory and display it; the
# recorded output below shows a float32 array with three columns.
# NOTE: pickle.load can execute arbitrary code embedded in the file; only load
# pickles that come from a trusted source.
with open('points.pkl', 'rb') as f:
    data = pickle.load(f)
data

array([[-4.3711388e-08, -0.0000000e+00, -1.0000000e+00],
       [ 1.7452383e-02,  0.0000000e+00, -9.9984771e-01],
       [ 3.4899492e-02,  0.0000000e+00, -9.9939084e-01],
       ...,
       [ 3.4894176e-02, -6.0908718e-04,  9.9939084e-01],
       [ 1.7449725e-02, -3.0458963e-04,  9.9984771e-01],
       [-4.3704731e-08,  7.6287776e-10,  1.0000000e+00]], dtype=float32)

In [12]:
from scipy.spatial import cKDTree
from scipy.spatial import SphericalVoronoi
# Work on the loaded array; later cells use threshold*radius both as the
# query_pairs distance and as the SphericalVoronoi radius.
points=data
threshold=1
# NOTE(review): radius sits just above 1 — presumably to absorb float32
# rounding of unit-length points; confirm.
radius=1.00000000000001

import matplotlib.pyplot as plt
import numpy as np
from scipy.spatial import cKDTree
print(points)
# Disabled: scatter plot of the first two coordinates, with every pair of
# points closer than threshold*radius marked in red.
# plt.figure(figsize=(6, 6))
# plt.plot(points[:, 0], points[:, 1], ".k", markersize=1)
# kd_tree = cKDTree(points)
# pairs = kd_tree.query_pairs(r=threshold*radius)
# for (i, j) in pairs:
#     plt.plot([points[i, 0], points[j, 0]],
#             [points[i, 1], points[j, 1]], "*r")
# plt.show()

[[-4.3711388e-08 -0.0000000e+00 -1.0000000e+00]
 [ 1.7452383e-02  0.0000000e+00 -9.9984771e-01]
 [ 3.4899492e-02  0.0000000e+00 -9.9939084e-01]
 ...
 [ 3.4894176e-02 -6.0908718e-04  9.9939084e-01]
 [ 1.7449725e-02 -3.0458963e-04  9.9984771e-01]
 [-4.3704731e-08  7.6287776e-10  1.0000000e+00]]


In [None]:
# Report whether any two points lie within threshold*radius of each other.
# query_pairs returns a collection of index pairs, which is falsy when empty.
print(
    f"Pairs of points whose distance is <= {threshold*radius}."
    if cKDTree(points).query_pairs(r=threshold*radius)
    else f"No pairs of points whose distance is <= {threshold*radius}."
)

In [None]:
# Largest absolute difference between a point's distance from `center`
# and the configured `radius`.
center = [0, 0, 0]
radii = np.linalg.norm(np.subtract(points, center), axis=1)
max_discrepancy = np.abs(np.subtract(radii, radius)).max()
max_discrepancy

#if max_discrepancy >= threshold * radius:

In [None]:
# Display the product that other cells pass as the query distance / sphere radius.
threshold*radius

In [None]:
# Build a spherical Voronoi diagram of `points` about the origin, using
# threshold*radius as the sphere radius.
sv = SphericalVoronoi(points, 
                      radius=threshold*radius,
                      center=[0,0,0])

In [None]:
# Container type of the object loaded from points.pkl.
type(data)

In [None]:
# Dimensions of the loaded array.
data.shape

In [None]:
import numpy as np

def drop_duplicate_rows(arr):
    """Drop duplicate rows from a 2D array, keeping the first occurrence of each.

    Parameters
    ----------
    arr : array_like, shape (n_rows, n_cols)
        Numeric row data; anything ``np.asarray`` turns into a 2D array.

    Returns
    -------
    numpy.ndarray
        New array holding the distinct rows of ``arr`` in their original
        order. The dtype and column count of the input are preserved, also
        when the input has no rows.

    Raises
    ------
    ValueError
        If ``arr`` is not two-dimensional.
    """
    rows = np.asarray(arr)
    if rows.ndim != 2:
        raise ValueError(f"expected a 2D array, got {rows.ndim}D")
    if rows.size == 0:
        # No values to compare: zero rows stay zero rows, and rows without
        # columns are all identical, so at most one of them is kept.
        return rows[:1].copy()

    # np.unique sorts the distinct rows; return_index reports where each one
    # first appears, and sorting those positions restores the input order.
    _, first_seen = np.unique(rows, axis=0, return_index=True)
    return rows[np.sort(first_seen)]

# Example usage
# Rebinds `points` to the de-duplicated rows of the loaded array.
points= drop_duplicate_rows(data)

In [None]:
# Display the current contents of `points`.
points

In [None]:
from scipy.spatial import cKDTree
from scipy.spatial import SphericalVoronoi
# Point `points` back at the loaded array (rebinding whatever it held before)
# and switch to a much smaller query distance: threshold*radius is now 0.02.
points=data
threshold=1
radius=0.02

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from scipy.spatial import cKDTree

# 2-D illustration of cKDTree.query_pairs on ten random points.
# The generator is seeded so the figure is reproducible, and the samples live
# in `demo_points` so the 3-D `points` array used by the other cells is not
# replaced by 2-D data.
rng = np.random.default_rng(42)
demo_points = rng.random((10, 2))
print(demo_points)

plt.figure(figsize=(6, 6))
plt.plot(demo_points[:, 0], demo_points[:, 1], "xk", markersize=14)

# Connect every pair of points whose distance is at most threshold*radius.
kd_tree = cKDTree(demo_points)
pairs = kd_tree.query_pairs(r=threshold*radius)
for (i, j) in pairs:
    plt.plot([demo_points[i, 0], demo_points[j, 0]],
            [demo_points[i, 1], demo_points[j, 1]], "-r")
plt.show()

In [None]:
# Report whether any two points lie within threshold*radius of each other.
# query_pairs returns a collection of index pairs, which is falsy when empty.
print(
    f"Pairs of points whose distance is <= {threshold*radius}."
    if cKDTree(points).query_pairs(r=threshold*radius)
    else f"No pairs of points whose distance is <= {threshold*radius}."
)

In [None]:

# Build a spherical Voronoi diagram of `points` on a sphere of radius 1 about
# the origin, passing an explicit `threshold` of 1e-06.
# NOTE(review): `center` has three components, so `points` presumably has to
# be 3-D here — confirm it has not been rebound to 2-D data by an earlier cell.
sv = SphericalVoronoi(points, 
                      radius=1,
                      center=[0,0,0],
                      threshold=1e-06)

In [None]:
# NOTE(review): `test` is not assigned in any cell visible in this notebook;
# confirm where it is defined, otherwise this fails on a fresh kernel.
type(test)

In [None]:
# NOTE(review): relies on `test`, which no visible cell assigns — confirm its origin.
test.shape

In [None]:
import numpy as np

def drop_duplicate_rows(arr):
    """Drop duplicate rows from a 2D array, keeping the first occurrence of each.

    Parameters
    ----------
    arr : array_like, shape (n_rows, n_cols)
        Numeric row data; anything ``np.asarray`` turns into a 2D array.

    Returns
    -------
    numpy.ndarray
        New array holding the distinct rows of ``arr`` in their original
        order. The dtype and column count of the input are preserved, also
        when the input has no rows.

    Raises
    ------
    ValueError
        If ``arr`` is not two-dimensional.
    """
    rows = np.asarray(arr)
    if rows.ndim != 2:
        raise ValueError(f"expected a 2D array, got {rows.ndim}D")
    if rows.size == 0:
        # No values to compare: zero rows stay zero rows, and rows without
        # columns are all identical, so at most one of them is kept.
        return rows[:1].copy()

    # np.unique sorts the distinct rows; return_index reports where each one
    # first appears, and sorting those positions restores the input order.
    _, first_seen = np.unique(rows, axis=0, return_index=True)
    return rows[np.sort(first_seen)]

# Example usage
# NOTE(review): `test` is not assigned in any cell visible in this notebook —
# confirm which array is meant to be de-duplicated here.
z= drop_duplicate_rows(test)

In [None]:
# Dimensions of the de-duplicated array.
z.shape

In [None]:
import numpy as np
org = data

# Show the input rows.
print("Original Array : ")
print(org, "\n")

# Row permutation that orders `org` lexicographically; the transpose is passed
# so that each column acts as one sort key.
new = np.lexsort(org.T)

# Rows of `org` rearranged into that sorted order.
new01 = org[new]

# After sorting, equal rows are adjacent: keep the first row, plus every row
# that differs from its predecessor in at least one column.
x = np.concatenate(([True], np.diff(new01, axis=0).any(axis=1)))
result = np.array(new01[x])

# Show the distinct rows (in sorted order, not in input order).
print("Result Array : ")
print(result)

In [None]:
# Dimensions of the array of distinct rows.
result.shape

### Generate Training Configuration files

In [None]:
# Generate training configuration files
# args = argparse.Namespace(subcommand='generate',
#                           overwrite=False,
#                           output='./training_configs_master')

# ConfigGenerator().run(args)