In [1]:
# Notebook: Workout problems
# Author: Ben Bernal
# Date: 29JUL2024

### Preliminaries

#### Libraries

In [2]:
import numpy as np
import pandas as pd
import typing as hint
from os import path, listdir
from pprint import pprint
import re
import plotly.express as px
import plotly.graph_objects as go
from dataclasses import make_dataclass

#### Utilities

In [3]:
from data_analysis.utilities import json as util_json

#### Configuration

In [4]:
# Read the environment configuration; later cells look up env_config['root'].
env_config = util_json.to_dict(file_path="../../config/env.json")

# Dataset

## Pre-processing

In [5]:
# Directory holding the OceanFlow files, built from the configured root.
files_pointer = path.normpath(path.join(env_config["root"], "modules/m5/OceanFlow"))

print(files_pointer)

/Users/apolo/Desktop/Analysis/Code/modules/m5/OceanFlow


In [6]:
# Bare entry names (not full paths) found in the data directory.
file_lst = listdir(path=files_pointer)

pprint(file_lst)

['23v.csv',
 '74v.csv',
 '78u.csv',
 '97u.csv',
 '62v.csv',
 '9v.csv',
 '81u.csv',
 '5u.csv',
 '35v.csv',
 '39u.csv',
 '42u.csv',
 '19v.csv',
 '15u.csv',
 '100v.csv',
 '58v.csv',
 '54u.csv',
 '23u.csv',
 '74u.csv',
 '78v.csv',
 '97v.csv',
 '62u.csv',
 '9u.csv',
 '81v.csv',
 '5v.csv',
 '35u.csv',
 '39v.csv',
 '42v.csv',
 '19u.csv',
 '15v.csv',
 '58u.csv',
 '100u.csv',
 '54v.csv',
 '55v.csv',
 '59u.csv',
 '14v.csv',
 '18u.csv',
 '43v.csv',
 '38v.csv',
 '34u.csv',
 '4v.csv',
 '80v.csv',
 '63u.csv',
 '8u.csv',
 '79v.csv',
 '96v.csv',
 '75u.csv',
 '22u.csv',
 '55u.csv',
 'mask.csv',
 '59v.csv',
 '14u.csv',
 '18v.csv',
 '43u.csv',
 '38u.csv',
 '34v.csv',
 '4u.csv',
 '80u.csv',
 '63v.csv',
 '8v.csv',
 '79u.csv',
 '96u.csv',
 '75v.csv',
 '22v.csv',
 '25u.csv',
 '29v.csv',
 '72u.csv',
 '91v.csv',
 '64u.csv',
 '68v.csv',
 '87v.csv',
 '3v.csv',
 '33u.csv',
 '48u.csv',
 '44v.csv',
 '13v.csv',
 '52v.csv',
 '25v.csv',
 '29u.csv',
 '72v.csv',
 '91u.csv',
 '64v.csv',
 '68u.csv',
 '87u.csv',
 '3u.csv',

In [7]:
def digest_csv(
    pntr: str,
    file_name: str,
    root: hint.Optional[str] = None,
) -> hint.Tuple[int, str, pd.DataFrame]:
    """Read one header-less CSV file and decode the index and tag in its name.

    The part of ``file_name`` before the first dot is split into a leading
    integer and a single trailing character, e.g. ``"5v.csv"`` -> ``(5, "v")``.

    Parameters
    ----------
    pntr : str
        Directory containing the file, relative to ``root``.
    file_name : str
        Name of the CSV file, e.g. ``"5v.csv"``.
    root : str, optional
        Base directory that ``pntr`` is joined onto. When omitted, the
        notebook-level ``env_config['root']`` is used.

    Returns
    -------
    tuple of (int, str, pandas.DataFrame)
        The integer parsed from the name, the last character of the name's
        prefix, and the file contents read with ``header=None``.

    Raises
    ------
    ValueError
        If the name prefix (minus its last character) is not an integer,
        e.g. ``"mask.csv"``. Raised before any file is opened.
    """
    # Plain str.split avoids the invalid "\." escape the regex literal needed.
    prefix = file_name.split(".")[0]

    try:
        index = int(prefix[:-1])
    except ValueError as exc:
        raise ValueError(
            f"cannot parse '<integer><tag>' from file name {file_name!r}"
        ) from exc
    tag = prefix[-1]

    # Fall back to the notebook configuration only when no root was supplied.
    base_dir = env_config['root'] if root is None else root
    csv_path = path.normpath(path.join(base_dir, pntr, file_name))

    return index, tag, pd.read_csv(csv_path, header=None)


In [8]:
# Smoke-test the loader on a single file; the cell displays the returned tuple.
digest_csv(pntr="modules/m5/OceanFlow", file_name="5v.csv")

(5,
 'v',
      0    1    2    3         4         5         6         7         8    \
 0      0    0    0  0.0  0.000000  0.000000  0.000000  0.000000  0.000000   
 1      0    0    0  0.0  0.000000  0.000000  0.000000  0.000000  0.000000   
 2      0    0    0  0.0 -0.323870 -0.317560 -0.321740 -0.336400 -0.351070   
 3      0    0    0  0.0 -0.670830 -0.662480 -0.671650 -0.698340 -0.725030   
 4      0    0    0  0.0 -0.717010 -0.717200 -0.728000 -0.749410 -0.770820   
 ..   ...  ...  ...  ...       ...       ...       ...       ...       ...   
 499    0    0    0  0.0  0.002582  0.004911  0.007059  0.009024  0.010989   
 500    0    0    0  0.0  0.009211  0.012976  0.015543  0.016913  0.018283   
 501    0    0    0  0.0  0.010437  0.014172  0.016486  0.017380  0.018273   
 502    0    0    0  0.0  0.004175  0.005668  0.006594  0.006952  0.007309   
 503    0    0    0  0.0  0.000000  0.000000  0.000000  0.000000  0.000000   
 
           9    ...      545      546      547      

In [39]:
# Nested mapping built below: parsed integer index -> tag character -> float64 DataFrame.
data_dict = {}

for file_name in file_lst:
    # mask.csv is deliberately excluded. Any other directory entry that is not
    # a CSV file (e.g. a hidden file created by the OS) is skipped as well,
    # because digest_csv cannot parse an index out of such a name.
    if file_name == "mask.csv" or not file_name.endswith(".csv"):
        continue

    time_idx, compo, frame = digest_csv(
        pntr="modules/m5/OceanFlow",
        file_name=file_name
    )
    # Cast every column to float64 so all frames share one dtype.
    frame = frame.astype(
        dtype="float64"
    )
    # setdefault creates the inner dict the first time an index is seen,
    # so the frame is stored exactly once per (index, tag) pair.
    data_dict.setdefault(time_idx, {})[compo] = frame
    


In [40]:
# Copy of data_dict whose keys are inserted in ascending order, so that
# iterating over it visits the entries from the smallest index upward.
sorted_data_dict = {
    key: data_dict[key]
    for key in sorted(data_dict)
}

## 3D-Array

In [41]:
# Stack the 'u' frame of every entry, in sorted key order, along a new last axis.
x_axis_array = np.stack(
    [by_tag["u"].to_numpy() for by_tag in sorted_data_dict.values()],
    axis=-1,
)

In [42]:
# Stack the 'v' frame of every entry, in sorted key order, along a new last axis.
y_axis_array = np.stack(
    [by_tag["v"].to_numpy() for by_tag in sorted_data_dict.values()],
    axis=-1,
)

In [43]:
# Display the dimensions of the stacked 'u' array.
np.shape(x_axis_array)

(504, 555, 100)

In [44]:
# Display the dimensions of the stacked 'v' array.
np.shape(y_axis_array)

(504, 555, 100)

In [45]:

# Sanity check: the first slice along the last axis must reproduce the 'u'
# frame stored under key 1.
pd.DataFrame(data=x_axis_array[..., 0]).equals(other=data_dict[1]["u"])

True