In [1]:
from netCDF4 import Dataset
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
import datetime
from tqdm import tqdm
from glob import glob
import pickle
import random
import os
import fnmatch

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.utils.data import Dataset as TorchDataset
import torch.nn.functional as F
from torchvision import datasets, transforms
from typing import Tuple, List, Type, Dict, Any
from torch.utils.tensorboard import SummaryWriter

from SGDR import CosineAnnealingWarmRestarts
from mish import Mish
from coord_conv import CoordConv
from MyResidualNetwork import MyResNet, MyBasicBlock
from MyDataPreparation import CustomDataset, Sampler
from autoencoder import Encoder, Decoder

In [2]:
def find_files(directory, pattern, maxdepth=None):
    """Recursively collect files under ``directory`` whose basename matches ``pattern``.

    Args:
        directory: Root directory to walk.
        pattern: ``fnmatch``-style glob applied to each file's basename
            (not to the full path).
        maxdepth: Optional depth limit, counted in path components below
            ``directory``: ``1`` keeps only files directly inside
            ``directory``, ``2`` also keeps files in its immediate
            subdirectories, and so on. ``None`` (default) means no limit.

    Returns:
        list: Matching paths, each built as ``os.path.join(root, basename)``,
        in ``os.walk`` order (the list is not sorted here).
    """
    matches = []
    for root, dirs, files in os.walk(directory):
        if maxdepth is not None:
            # Measure depth relative to the walk root instead of counting
            # separators in the raw strings: a trailing separator on
            # ``directory`` would otherwise shift the limit by one level.
            rel_root = os.path.relpath(root, directory)
            file_depth = 1 if rel_root == os.curdir else rel_root.count(os.sep) + 2
            if file_depth >= maxdepth:
                # Anything below this directory is too deep; clearing the
                # list in place stops os.walk from descending further.
                dirs[:] = []
            if file_depth > maxdepth:
                continue
        for basename in files:
            if fnmatch.fnmatch(basename, pattern):
                filename = os.path.join(root, basename)
                # NOTE(review): this literal is two backslash characters, so
                # only doubled backslashes are rewritten -- confirm intended.
                filename = filename.replace('\\\\', os.sep)
                matches.append(filename)
    return matches

In [3]:
# Gather every pickled wind array under the data directory, in sorted path order.
wind_files_pkl = sorted(
    find_files('/mnt/hippocamp/asavin/data/wind/wind_arrays_kara_norm', '*.pkl')
)

In [4]:
# Build the dataset over the full sorted file list; n_files is set to the
# number of files found, i.e. no file is left out at construction time.
dataset = CustomDataset(wind_files_pkl, n_files=len(wind_files_pkl))

In [5]:
# NOTE(review): presumably this loads the files and populates
# dataset.wind_array, which the following cells read -- confirm in
# MyDataPreparation.
dataset.make_new_data()

In [None]:
# Display the array shape. The next cell indexes it as [:, c, :, :] with
# c in {0, 1}, so it is expected to be 4-D with at least two entries on axis 1.
dataset.wind_array.shape

In [None]:
# Flatten index 0 and index 1 of axis 1 into 1-D vectors; the plot titles
# label them U and V respectively.
channel_0_values_hrs = dataset.wind_array[:, 0, :, :].reshape(-1)
channel_1_values_hrs = dataset.wind_array[:, 1, :, :].reshape(-1)

# One histogram panel per component. The panels differ only in data, colour
# and title, so they are described as data and drawn by a single loop on
# explicit Axes objects rather than the implicit pyplot "current axes".
hourly_panels = (
    (channel_0_values_hrs, 'blue', "U hourly"),
    (channel_1_values_hrs, 'green', "V hourly"),
)
fig, axes = plt.subplots(1, 2, figsize=(12, 6))
for ax, (panel_values, panel_color, panel_title) in zip(axes, hourly_panels):
    ax.hist(panel_values, bins=100, color=panel_color, alpha=0.7)
    ax.set_title(panel_title)
    ax.set_xlabel("Value")
    ax.set_ylabel("Frequency")
# Render explicitly so the cell does not end on an expression whose repr
# would be echoed as output.
plt.show()

In [None]:
# Displayed as (mean, std) of channel 0 followed by (mean, std) of channel 1.
tuple(
    reducer()
    for series in (channel_0_values_hrs, channel_1_values_hrs)
    for reducer in (series.mean, series.std)
)

In [9]:
from MyDataPreparationLSTM import CustomDataset as CustomDatasetLSTM

In [10]:
# Second dataset built from the same file list via the LSTM data-preparation
# module.
# NOTE(review): the meaning of num_days=1 and num_years=44 is defined in
# MyDataPreparationLSTM; presumably 44 is the number of years available --
# confirm, and consider naming these constants.
datasetLSTM = CustomDatasetLSTM(wind_files_pkl, num_days=1, num_years=44)

In [11]:
# NOTE(review): presumably this fills datasetLSTM.data (read by the next
# cells). The name suggests random sampling, and no seed is set in the
# visible cells, so the selection may differ between runs -- confirm.
datasetLSTM.select_random_years()

In [None]:
# Display the array shape. The next cell indexes it as [:, c, :, :] with
# c in {0, 1}, so it is expected to be 4-D with at least two entries on axis 1.
datasetLSTM.data.shape

In [None]:
# Flatten index 0 and index 1 of axis 1 into 1-D vectors; the plot titles
# label them U and V respectively.
channel_0_values_day = datasetLSTM.data[:, 0, :, :].reshape(-1)
channel_1_values_day = datasetLSTM.data[:, 1, :, :].reshape(-1)

# One histogram panel per component. The panels differ only in data, colour
# and title, so they are described as data and drawn by a single loop on
# explicit Axes objects rather than the implicit pyplot "current axes".
daily_panels = (
    (channel_0_values_day, 'blue', "U daily"),
    (channel_1_values_day, 'green', "V daily"),
)
fig, axes = plt.subplots(1, 2, figsize=(12, 6))
for ax, (panel_values, panel_color, panel_title) in zip(axes, daily_panels):
    ax.hist(panel_values, bins=100, color=panel_color, alpha=0.7)
    ax.set_title(panel_title)
    ax.set_xlabel("Value")
    ax.set_ylabel("Frequency")
# Render explicitly so the cell does not end on an expression whose repr
# would be echoed as output.
plt.show()

In [None]:
# Displayed as (mean, std) of channel 0 followed by (mean, std) of channel 1.
tuple(
    reducer()
    for series in (channel_0_values_day, channel_1_values_day)
    for reducer in (series.mean, series.std)
)