In [12]:
import torch
import torch.nn as nn
import numpy as np
from torch.utils.data import Dataset, DataLoader
from os.path import join
from dataset_dataloader import get_tri_train_val_dataloader

In [13]:
# Raw EATD-Corpus location and its train / validation sub-directories.
DATASET_RAW_DIR = "/public1/cjh/workspace/DepressionPrediction/dataset/EATD-Corpus"
TRAIN_DATASET_DIR, VAL_DATASET_DIR = (
    join(DATASET_RAW_DIR, split_name) for split_name in ("train", "validation")
)

# Directory holding the .npz arrays read by the dataset classes, plus its train subset.
NDARRAY_DIR = "/public1/cjh/workspace/DepressionPrediction/dataset/raw_ndarray"
NDARRAY_TRAIN_DIR = join(NDARRAY_DIR, "train")

class WaveformTFDataset(Dataset):
    """Map-style dataset over the ``arr_0`` array of a waveform ``.npz`` archive.

    The whole array is read into memory once at construction time; sample
    ``i`` is ``arr_0[i]``. In this notebook's run each sample had shape
    ``(2, 1025, 1025)``.

    Args:
        npz_path: Path of the ``.npz`` archive to read. When omitted, falls
            back to ``waveform_tf_raw.npz`` inside ``NDARRAY_TRAIN_DIR``.
    """

    def __init__(self, npz_path=None):
        super().__init__()
        if npz_path is None:
            npz_path = join(NDARRAY_TRAIN_DIR, "waveform_tf_raw.npz")
        # np.load on a .npz returns an NpzFile that keeps the archive open;
        # the context manager closes it once the array has been read out.
        with np.load(npz_path) as archive:
            self.data_np = archive["arr_0"]

    def __len__(self):
        """Return the number of samples (length of the first axis)."""
        return len(self.data_np)

    def __getitem__(self, index):
        """Return ``arr_0[index]``."""
        return self.data_np[index]


In [5]:
# Build the dataset (this reads the full .npz array into memory) and echo its repr.
wavefromtfdataset = WaveformTFDataset()
wavefromtfdataset

<__main__.WaveformTFDataset at 0x7f5cfdd46ec0>

In [7]:
# Sanity check: print the shape of the first sample only, then stop iterating.
for data in wavefromtfdataset:
    print(data.shape)
    break

(2, 1025, 1025)


In [10]:
class EmotinoHiddVecDataset(Dataset):
    """Map-style dataset over the ``arr_0`` array of an emotion hidden-vector ``.npz`` archive.

    The whole array is read into memory once at construction time; sample
    ``i`` is ``arr_0[i]``. In this notebook's run each sample had shape
    ``(1024,)``.

    NOTE: the spelling of the class name ("Emotino") is kept unchanged because
    other cells refer to the class by this name.

    Args:
        npz_path: Path of the ``.npz`` archive to read. When omitted, falls
            back to ``emotion_hiddenvec_raw.npz`` inside ``NDARRAY_TRAIN_DIR``.
    """

    def __init__(self, npz_path=None):
        super().__init__()
        if npz_path is None:
            npz_path = join(NDARRAY_TRAIN_DIR, "emotion_hiddenvec_raw.npz")
        # np.load on a .npz returns an NpzFile that keeps the archive open;
        # the context manager closes it once the array has been read out.
        with np.load(npz_path) as archive:
            self.data_np = archive["arr_0"]

    def __len__(self):
        """Return the number of samples (length of the first axis)."""
        return len(self.data_np)

    def __getitem__(self, index):
        """Return ``arr_0[index]``."""
        return self.data_np[index]

In [11]:
# Build the dataset (this reads the full .npz array into memory) and echo its repr.
emotion_hidden_vec_dataset = EmotinoHiddVecDataset()
emotion_hidden_vec_dataset

<__main__.EmotinoHiddVecDataset at 0x7f5cfd028310>

In [12]:
# Sanity check: print the shape of the first sample only, then stop iterating.
for data in emotion_hidden_vec_dataset:
    print(data.shape)
    break

(1024,)


In [10]:
# Read the text array out of its archive. The `with` block closes the NpzFile
# that np.load returns for .npz inputs instead of leaving the archive open.
with np.load(join(NDARRAY_TRAIN_DIR, "text_raw.npz")) as text_archive:
    data_np = text_archive["arr_0"]
data_np.shape

(249, 384)

In [11]:
# len() of the array is the size of its first axis, i.e. the value a Dataset.__len__ built on it reports.
len(data_np)

249

In [12]:
class TextVecDataset(Dataset):
    """Map-style dataset over the ``arr_0`` array of a text-vector ``.npz`` archive.

    The whole array is read into memory once at construction time; sample
    ``i`` is ``arr_0[i]``. In this notebook's run the array had shape
    ``(249, 384)``, so each sample had shape ``(384,)``.

    Args:
        npz_path: Path of the ``.npz`` archive to read. When omitted, falls
            back to ``text_raw.npz`` inside ``NDARRAY_TRAIN_DIR``.
    """

    def __init__(self, npz_path=None):
        super().__init__()
        if npz_path is None:
            npz_path = join(NDARRAY_TRAIN_DIR, "text_raw.npz")
        # np.load on a .npz returns an NpzFile that keeps the archive open;
        # the context manager closes it once the array has been read out.
        with np.load(npz_path) as archive:
            self.data_np = archive["arr_0"]

    def __len__(self):
        """Return the number of samples (length of the first axis)."""
        return len(self.data_np)

    def __getitem__(self, index):
        """Return ``arr_0[index]``."""
        return self.data_np[index]

In [7]:
# Build the dataset (this reads the full .npz array into memory) and echo its repr.
text_vec_dataset = TextVecDataset()
text_vec_dataset

<__main__.TextVecDataset at 0x7f227d431f90>

In [13]:
# Use the built-in len() rather than calling the dunder method directly.
len(text_vec_dataset)

249

In [8]:
# Sanity check: print the shape of the first sample only, then stop iterating.
for data in text_vec_dataset:
    print(data.shape)
    break

(384,)


In [14]:
# NOTE(review): this import sits mid-notebook; the other imports live in the first cell.
from torch.utils.data import random_split

# 70/30 random split; the explicitly seeded generator makes the partition repeatable.
train_dataset, test_dataset = random_split(text_vec_dataset,[0.7,0.3],generator = torch.Generator().manual_seed(42))

In [17]:
# The object passed to random_split is still an instance of the original dataset class.
type(text_vec_dataset)

__main__.TextVecDataset

In [16]:
# random_split hands back Subset wrappers (see output), not TextVecDataset instances.
type(train_dataset)

torch.utils.data.dataset.Subset

### Test the dataloaders returned by `get_tri_train_val_dataloader`


In [2]:
# Unpack one train/test dataloader pair per input type: waveform TF,
# emotion hidden vector, and text vector.
# NOTE(review): the helper's name says "train_val" while the targets are named
# "..._test_dataloader" — confirm which split the second loader of each pair holds.
(
        waveform_tf_train_dataloader,
        waveform_tf_test_dataloader,
        emotion_hidd_vec_train_dataloader,
        emotion_hidd_vec_test_dataloader,
        text_vec_train_dataloader,
        text_vec_test_dataloader,
    ) = get_tri_train_val_dataloader()

In [3]:
# Pull a single (data, label) batch from the waveform training loader.
train_data, train_label = next(iter(waveform_tf_train_dataloader))

In [5]:
# Batch shape: the per-sample (2, 1025, 1025) shape seen earlier with a leading batch axis of 32.
train_data.shape

torch.Size([32, 2, 1025, 1025])

In [9]:
# The labels arrive as a tuple of numeric strings (see output), not a tensor;
# they will need converting to floats before any numeric use.
train_label

('32.500000',
 '53.750000',
 '31.250000',
 '36.250000',
 '31.250000',
 '45.000000',
 '47.500000',
 '47.500000',
 '42.500000',
 '66.250000',
 '51.250000',
 '37.500000',
 '33.750000',
 '53.750000',
 '28.750000',
 '52.500000',
 '36.250000',
 '41.250000',
 '56.250000',
 '50.000000',
 '40.000000',
 '52.500000',
 '38.750000',
 '53.750000',
 '41.250000',
 '60.000000',
 '32.500000',
 '71.250000',
 '45.000000',
 '33.750000',
 '56.250000',
 '43.750000')

In [10]:
# Pull the first batch from the emotion loader, keeping only its labels for comparison.
_, train_label_2 = next(iter(emotion_hidd_vec_train_dataloader))

In [11]:
# These labels differ from the waveform loader's first batch (compare the two outputs),
# so the two loaders did not yield samples in the same order in this run.
# NOTE(review): confirm whether the modalities must be aligned per batch.
train_label_2

('36.250000',
 '33.750000',
 '43.750000',
 '31.250000',
 '48.750000',
 '52.500000',
 '53.750000',
 '58.750000',
 '43.750000',
 '33.750000',
 '48.750000',
 '53.750000',
 '68.750000',
 '36.250000',
 '40.000000',
 '33.750000',
 '76.250000',
 '36.250000',
 '43.750000',
 '43.750000',
 '71.250000',
 '67.500000',
 '45.000000',
 '50.000000',
 '37.500000',
 '53.750000',
 '51.250000',
 '32.500000',
 '50.000000',
 '50.000000',
 '40.000000',
 '36.250000')