In [1]:
import path_configs # noqa
import tensorflow as tf
import settings
from datetime import datetime
import pandas as pd
import os
from pathlib import Path
import matplotlib.pyplot as plt
import scipy
import numpy as np
import librosa
# Project-level initialisation from the local `settings` module.
# NOTE(review): what init() sets up is not visible from this notebook — confirm
# before moving it; it is kept in its original position ahead of the seaborn setup.
settings.init()
import seaborn as sns
# Notebook-wide seaborn theme: fonts scaled by 1.5, Arial typeface, "white" style.
sns.set(font_scale=1.5, font="Arial", style="white")
# from modules.ClassifierGenerators import (TrainClassifierGenerator,  # noqa
#                                           ValidationClassifierGenerator)  # noqa

# Enable on-demand GPU memory growth for TensorFlow.
# The setting is applied to every visible GPU: TensorFlow requires memory growth
# to be identical across GPUs, and iterating (instead of indexing element 0)
# makes this a no-op on CPU-only machines rather than an IndexError.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, True)

TRAIN_DATA_LEN: 3714569
VAL_DATA_LEN: 1590319


In [2]:
def scaled(tensor):
    """Min-max normalise a tensor so its values span [0, 1].

    The denominator is ``max - min``, so the input must not be constant
    (all elements equal); a constant tensor leads to a division by zero.
    """
    lowest = tf.math.reduce_min(tensor)
    highest = tf.math.reduce_max(tensor)
    return (tensor - lowest) / (highest - lowest)
def scaled_array(array):
    """Min-max normalise a NumPy array to [0, 1]; the array must not be constant."""
    floor = np.min(array)
    span = np.max(array) - floor
    return (array - floor) / span

import scipy.spatial.distance as dist
def similarity(x1, x2, sr=22050):
    """Score how alike two audio signals are from their MFCCs.

    Both signals are converted to MFCC matrices and flattened; the second
    vector is resampled to the length of the first so the two can be compared
    element-wise, and the correlation distance between them is mapped to a
    similarity score.

    :param x1: first audio time series
    :param x2: second audio time series
    :param sr: sampling rate handed to ``librosa.feature.mfcc``. The default
        (22050) is librosa's own default, so existing two-argument calls
        behave as before.
    :return: ``1 / (1 + correlation_distance)``; since the correlation distance
        lies in [0, 2], the score lies in [1/3, 1], with 1 meaning perfectly
        correlated MFCC vectors.
    """
    # The audio must be passed by keyword: recent librosa releases no longer
    # accept it positionally, while older releases accept `y=` as well.
    mfcc_1 = librosa.feature.mfcc(y=x1, sr=sr).flatten()
    mfcc_2 = librosa.feature.mfcc(y=x2, sr=sr).flatten()
    # Bring the second vector to the same length as the first.
    mfcc_2 = scipy.signal.resample(mfcc_2, len(mfcc_1))
    distance = dist.correlation(mfcc_1, mfcc_2)
    return 1 / (1 + distance)

from modules.DataPreprocessor import DataLoader
# Shared data loader and the audio framing constants derived from it.
dl = DataLoader()
sample_rate = 48000
window_time = dl.window_time
# The division by 1000 treats window_time as milliseconds, giving the window size in samples.
frame_length = int(sample_rate * window_time / 1000)
# Hop size: a quarter of the window.
frame_step = frame_length // 4
def reverse_to_audio(db_spec):
    """Reconstruct an audio signal from a decibel-scaled mel spectrogram.

    :param db_spec: mel spectrogram in dB (relative to ``ref=1.0``)
    :return: the time series produced by ``librosa.feature.inverse.mel_to_audio``
    """
    # Undo the dB scaling first so the inversion receives power values.
    power_spec = librosa.db_to_power(db_spec, ref=1.0)
    print('reversing')
    # Use the notebook-level framing constants instead of repeating the
    # literals 48000 and frame_length // 4, so the inversion cannot drift
    # out of sync with sample_rate / frame_step.
    return librosa.feature.inverse.mel_to_audio(
        power_spec,
        sr=sample_rate,
        n_fft=frame_length,
        hop_length=frame_step,
    )

In [3]:
# Clip metadata table; later cells read its `age` and `gender` columns.
data_info_path = 'data_info.csv'
train = pd.read_csv(data_info_path)

In [4]:
# from tqdm import tqdm
# for set_name in ['test', 'train', 'val']:
#     for gender_name in ['male', 'female']:
#         for age_name in ['teens', 'twenties','seventies', 'fifties', 'fourties','thirties', 'sixties', 'eighties']:        
#             Path(os.path.join('data', 'images',set_name, gender_name, age_name)).mkdir(parents=True, exist_ok=True)

# for spec_num in tqdm(np.arange(dl.train.shape[0]+1)):
#     spec = dl.make_spectrogram(spec_num)
#     paded_spec = dl.pad_spec(spec)
#     set_type = dl.train.iloc[spec_num, 7]
#     age = dl.train.iloc[spec_num, 2]
#     gender = dl.train.iloc[spec_num, 3]
#     for window in np.arange(0, paded_spec.shape[1]-64, 64):
#         spec_window = paded_spec[:, window:window + 128]
#         name = str('spec_' + str(window//64) + '_' + str(spec_num))
#         np.save(os.path.join('data', 'images',set_type, gender, age, name), spec_window, allow_pickle = False)

In [5]:
def append_to_TFRecord(writer: tf.io.TFRecordWriter, x_dict: dict, y_dict: dict) -> None:
    """
    Append data to open writer.

    Every array in ``x_dict`` and ``y_dict`` is cast to float32 and stored as
    its raw bytes, under its dict key, in a single ``tf.train.Example``.
    Array shapes are not stored, so the reader must know them. If a key occurs
    in both dicts, the ``y_dict`` entry is the one that is written.

    :param writer: TFRecordWriter
    :param x_dict: dict with np.arrays
    :param y_dict: dict with np.arrays
    :return: None
    """
    def _bytes_feature(value: bytes) -> tf.train.Feature:
        # Wrap one bytes payload in a single-element BytesList feature.
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

    # ndarray.tostring() was a deprecated alias of tobytes() and no longer
    # exists in NumPy 2.0; tobytes() yields the same C-ordered byte string.
    # np.asarray lets plain Python scalars/lists be passed as well.
    features = {
        key: _bytes_feature(np.asarray(array).astype(np.float32).tobytes())
        for key, array in {**x_dict, **y_dict}.items()
    }
    example = tf.train.Example(features=tf.train.Features(feature=features))
    writer.write(example.SerializeToString())

In [6]:
from tqdm import tqdm

# Geometry of the spectrogram windows written to the records: each window is
# SPEC_WINDOW_WIDTH columns wide and consecutive windows start
# SPEC_WINDOW_HOP columns apart (50% overlap).
SPEC_WINDOW_WIDTH = 256
SPEC_WINDOW_HOP = 128
N_MFCC = 36

# String labels -> float targets. Age decades map to their midpoint / 100.
# Labels missing from these dicts become NaN after .map().
AGE_TO_FLOAT = {'teens': 15/100, 'twenties': 25/100,
                'seventies': 75/100, 'fifties': 55/100,
                'fourties': 45/100, 'thirties': 35/100,
                'sixties': 65/100, 'eighties': 85/100}
GENDER_TO_FLOAT = {'male': 0, 'female': 1}

path = os.path.join('data', 'tf_record')
Path(path).mkdir(parents=True, exist_ok=True)
options = tf.io.TFRecordOptions(compression_level=1, compression_type="ZLIB")

# One compressed TFRecord file per set type found in dl.train.
for set_type in dl.train.set_type.unique():
    record_path = os.path.join(path, ('data_' + str(set_type) + '.tfrecord'))
    print('\nSaving', str(set_type), 'set to:', record_path)
    # .assign builds a new frame, so the label mapping never writes into a
    # filtered view of dl.train (which triggered SettingWithCopyWarning).
    train_df = dl.train[dl.train.set_type == set_type].assign(
        age=lambda frame: frame.age.map(AGE_TO_FLOAT),
        gender=lambda frame: frame.gender.map(GENDER_TO_FLOAT),
    )

    with tf.io.TFRecordWriter(record_path, options=options) as writer:
        for spec_num in tqdm(train_df.index.to_numpy()):
            # getting spectrograms and labels from data_loader
            spec = dl.make_spectrogram(spec_num)
            paded_spec = dl.pad_spec(spec)
            # Labels are looked up by column name; integer positions on a
            # label-indexed row (row[2], row[3]) are deprecated in pandas 2.x
            # and break silently if the column order changes.
            age = train_df.at[spec_num, 'age']
            gender = train_df.at[spec_num, 'gender']
            # Only full-width windows are written: stop at the last start that
            # still leaves SPEC_WINDOW_WIDTH columns. When the padded width is a
            # multiple of the hop this yields the same windows as before;
            # otherwise it drops a final window that would have been narrower
            # than the others and unreadable with a fixed shape.
            last_start = paded_spec.shape[1] - SPEC_WINDOW_WIDTH
            for window in range(0, last_start + 1, SPEC_WINDOW_HOP):
                spec_window = paded_spec[:, window:window + SPEC_WINDOW_WIDTH]
                # making dicts for tfrecord writer
                mfcc = librosa.feature.mfcc(S=spec_window, n_mfcc=N_MFCC)
                x_dict = {'x': np.array(spec_window), 'x_mfcc': np.array(mfcc)}
                y_dict = {'y_age': np.array(age), 'y_gender': np.array(gender)}
                append_to_TFRecord(writer=writer, x_dict=x_dict, y_dict=y_dict)


  0%|          | 0/30890 [00:00<?, ?it/s]
Saving train set to: data\tf_record\data_train.tfrecord
100%|██████████| 30890/30890 [1:04:55<00:00,  7.93it/s]
  0%|          | 1/7744 [00:00<18:42,  6.90it/s]
Saving val set to: data\tf_record\data_val.tfrecord
100%|██████████| 7744/7744 [16:50<00:00,  7.67it/s]
  1%|          | 1/96 [00:00<00:10,  8.69it/s]
Saving test set to: data\tf_record\data_test.tfrecord
100%|██████████| 96/96 [00:13<00:00,  7.29it/s]


In [15]:
# Open the training TFRecord through read_TFRecord; the result is used as a
# batchable dataset in the timing cell below.
# NOTE(review): read_TFRecord is not defined or imported in any cell visible
# here, so this cell fails on a fresh kernel unless it is defined elsewhere — confirm.
# NOTE(review): the dicts presumably map feature names to per-example shapes, and
# the meaning of the positional argument 2 is not visible — confirm against
# read_TFRecord's signature.
# NOTE(review): the export cell writes 256-column windows and also stores an
# 'x_mfcc' feature; the [128, 128, 1] shape here may be stale — verify.
x_dict = {'x': [128, 128, 1]}
y_dict = {'y_age':[1], 'y_gender':[1]}
dset = read_TFRecord(x_dict, y_dict, 2, os.path.join('data', 'tf_record', 'data_train.tfrecord'))

In [23]:
%%timeit
# Time how long it takes to pull the first batch of 64 examples from dset;
# the loop body is intentionally empty because only the fetch is measured.
for X, y in dset.batch(64).take(1):
    pass

32.2 ms ± 214 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [8]:
# List the files matching the training TFRecord path (checks that the export exists).
train_record_pattern = os.path.join('data', 'tf_record', 'data_train.tfrecord')
tf.io.gfile.glob(train_record_pattern)

['data\\tf_record\\data_train.tfrecord']

In [7]:
# pathlist = Path(os.path.join('data', 'images')).rglob('*.npy')
# path_list = []
# for path in pathlist:
#     path_list.append(str(path))

# df = pd.DataFrame({'path':path_list})
# df['set_type'] = df['path'].apply(lambda string: os.path.normpath(string).split(os.sep)[2])
# df['gender'] = df['path'].apply(lambda string: os.path.normpath(string).split(os.sep)[3])
# df['age'] = df['path'].apply(lambda string: os.path.normpath(string).split(os.sep)[4])

# df = df.sample(frac = 1).reset_index(drop=True)
# df.to_csv(os.path.join('data', 'images', 'image_metadata.csv'), index=False)

In [9]:
# Measure how long every clip is after dl.cut_voice has been applied.
# NOTE(review): the clip count is hard-coded; confirm it matches the loader's
# table before re-running on different data.
# `audio` and `cut_audio` are deliberately plain loop variables: the wav-export
# cell further down reads the values left over from the final iteration.
NUM_CLIPS = 38781 + 1
lengths_cut = []
for spec_num in tqdm(np.arange(NUM_CLIPS)):
    audio = np.array(dl.load_audio_binary(spec_num))
    cut_audio = dl.cut_voice(audio)
    lengths_cut.append(len(cut_audio))

100%|██████████| 38782/38782 [44:11<00:00, 14.63it/s]


In [14]:
# Ratio of the summed cut-audio lengths to the summed `length` column of dl.train.
# NOTE(review): presumably both are measured in samples — confirm the units match.
total_cut_length = np.sum(lengths_cut)
total_original_length = dl.train.length.sum()
total_cut_length / total_original_length

0.6941762549697098

In [17]:
# Gender distribution among the clips labelled 'teens'.
train.loc[train['age'] == 'teens', 'gender'].value_counts()

male      3713
female    2285
Name: gender, dtype: int64

In [18]:
# Gender distribution over the whole metadata table.
train['gender'].value_counts()

female    20005
male      18777
Name: gender, dtype: int64

In [10]:
from scipy.io.wavfile import write


def _to_int16_pcm(signal):
    """Peak-normalise ``signal`` and convert it to 16-bit PCM samples.

    An empty or all-zero signal has no peak to normalise by, so zeros are
    returned instead of dividing by zero.

    :param signal: array-like audio samples
    :return: np.int16 array of the same shape, scaled so the peak is 32767
    """
    signal = np.asarray(signal)
    peak = np.max(np.abs(signal)) if signal.size else 0
    if peak == 0:
        return np.zeros(signal.shape, dtype=np.int16)
    return np.int16(signal / peak * 32767)


# `audio` and `cut_audio` are the values left over from the last iteration of
# the length-measuring loop in an earlier cell; that cell must have run first.
# The results get their own names so the `scaled()` function defined near the
# top of the notebook is not overwritten by an array.
audio_pcm = _to_int16_pcm(audio)
write('testOG.wav', sample_rate, audio_pcm)

cut_audio_pcm = _to_int16_pcm(cut_audio)
write('test_recovered.wav', sample_rate, cut_audio_pcm)