In [1]:
import torch, torchvision
import sys # Python system library needed to load custom functions
import math # module with access to mathematical functions
import os # for changing the directory

import numpy as np  # for performing calculations on numerical arrays
import pandas as pd  # home of the DataFrame construct, _the_ most important object for Data Science
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt  # allows creation of insightful plots

# Make the project-local helper packages importable from this notebook.
# Order is preserved so module lookup precedence is unchanged.
sys.path.extend([
    '../audio_preprocessing',
    '../src',
    '../model_training_utils',
])


import preprocessing_func_2
import preprocessing_func_3
from gdsc_utils import PROJECT_DIR
import model_training
import model_eval

# Switch the working directory to PROJECT_DIR (imported from gdsc_utils) so that
# the relative 'data/...' and 'audio_preprocessing/...' paths used in the cells
# below resolve. This is a process-wide side effect.
os.chdir(PROJECT_DIR) # changing our directory to root

In [2]:
# Read the two train+val CSVs. Each row carries a wav `file_path` and a `label`.
# The second file is named "argumentation"; presumably it holds the
# augmented/upsampled samples -- confirm against the script that produced it.
df_big_data = pd.read_csv('data/big_data_processed_train_and_val.csv')
df_big_argumented_data = pd.read_csv('data/big_argumentation_data_train_and_val.csv')

# Stack both frames row-wise and renumber the index from 0 so it is unique.
df = pd.concat(
    objs=(df_big_data, df_big_argumented_data),
    ignore_index=True,
)
df.head()

Unnamed: 0.1,Unnamed: 0,file_path,label
0,0,data/big_data_processed_train_and_val/0.wav,56
1,1,data/big_data_processed_train_and_val/1.wav,56
2,2,data/big_data_processed_train_and_val/2.wav,56
3,3,data/big_data_processed_train_and_val/3.wav,56
4,4,data/big_data_processed_train_and_val/4.wav,56


In [None]:
# paths, labels = list(df["path"]), list(df["label"])

# generator = preprocessing_func_3.non_normalised_data_generator(
#     paths, 
#     labels,
# )

# mean, std, class_weights = preprocessing_func_3.get_stats_and_class_weights_of_non_normalised_data_gen(
#     generator,
#     (128, 512)
# )


In [None]:
# import json

# def save_as_json(path, description, mean, std, weights):
#     my_dict = {
#         "description": description,
#         "mean": float(mean),
#         "std": float(std),
#         "weights": list(weights.astype(float)),
#     }
#     with open(path, 'w') as f:
#         json.dump(my_dict, f)

# save_as_json(
#     "audio_preprocessing/saved_data/upsampled_data_size_128_512.json", 
#     "seconds 1.5, image shape (128,512)", mean, std, class_weights)

In [3]:
import json

# Load the previously saved dataset statistics from JSON.
with open('audio_preprocessing/saved_data/upsampled_data_size_128_512_train_and_val.json') as stats_file:
    my_info = json.load(stats_file)

# Unpack the three entries used later: normalisation mean/std and class weights.
mean, std, class_weights = (my_info[key] for key in ("mean", "std", "weights"))

In [4]:
# Pull the wav file paths and their labels out of the frame as NumPy arrays;
# both arrays share the row order of `df`.
wav_path, labels = df["file_path"].values, df["label"].values

In [5]:
# Display the array of wav file paths taken from df["file_path"].
wav_path

array(['data/big_data_processed_train_and_val/0.wav',
       'data/big_data_processed_train_and_val/1.wav',
       'data/big_data_processed_train_and_val/2.wav', ...,
       'data/big_data_upsample_train_and_val/65627.wav',
       'data/big_data_upsample_train_and_val/65628.wav',
       'data/big_data_upsample_train_and_val/65629.wav'], dtype=object)

In [6]:
# Display the array of labels taken from df["label"].
labels

array([56, 56, 56, ..., 65, 65, 65], dtype=int64)

In [7]:
def wav_to_nomralised_image(
    wav_path,
    mean,
    std,
    mel_transform_fn=preprocessing_func_2.calculate_melsp,
    image_preprocess_fn=preprocessing_func_2.resize_function(),
    output_dir="data/image_train_val_with_upsample",
):
    """Convert wav files to normalised mel-spectrogram tensors saved on disk.

    Each wav is loaded, turned into a mel-spectrogram, converted to a tensor,
    passed through ``image_preprocess_fn``, normalised with ``mean``/``std``
    and written to ``{output_dir}/{i}.pt``, where ``i`` is the position of the
    wav in ``wav_path``.

    Parameters
    ----------
    wav_path : iterable of str
        Paths of the wav files to convert, processed in iteration order.
    mean, std :
        Statistics forwarded unchanged to
        ``preprocessing_func_2.normalise_image``.
    mel_transform_fn : callable
        Maps the loaded wav to a mel-spectrogram.
    image_preprocess_fn : callable
        Applied to the tensor before normalisation. The default is built
        once, when the function is defined.
    output_dir : str
        Directory that receives the ``.pt`` files. It is created if missing.
        Existing files with the same index are overwritten.

    Returns
    -------
    list of str
        Paths of the saved tensors, in the same order as ``wav_path``.
    """
    # torch.save does not create missing parent directories, so make sure the
    # target exists before the (long) conversion loop starts.
    os.makedirs(output_dir, exist_ok=True)

    new_paths = []
    for i, path in enumerate(wav_path):
        # Load the wav from disk.
        wav = preprocessing_func_2.load_wav(path=path)
        # Wav -> mel-spectrogram. The original notes state shape (128, 530)
        # for 1.5 second clips -- not verified here.
        db_mel_spec = mel_transform_fn(wav)
        # Spectrogram -> torch tensor (per the original notes: (1, 128, 530)).
        db_mel_spec = preprocessing_func_2.to_reshaped_tensor(db_mel_spec)
        # Resize step (per the original notes: -> (1, 128, 512)).
        db_mel_spec = image_preprocess_fn(db_mel_spec)
        # Normalise with the precomputed dataset statistics.
        db_mel_spec = preprocessing_func_2.normalise_image(db_mel_spec, mean, std)
        # Keep the literal "/" separator (not os.path.join) so the path
        # strings returned, and later written to CSV, are the same on every OS.
        new_path = f"{output_dir}/{i}.pt"
        torch.save(db_mel_spec, new_path)
        new_paths.append(new_path)
    return new_paths

In [8]:
# Convert every wav in `wav_path` and save the resulting tensors to disk.
# `new_paths` lists the saved .pt files in the same order as `wav_path`
# (and therefore as `labels`).
new_paths = wav_to_nomralised_image(wav_path, mean, std)

In [9]:
# Load the first saved tensor (index 0) back from disk so it can be inspected.
image = torch.load("data/image_train_val_with_upsample/0.pt")

In [10]:
# Display the tensor loaded from 0.pt.
image

tensor([[[ 1.0756,  1.2185,  1.2806,  ...,  0.9880,  1.0924,  1.0940],
         [ 1.1905,  1.2817,  1.1888,  ...,  0.8558,  0.7310,  0.7372],
         [ 1.2310,  1.3087,  1.1965,  ...,  0.7955,  0.5853,  0.5278],
         ...,
         [-0.0763,  0.3012,  0.5926,  ..., -0.9725, -0.8316, -0.8199],
         [-0.2147,  0.3990,  0.7629,  ..., -0.9458, -0.8384, -0.8056],
         [ 0.1670,  0.5333,  0.8066,  ..., -0.9471, -0.8797, -0.8188]]])

In [11]:
# Pair each saved tensor path with its label; both sequences follow the row
# order of `df`, so position i in one matches position i in the other.
my_dict = {
    "file_path": new_paths,
    "label": labels
}

processed_data_df = pd.DataFrame(my_dict)
# index=False: do not write the positional index as an extra unnamed column.
# Writing it is what produces the "Unnamed: 0" / "Unnamed: 0.1" columns seen
# in the CSVs loaded at the top of this notebook once the file is re-read.
processed_data_df.to_csv(
    "data/normalised_image_train_val_with_upsample.csv",
    index=False,
)