# Analyze the range of values from the audio


In [5]:
from scipy.io import wavfile

In [7]:
# Location of one sample recording inside the t02 folder of the raw data.
wav_file = (
    "../data/raw/dadosBruno/t02/"
    "t02_S-ISCA_C1_Aedes female-20-07-2017_1_001_456.wav"
)

# Load the recording; scipy's reader returns a (sample_rate, data) tuple.
wav = wavfile.read(wav_file)

In [1]:
# OPTIONAL: load the IPython "autoreload" extension so that edited modules can
# be re-imported without restarting the kernel
%load_ext autoreload

# OPTIONAL: mode 2 reloads modules before each cell is executed, so that as
# you change code in src, the new version gets loaded
%autoreload 2

In [2]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from joblib import dump, load

from src.data import make_dataset
from src.data import read_dataset
from src.data import util

import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's default theme to the plots produced in this notebook
sns.set()

In [6]:
# Seed NumPy's global random number generator for reproducibility.
# NOTE(review): the seed only affects code executed after this cell — confirm
# it runs before any cell that relies on random splitting.
np.random.seed(42)

In [3]:
from src.data.read_dataset import read_temperature

def make_temperatures(conversion, testing=False, *, random_state=None,
                      val_size=0.2, output_dir="../data/interim"):
    """Build per-temperature train/val/test archives and fitted scalers.

    For each temperature folder ``t02`` .. ``t07`` the data is loaded with
    ``read_temperature``, split into train / validation / test sets, a
    ``StandardScaler`` is fitted on the training rows, and both the scaler
    and the three splits are written under ``output_dir``.

    The saved arrays hold the *unscaled* values cast to ``int``; the scaler
    is stored separately so it can be applied later.

    Parameters
    ----------
    conversion : str
        Passed through to ``read_temperature`` and embedded in the output
        file names (this notebook uses ``"repeat"`` and ``"zero"``).
    testing : bool, optional
        Currently unused by this function; kept so existing callers that
        pass it keep working.
    random_state : int, RandomState instance or None, optional
        Forwarded to ``train_test_split``. ``None`` (the default) keeps the
        original behaviour of drawing from NumPy's global random state, so
        the split is only reproducible if that state was seeded beforehand.
        Pass an integer to make the split reproducible regardless of what
        ran earlier.
    val_size : float or int, optional
        Forwarded as ``test_size`` to ``train_test_split``; the share of the
        training rows held out for validation. Defaults to ``0.2``.
    output_dir : str, optional
        Directory that receives the ``scaler_*.pkl`` and ``all_wavs_*``
        files. Defaults to ``"../data/interim"``.

    Returns
    -------
    None
        The results are written to disk.
    """
    # Integer-labelled feature columns 0..11024 (presumably one column per
    # audio sample — confirm against read_temperature). The saved arrays
    # additionally carry the "label" column as their last column.
    num_cols = list(range(11025))
    save_cols = num_cols + ["label"]

    for i in range(2, 8):
        temperature = f"t0{i}"
        df = read_temperature(temperature, conversion)

        # Rows flagged training == 1 form the training pool; the rest is test
        train_idx = df["training"] == 1
        train_data = df.loc[train_idx]
        test_data = df.loc[~train_idx]

        # Hold out part of the training pool as a validation set
        train_data, val_data = train_test_split(
            train_data, test_size=val_size, random_state=random_state
        )

        # Fit the scaler on the remaining training rows only, so validation
        # and test rows do not influence its statistics
        scaler = StandardScaler()
        scaler.fit(train_data[num_cols])
        dump(scaler, f"{output_dir}/scaler_{conversion}_{temperature}.pkl")

        # Save the splits as one compressed numpy archive (numpy appends the
        # ".npz" extension to the given file name)
        np.savez_compressed(
            f"{output_dir}/all_wavs_{conversion}_{temperature}",
            train=train_data[save_cols].astype(int),
            val=val_data[save_cols].astype(int),
            test=test_data[save_cols].astype(int),
        )

In [4]:
# Write the scalers and split archives for the "repeat" conversion
make_temperatures("repeat")

In [5]:
# Write the scalers and split archives for the "zero" conversion
make_temperatures("zero")