<a href="https://colab.research.google.com/github/diegompin/mtsa/blob/feature%2Fv0.0.8/examples/MTSA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# MTSA - **M**ultiple **T**ime **S**eries **A**nalysis


### Installing MTSA module:

In [None]:
!pip install mtsa

### Cloning MTSA repository:

*Obs: This step is necessary to access the example data.*

In [None]:
!git clone https://github.com/diegompin/mtsa.git

### Setting data directory:

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import os
path_input_1 = os.path.join(os.getcwd(),   "slider", "id_00")
path_input_2 = os.path.join(os.getcwd(),  "Data", "slider", "id_00")

In [None]:
import torch
import tensorflow as tf
gpuTorch = torch.cuda.is_available()
gpuTF = tf.test.is_gpu_available()
if gpuTorch:
    for i in range(torch.cuda.device_count()):
        print(torch.cuda.get_device_name(i))
gpus = tf.config.list_physical_devices('GPU')
print(gpus ,gpuTF)


assert len(gpus) < 0, "Not enough GPU hardware devices available"
if gpus:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)


gpus

In [5]:
import sys
sys.path.append("..")

### Reading Data Files:

In [7]:
from mtsa import files_train_test_split
#path_input_1 = '/data/MIMII/fan/id_00/'
X_train, X_test, y_train, y_test = files_train_test_split(path_input_1)
if(len(y_train) == 0): 
    X_train, X_test, y_train, y_test = files_train_test_split(path_input_2)
y_train

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1.

### Preprocessing Mimii .wav files into MFCC arrays and generating chunks

In [None]:
from mtsa import Wav2Array, MFCCMix
import numpy as np
import pandas as pd
import torch as th
from sklearn.model_selection import train_test_split
import pickle as pkl



### Convert .wav files to MFCC


In [18]:

mfcc_transformer = MFCCMix()
train_mfcc = mfcc_transformer.transform(X_train)
test_mfcc = mfcc_transformer.transform(X_test)
mfcc_transformer.model

In [23]:
train_mfcc

array([[-4.99341400e+02,  1.53726074e+02, -2.19820137e+01, ...,
         4.95676720e-01,  2.68168420e-01,  4.07431707e-01],
       [-5.17619690e+02,  1.74352020e+02, -2.03603668e+01, ...,
         4.40008154e-01,  1.53263700e-01,  3.08171675e-01],
       [-5.15609192e+02,  1.67149139e+02, -1.33088789e+01, ...,
         3.90271473e-01,  1.23951276e-01,  3.03577201e-01],
       ...,
       [-5.01106354e+02,  1.75985626e+02, -2.64195805e+01, ...,
         4.20170733e-01,  1.70819244e-01,  3.25625659e-01],
       [-5.06160370e+02,  1.87026688e+02, -3.46673241e+01, ...,
         2.44849928e-01,  2.53015966e-01,  3.77102495e-02],
       [-4.98753662e+02,  1.56623764e+02, -2.58567467e+01, ...,
         5.21995859e-01,  2.38081958e-01,  4.50568186e-01]])

In [19]:
# Step 3: Generate chunks from MFCC arrays
def generate_chunks_from_array(data, chunk_size, chunk_stride):
    chunks = []
    for i in range(0, len(data) - chunk_size, chunk_stride):
        chunks.append(data[i:i + chunk_size])
    return np.array(chunks)

chunk_size = 1800  # Number of samples per chunk
chunk_stride = 600  # Stride for generating chunks

train_chunks = generate_chunks_from_array(train_mfcc, chunk_size, chunk_stride)
test_chunks = generate_chunks_from_array(test_mfcc, chunk_size, chunk_stride)

In [25]:
train_chunks

array([], dtype=float64)

In [24]:
# Step 4: Combine training and testing chunks for validation
all_chunks = np.concatenate([train_chunks, test_chunks])
all_chunks.shape

(0,)

In [None]:
# Step 5: Generate timestamps starting from 00:00:00
def generate_timestamps(num_chunks, start_time="00:00:00", interval_seconds=1):
    start = pd.Timestamp(start_time)
    return [start + pd.Timedelta(seconds=i * interval_seconds) for i in range(num_chunks)]

train_timestamps = generate_timestamps(len(train_chunks))
test_timestamps = generate_timestamps(len(test_chunks))
val_timestamps = generate_timestamps(len(val_chunks))

In [None]:
# Step 6: Save the chunks and timestamps
output_dir = "data/preprocessed_chunks"
os.makedirs(output_dir, exist_ok=True)

with open(os.path.join(output_dir, "train_chunks.pkl"), "wb") as f:
    pkl.dump(train_chunks, f)
with open(os.path.join(output_dir, "test_chunks.pkl"), "wb") as f:
    pkl.dump(test_chunks, f)
with open(os.path.join(output_dir, "val_chunks.pkl"), "wb") as f:
    pkl.dump(val_chunks, f)

with open(os.path.join(output_dir, "train_timestamps.pkl"), "wb") as f:
    pkl.dump(train_timestamps, f)
with open(os.path.join(output_dir, "test_timestamps.pkl"), "wb") as f:
    pkl.dump(test_timestamps, f)
with open(os.path.join(output_dir, "val_timestamps.pkl"), "wb") as f:
    pkl.dump(val_timestamps, f)

print("Preprocessing complete. Chunks and timestamps saved.")