In [155]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from scipy.signal import stft
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [156]:
seed = 42
np.random.seed(seed)

In [157]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [158]:
from google.colab import files

In [159]:
files.upload()

Saving signalencoding_forcbow.py to signalencoding_forcbow (1).py


{'signalencoding_forcbow.py': b'# -*- coding: utf-8 -*-\n"""SignalEncoding_forCBOW.ipynb\n\nAutomatically generated by Colaboratory.\n\nOriginal file is located at\n    https://colab.research.google.com/drive/1VnN37o1QQ4SsdtueffkKBUF0ztoJsaKy\n"""\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport pandas as pd\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.metrics import mean_squared_error\nfrom scipy.signal import stft\nimport os\n\nimport numpy as np\n\n\nclass SignalEncoding:\n    def __init__(self, sampling_rate, num_levels):\n        self.sampling_rate = sampling_rate\n        self.num_levels = num_levels\n        self.time_interval = 1 / self.sampling_rate # Calculate the time interval between samples\n        self.time_steps_between_samples = int(1 / self.time_interval) # Calculate the number of time steps between samples\n        self.step_size = None\n\n\n    def sample_signal(self, signal):\n        sampled_signal = signal[:, ::self.time_steps

In [160]:
import signalencoding_forcbow
from signalencoding_forcbow import SignalEncoding
from signalencoding_forcbow import DeltaEncoding

In [174]:
flow = np.load('/content/drive/MyDrive/CNN model data/lung_data/flow400.npy')
volume = np.load('/content/drive/MyDrive/CNN model data/lung_data/volume400.npy')
pressure = np.load('/content/drive/MyDrive/CNN model data/lung_data/paw400.npy')
#flow = pd.read_excel('/content/drive/MyDrive/spectrogram/flow.xlsx')
#volume = pd.read_excel('/content/drive/MyDrive/spectrogram/volume.xlsx')
#pressure = pd.read_excel('/content/drive/MyDrive/spectrogram/pressure.xlsx')
capacitances = np.load('/content/drive/MyDrive/CNN model data/lung_data/capacitances400.npy')

In [175]:
def normalize_data(data, minimum = None,maximum = None):
    if minimum is None:
        minimum = np.min(np.min(data))
    if maximum is None:
        maximum = np.max(np.max(data))
    data_norm = (data - minimum) / (maximum - minimum)
    return minimum, maximum, data_norm
(min_flow, max_flow, flow) = normalize_data(flow)
(min_volume, max_volume, volume) = normalize_data(volume)
(min_pressure, max_pressure, pressure) = normalize_data(pressure)
(min_capacitance, max_capacitance, capacitances) = normalize_data(capacitances)

In [176]:
flow = flow.T
volume = volume.T
pressure = pressure.T
capacitances = capacitances.T
flow = flow[:,:-1]
volume = volume[:,:-1]
pressure = pressure[:,:-1]
#flow = flow.to_numpy()
#volume = volume.to_numpy()
#pressure = pressure.to_numpy()

In [177]:
flow.shape

(400, 900)

**preparing the data by encoding it through quantization after extracting the signal change value information using the delta encoding method.**

In [178]:
sampling_rate = 50
num_levels = 16
signal_encoding = SignalEncoding(sampling_rate, num_levels)
quantized_flow = signal_encoding.sample_signal(flow)
quantized_pressure = signal_encoding.sample_signal(pressure)
quantized_volume = signal_encoding.sample_signal(volume)

In [179]:
print(quantized_flow.shape)

(400, 18)


In [180]:
delta_encoder = DeltaEncoding()
delta_encoded_flow = delta_encoder.delta_encode(quantized_flow)
delta_encoded_volume = delta_encoder.delta_encode(quantized_volume)
delta_encoded_pressure = delta_encoder.delta_encode(quantized_pressure)

In [181]:
input_signal = np.concatenate([delta_encoded_pressure, delta_encoded_volume, delta_encoded_flow], axis=1)
output_data = capacitances
indices = np.arange(400)

In [182]:
input_signal.shape

(400, 54)

**prepare train, test dataset**

In [183]:
input_train, input_test, output_train, output_test, indices_train, indices_test = \
    train_test_split(input_signal, output_data, indices, test_size=0.3, shuffle=False, random_state=11)

input_validation, input_test, output_validation, output_test, indices_validation, indices_test = \
    train_test_split(input_test, output_test, indices_test, test_size=0.5, shuffle=False, random_state=11)

In [184]:
input_train.shape

(280, 54)

In [185]:
zeros_beginning = np.zeros((280, 2))
zeros_end = np.zeros((280, 2))
train_cbow = np.concatenate((zeros_beginning, input_train, zeros_end), axis=1)

In [186]:
train_cbow.shape

(280, 58)

The input data for CBOW model request both context and target dataset

In [187]:
# separate my raw data to different window. For example, the orginal raw data was [1,2,3,4] and seprarate them to [0,0,1,2,3],[0,1,2,3,4],[1,2,3,4,0] ... ...
def sliding_window(data, window_size):
    windows = []
    for sample in data:
        sample_windows = []
        for i in range(len(sample) - window_size + 1):
            window = sample[i:i+window_size]
            sample_windows.append(window)
        windows.append(sample_windows)
    return windows

window_size = 5
train_cbow = sliding_window(train_cbow, window_size)
print(len(train_cbow[0]))

54


In [190]:
# select the target through each context,
train_cbow_context_target = []
for sample in train_cbow:
    sample_middle_numbers = [(np.hstack((array[:2], array[3:5])), array[2]) for array in sample] #keep the frist 2 data and last 2 data in the array, move the middle data outside 
    train_cbow_context_target.append(sample_middle_numbers)

In [191]:
#prepare the input and label data
data1 = train_cbow_context_target
train_data = []
label = []
for sample in data1:
  contexts, targets = zip(*sample)
  contexts = np.stack(contexts)
  targets = np.stack(targets)
  train_data.append(contexts)
  label.append(targets)

In [194]:
# Define the model architecture
class CBOW(nn.Module):
    def __init__(self, vocabulary_size, embedding_size, context_window_size):
        super(CBOW, self).__init__()
        self.embeddings = nn.Embedding(vocabulary_size, embedding_size)
        self.linear = nn.Linear(embedding_size, vocabulary_size)

    def forward(self, inputs):
        embeds = self.embeddings(inputs).mean(dim=1)
        out = self.linear(embeds)
        return out

vocabulary_size= 1302
embedding_size = 10
context_window_size = 4
num_epochs = 50
model = CBOW(vocabulary_size, embedding_size, context_window_size)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model
for epoch in range(num_epochs):
    total_loss = 0
    for context, target in zip(train_data, label):
        context = torch.tensor(context, dtype=torch.long)
        target = torch.tensor(target, dtype=torch.long)

        model.zero_grad()
        logits = model(context)
        loss = criterion(logits, target)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    if (epoch+1) % 10 == 0:
        print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, total_loss))


IndexError: ignored