In [1]:
!pip install scikit-learn



# Checking if GPU is in use

In [2]:
pip show tensorflow


Name: tensorflowNote: you may need to restart the kernel to use updated packages.

Version: 2.12.0
Summary: TensorFlow is an open source machine learning framework for everyone.
Home-page: https://www.tensorflow.org/
Author: Google Inc.
Author-email: packages@tensorflow.org
License: Apache 2.0
Location: c:\users\l_alm\miniconda3\envs\pytorch_env\lib\site-packages
Requires: tensorflow-intel
Required-by: 


In [3]:
import tensorflow as tf
print("TensorFlow Version:", tf.__version__)
print("GPU Devices:", tf.config.list_physical_devices('GPU'))
from tensorflow.python.client import device_lib
print("Local Devices:", device_lib.list_local_devices())



TensorFlow Version: 2.12.0
GPU Devices: []
Local Devices: [name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 13426814924348881434
xla_global_id: -1
]


In [4]:
import tensorflow as tf
print(tf.__version__)


2.12.0


In [5]:
import torch
print(torch.cuda.is_available())


True


In [6]:
import torch
print(torch.cuda.get_device_name(0))


NVIDIA GeForce GTX 1660 Ti with Max-Q Design


In [7]:
import os
print("Current Working Directory:", os.getcwd())


Current Working Directory: C:\Users\l_alm


In [8]:

print(os.listdir())



['.anaconda', '.bash_history', '.cache', '.conda', '.condarc', '.config', '.continuum', '.dropbox_bi', '.gitconfig', '.grasp_settings', '.idlerc', '.ipynb_checkpoints', '.ipython', '.jupyter', '.keras', '.lesshst', '.m2', '.matplotlib', '.ms-ad', '.nbi', '.packettracer', '.spyder-py3', '.ssh', '.vscode', '1D-ResNet-SE-LSTM-main', '3D Objects', 'ansel', 'AppData', 'Apple', 'Application Data', 'attempt2.ipynb', 'cifar 10 classification.ipynb', 'Cisco Packet Tracer 8.2.2', 'Contacts', 'Cookies', 'CSC 340', 'cudnn-windows-x86_64-8.6.0.163_cuda11-archive', 'customerTargeting.csv', 'data', 'Documents', 'Downloads', 'Dropbox', 'ds-phase2', 'euclidean_distance.c', 'Favorites', 'fmri.mat', 'gcc', 'Homework_1.ipynb', 'IntelGraphicsProfiles', 'Jedi', 'jjjj', 'leen', 'Links', 'Local Settings', 'mingw-get-setup.exe', 'miniconda3', 'mkscancer-master', 'Music', 'My Documents', 'myenv', 'NetHood', 'NTUSER.DAT', 'ntuser.dat.LOG1', 'ntuser.dat.LOG2', 'NTUSER.DAT{2ad838bc-efea-11ee-a54d-000d3a94eaa1}.TM.

In [9]:
!pip install tqdm




In [10]:
!pip install matplotlib




In [11]:
!pip install pandas




In [12]:
!pip install tensorboardX




In [13]:
!pip install torchsummary




# Adjusting to directory

In [14]:
import os
os.chdir(r'C:\Users\l_alm\resnet1d-master')  #adjust this to your base directory if needed
print(f"Current working directory: {os.getcwd()}")


Current working directory: C:\Users\l_alm\resnet1d-master


In [15]:
!pip install --upgrade torch torchvision




In [16]:
!pip install keras




In [17]:
pip install tensorflow


Note: you may need to restart the kernel to use updated packages.


In [18]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
print("Successfully imported pad_sequences from TensorFlow Keras")


Successfully imported pad_sequences from TensorFlow Keras


# checkng if there is enough memory for training

> Add blockquote



In [19]:
import psutil

memory = psutil.virtual_memory()
print(f"Available memory: {memory.available / 1e9:.2f} GB")
print(f"Total memory: {memory.total / 1e9:.2f} GB")


Available memory: 2.22 GB
Total memory: 16.94 GB


In [20]:
import numpy as np
from tqdm.notebook import tqdm
import pandas as pd
import scipy.io
from matplotlib import pyplot as plt
import pickle
from sklearn.model_selection import train_test_split
from collections import Counter
from tqdm import tqdm


def read_data_generated(n_samples, n_length, n_channel, n_classes, verbose=False):
    """
    Generated data

    This generated data contains one noise channel class, plus unlimited number of sine channel classes which are different on frequency.

    """
    all_X = []
    all_Y = []

    # noise channel class
    X_noise = np.random.rand(n_samples, n_channel, n_length)
    Y_noise = np.array([0]*n_samples)
    all_X.append(X_noise)
    all_Y.append(Y_noise)

    # sine channel classe
    x = np.arange(n_length)
    for i_class in range(n_classes-1):
        scale = 2**i_class
        offset_list = 2*np.pi*np.random.rand(n_samples)
        X_sin = []
        for i_sample in range(n_samples):
            tmp_x = []
            for i_channel in range(n_channel):
                tmp_x.append(np.sin(x/scale+2*np.pi*np.random.rand()))
            X_sin.append(tmp_x)
        X_sin = np.array(X_sin)
        Y_sin = np.array([i_class+1]*n_samples)
        all_X.append(X_sin)
        all_Y.append(Y_sin)

    # combine and shuffle
    all_X = np.concatenate(all_X)
    all_Y = np.concatenate(all_Y)
    shuffle_idx = np.random.permutation(all_Y.shape[0])
    all_X = all_X[shuffle_idx]
    all_Y = all_Y[shuffle_idx]

    # random pick some and plot
    if verbose:
        for _ in np.random.permutation(all_Y.shape[0])[:10]:
            fig = plt.figure()
            plt.plot(all_X[_,0,:])
            plt.title('Label: {0}'.format(all_Y[_]))

    return all_X, all_Y


#if __name__ == "__main__":
  #  read_data_physionet_2_clean_federated(m_clients=4)

# 1d resnet model definition

In [21]:
"""
resnet for 1-d signal data, pytorch version

Shenda Hong, Oct 2019
"""

import numpy as np
from collections import Counter
from tqdm import tqdm
from matplotlib import pyplot as plt
from sklearn.metrics import classification_report

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

class MyDataset(Dataset):
    def __init__(self, data, label):
        self.data = data
        self.label = label

    def __getitem__(self, index):
        return (torch.tensor(self.data[index], dtype=torch.float), torch.tensor(self.label[index], dtype=torch.long))

    def __len__(self):
        return len(self.data)

class MyConv1dPadSame(nn.Module):
    """
    extend nn.Conv1d to support SAME padding
    """
    def __init__(self, in_channels, out_channels, kernel_size, stride, groups=1):
        super(MyConv1dPadSame, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.groups = groups
        self.conv = torch.nn.Conv1d(
            in_channels=self.in_channels,
            out_channels=self.out_channels,
            kernel_size=self.kernel_size,
            stride=self.stride,
            groups=self.groups)

    def forward(self, x):

        net = x

        # compute pad shape
        in_dim = net.shape[-1]
        out_dim = (in_dim + self.stride - 1) // self.stride
        p = max(0, (out_dim - 1) * self.stride + self.kernel_size - in_dim)
        pad_left = p // 2
        pad_right = p - pad_left
        net = F.pad(net, (pad_left, pad_right), "constant", 0)

        net = self.conv(net)

        return net

class MyMaxPool1dPadSame(nn.Module):
    """
    extend nn.MaxPool1d to support SAME padding
    """
    def __init__(self, kernel_size):
        super(MyMaxPool1dPadSame, self).__init__()
        self.kernel_size = kernel_size
        self.stride = 1
        self.max_pool = torch.nn.MaxPool1d(kernel_size=self.kernel_size)

    def forward(self, x):

        net = x

        # compute pad shape
        in_dim = net.shape[-1]
        out_dim = (in_dim + self.stride - 1) // self.stride
        p = max(0, (out_dim - 1) * self.stride + self.kernel_size - in_dim)
        pad_left = p // 2
        pad_right = p - pad_left
        net = F.pad(net, (pad_left, pad_right), "constant", 0)

        net = self.max_pool(net)

        return net

class BasicBlock(nn.Module):
    """
    ResNet Basic Block
    """
    def __init__(self, in_channels, out_channels, kernel_size, stride, groups, downsample, use_bn, use_do, is_first_block=False):
        super(BasicBlock, self).__init__()

        self.in_channels = in_channels
        self.kernel_size = kernel_size
        self.out_channels = out_channels
        self.stride = stride
        self.groups = groups
        self.downsample = downsample
        if self.downsample:
            self.stride = stride
        else:
            self.stride = 1
        self.is_first_block = is_first_block
        self.use_bn = use_bn
        self.use_do = use_do

        # the first conv
        self.bn1 = nn.BatchNorm1d(in_channels)
        self.relu1 = nn.ReLU()
        self.do1 = nn.Dropout(p=0.5)
        self.conv1 = MyConv1dPadSame(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=self.stride,
            groups=self.groups)

        # the second conv
        self.bn2 = nn.BatchNorm1d(out_channels)
        self.relu2 = nn.ReLU()
        self.do2 = nn.Dropout(p=0.5)
        self.conv2 = MyConv1dPadSame(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=1,
            groups=self.groups)

        self.max_pool = MyMaxPool1dPadSame(kernel_size=self.stride)

    def forward(self, x):

        identity = x

        # the first conv
        out = x
        if not self.is_first_block:
            if self.use_bn:
                out = self.bn1(out)
            out = self.relu1(out)
            if self.use_do:
                out = self.do1(out)
        out = self.conv1(out)

        # the second conv
        if self.use_bn:
            out = self.bn2(out)
        out = self.relu2(out)
        if self.use_do:
            out = self.do2(out)
        out = self.conv2(out)

        # if downsample, also downsample identity
        if self.downsample:
            identity = self.max_pool(identity)

        # if expand channel, also pad zeros to identity
        if self.out_channels != self.in_channels:
            identity = identity.transpose(-1,-2)
            ch1 = (self.out_channels-self.in_channels)//2
            ch2 = self.out_channels-self.in_channels-ch1
            identity = F.pad(identity, (ch1, ch2), "constant", 0)
            identity = identity.transpose(-1,-2)

        # shortcut
        out += identity

        return out

class ResNet1D(nn.Module):
    """

    Input:
        X: (n_samples, n_channel, n_length)
        Y: (n_samples)

    Output:
        out: (n_samples)

    Pararmetes:
        in_channels: dim of input, the same as n_channel
        base_filters: number of filters in the first several Conv layer, it will double at every 4 layers
        kernel_size: width of kernel
        stride: stride of kernel moving
        groups: set larget to 1 as ResNeXt
        n_block: number of blocks
        n_classes: number of classes

    """

    def __init__(self, in_channels, base_filters, kernel_size, stride, groups, n_block, n_classes, downsample_gap=2, increasefilter_gap=4, use_bn=True, use_do=True, verbose=False):
        super(ResNet1D, self).__init__()

        self.verbose = verbose
        self.n_block = n_block
        self.kernel_size = kernel_size
        self.stride = stride
        self.groups = groups
        self.use_bn = use_bn
        self.use_do = use_do

        self.downsample_gap = downsample_gap # 2 for base model
        self.increasefilter_gap = increasefilter_gap # 4 for base model

        # first block
        self.first_block_conv = MyConv1dPadSame(in_channels=in_channels, out_channels=base_filters, kernel_size=self.kernel_size, stride=1)
        self.first_block_bn = nn.BatchNorm1d(base_filters)
        self.first_block_relu = nn.ReLU()
        out_channels = base_filters

        # residual blocks
        self.basicblock_list = nn.ModuleList()
        for i_block in range(self.n_block):
            # is_first_block
            if i_block == 0:
                is_first_block = True
            else:
                is_first_block = False
            # downsample at every self.downsample_gap blocks
            if i_block % self.downsample_gap == 1:
                downsample = True
            else:
                downsample = False
            # in_channels and out_channels
            if is_first_block:
                in_channels = base_filters
                out_channels = in_channels
            else:
                # increase filters at every self.increasefilter_gap blocks
                in_channels = int(base_filters*2**((i_block-1)//self.increasefilter_gap))
                if (i_block % self.increasefilter_gap == 0) and (i_block != 0):
                    out_channels = in_channels * 2
                else:
                    out_channels = in_channels

            tmp_block = BasicBlock(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=self.kernel_size,
                stride = self.stride,
                groups = self.groups,
                downsample=downsample,
                use_bn = self.use_bn,
                use_do = self.use_do,
                is_first_block=is_first_block)
            self.basicblock_list.append(tmp_block)

        # final prediction
        self.final_bn = nn.BatchNorm1d(out_channels)
        self.final_relu = nn.ReLU(inplace=True)
        # self.do = nn.Dropout(p=0.5)
        self.dense = nn.Linear(out_channels, n_classes)
        # self.softmax = nn.Softmax(dim=1)

    def forward(self, x):

        out = x

        # first conv
        if self.verbose:
            print('input shape', out.shape)
        out = self.first_block_conv(out)
        if self.verbose:
            print('after first conv', out.shape)
        if self.use_bn:
            out = self.first_block_bn(out)
        out = self.first_block_relu(out)

        # residual blocks, every block has two conv
        for i_block in range(self.n_block):
            net = self.basicblock_list[i_block]
            if self.verbose:
                print('i_block: {0}, in_channels: {1}, out_channels: {2}, downsample: {3}'.format(i_block, net.in_channels, net.out_channels, net.downsample))
            out = net(out)
            if self.verbose:
                print(out.shape)

        # final prediction
        if self.use_bn:
            out = self.final_bn(out)
        out = self.final_relu(out)
        out = out.mean(-1)
        if self.verbose:
            print('final pooling', out.shape)
        # out = self.do(out)
        out = self.dense(out)
        if self.verbose:
            print('dense', out.shape)
        # out = self.softmax(out)
        if self.verbose:
            print('softmax', out.shape)

        return out

In [22]:
"""
resnet for 1-d signal data, pytorch version

Shenda Hong, Oct 2019
"""

import numpy as np
from collections import Counter
from tqdm import tqdm
from matplotlib import pyplot as plt
from sklearn.metrics import classification_report

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

class MyDataset(Dataset):
    def __init__(self, data, label):
        self.data = data
        self.label = label

    def __getitem__(self, index):
        return (torch.tensor(self.data[index], dtype=torch.float), torch.tensor(self.label[index], dtype=torch.long))

    def __len__(self):
        return len(self.data)

class MyConv1dPadSame(nn.Module):
    """
    extend nn.Conv1d to support SAME padding
    """
    def __init__(self, in_channels, out_channels, kernel_size, stride, groups=1):
        super(MyConv1dPadSame, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.groups = groups
        self.conv = torch.nn.Conv1d(
            in_channels=self.in_channels,
            out_channels=self.out_channels,
            kernel_size=self.kernel_size,
            stride=self.stride,
            groups=self.groups)

    def forward(self, x):

        net = x

        # compute pad shape
        in_dim = net.shape[-1]
        out_dim = (in_dim + self.stride - 1) // self.stride
        p = max(0, (out_dim - 1) * self.stride + self.kernel_size - in_dim)
        pad_left = p // 2
        pad_right = p - pad_left
        net = F.pad(net, (pad_left, pad_right), "constant", 0)

        net = self.conv(net)

        return net

class MyMaxPool1dPadSame(nn.Module):
    """
    extend nn.MaxPool1d to support SAME padding
    """
    def __init__(self, kernel_size):
        super(MyMaxPool1dPadSame, self).__init__()
        self.kernel_size = kernel_size
        self.stride = 1
        self.max_pool = torch.nn.MaxPool1d(kernel_size=self.kernel_size)

    def forward(self, x):

        net = x

        # compute pad shape
        in_dim = net.shape[-1]
        out_dim = (in_dim + self.stride - 1) // self.stride
        p = max(0, (out_dim - 1) * self.stride + self.kernel_size - in_dim)
        pad_left = p // 2
        pad_right = p - pad_left
        net = F.pad(net, (pad_left, pad_right), "constant", 0)

        net = self.max_pool(net)

        return net

class BasicBlock(nn.Module):
    """
    ResNet Basic Block
    """
    def __init__(self, in_channels, out_channels, kernel_size, stride, groups, downsample, use_bn, use_do, is_first_block=False):
        super(BasicBlock, self).__init__()

        self.in_channels = in_channels
        self.kernel_size = kernel_size
        self.out_channels = out_channels
        self.stride = stride
        self.groups = groups
        self.downsample = downsample
        if self.downsample:
            self.stride = stride
        else:
            self.stride = 1
        self.is_first_block = is_first_block
        self.use_bn = use_bn
        self.use_do = use_do

        # the first conv
        self.bn1 = nn.BatchNorm1d(in_channels)
        self.relu1 = nn.ReLU()
        self.do1 = nn.Dropout(p=0.5)
        self.conv1 = MyConv1dPadSame(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=self.stride,
            groups=self.groups)

        # the second conv
        self.bn2 = nn.BatchNorm1d(out_channels)
        self.relu2 = nn.ReLU()
        self.do2 = nn.Dropout(p=0.5)
        self.conv2 = MyConv1dPadSame(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=1,
            groups=self.groups)

        self.max_pool = MyMaxPool1dPadSame(kernel_size=self.stride)

    def forward(self, x):

        identity = x

        # the first conv
        out = x
        if not self.is_first_block:
            if self.use_bn:
                out = self.bn1(out)
            out = self.relu1(out)
            if self.use_do:
                out = self.do1(out)
        out = self.conv1(out)

        # the second conv
        if self.use_bn:
            out = self.bn2(out)
        out = self.relu2(out)
        if self.use_do:
            out = self.do2(out)
        out = self.conv2(out)

        # if downsample, also downsample identity
        if self.downsample:
            identity = self.max_pool(identity)

        # if expand channel, also pad zeros to identity
        if self.out_channels != self.in_channels:
            identity = identity.transpose(-1,-2)
            ch1 = (self.out_channels-self.in_channels)//2
            ch2 = self.out_channels-self.in_channels-ch1
            identity = F.pad(identity, (ch1, ch2), "constant", 0)
            identity = identity.transpose(-1,-2)

        # shortcut
        out += identity

        return out

class ResNet1D(nn.Module):
    """

    Input:
        X: (n_samples, n_channel, n_length)
        Y: (n_samples)

    Output:
        out: (n_samples)

    Pararmetes:
        in_channels: dim of input, the same as n_channel
        base_filters: number of filters in the first several Conv layer, it will double at every 4 layers
        kernel_size: width of kernel
        stride: stride of kernel moving
        groups: set larget to 1 as ResNeXt
        n_block: number of blocks
        n_classes: number of classes

    """

    def __init__(self, in_channels, base_filters, kernel_size, stride, groups, n_block, n_classes, downsample_gap=2, increasefilter_gap=4, use_bn=True, use_do=True, verbose=False):
        super(ResNet1D, self).__init__()

        self.verbose = verbose
        self.n_block = n_block
        self.kernel_size = kernel_size
        self.stride = stride
        self.groups = groups
        self.use_bn = use_bn
        self.use_do = use_do

        self.downsample_gap = downsample_gap # 2 for base model
        self.increasefilter_gap = increasefilter_gap # 4 for base model

        # first block
        self.first_block_conv = MyConv1dPadSame(in_channels=in_channels, out_channels=base_filters, kernel_size=self.kernel_size, stride=1)
        self.first_block_bn = nn.BatchNorm1d(base_filters)
        self.first_block_relu = nn.ReLU()
        out_channels = base_filters

        # residual blocks
        self.basicblock_list = nn.ModuleList()
        for i_block in range(self.n_block):
            # is_first_block
            if i_block == 0:
                is_first_block = True
            else:
                is_first_block = False
            # downsample at every self.downsample_gap blocks
            if i_block % self.downsample_gap == 1:
                downsample = True
            else:
                downsample = False
            # in_channels and out_channels
            if is_first_block:
                in_channels = base_filters
                out_channels = in_channels
            else:
                # increase filters at every self.increasefilter_gap blocks
                in_channels = int(base_filters*2**((i_block-1)//self.increasefilter_gap))
                if (i_block % self.increasefilter_gap == 0) and (i_block != 0):
                    out_channels = in_channels * 2
                else:
                    out_channels = in_channels

            tmp_block = BasicBlock(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=self.kernel_size,
                stride = self.stride,
                groups = self.groups,
                downsample=downsample,
                use_bn = self.use_bn,
                use_do = self.use_do,
                is_first_block=is_first_block)
            self.basicblock_list.append(tmp_block)

        # final prediction
        self.final_bn = nn.BatchNorm1d(out_channels)
        self.final_relu = nn.ReLU(inplace=True)
        # self.do = nn.Dropout(p=0.5)
        self.dense = nn.Linear(out_channels, n_classes)
        # self.softmax = nn.Softmax(dim=1)

    def forward(self, x):

        out = x

        # first conv
        if self.verbose:
            print('input shape', out.shape)
        out = self.first_block_conv(out)
        if self.verbose:
            print('after first conv', out.shape)
        if self.use_bn:
            out = self.first_block_bn(out)
        out = self.first_block_relu(out)

        # residual blocks, every block has two conv
        for i_block in range(self.n_block):
            net = self.basicblock_list[i_block]
            if self.verbose:
                print('i_block: {0}, in_channels: {1}, out_channels: {2}, downsample: {3}'.format(i_block, net.in_channels, net.out_channels, net.downsample))
            out = net(out)
            if self.verbose:
                print(out.shape)

        # final prediction
        if self.use_bn:
            out = self.final_bn(out)
        out = self.final_relu(out)
        out = out.mean(-1)
        if self.verbose:
            print('final pooling', out.shape)
        # out = self.do(out)
        out = self.dense(out)
        if self.verbose:
            print('dense', out.shape)
        # out = self.softmax(out)
        if self.verbose:
            print('softmax', out.shape)

        return out

In [23]:
import numpy as np
import pandas as pd
import scipy.io
from matplotlib import pyplot as plt
import pickle
from sklearn.model_selection import train_test_split
from collections import Counter
from tqdm import tqdm


def read_data_generated(n_samples, n_length, n_channel, n_classes, verbose=False):
    """
    Generated data

    This generated data contains one noise channel class, plus unlimited number of sine channel classes which are different on frequency.

    """
    all_X = []
    all_Y = []

    # noise channel class
    X_noise = np.random.rand(n_samples, n_channel, n_length)
    Y_noise = np.array([0]*n_samples)
    all_X.append(X_noise)
    all_Y.append(Y_noise)

    # sine channel classe
    x = np.arange(n_length)
    for i_class in range(n_classes-1):
        scale = 2**i_class
        offset_list = 2*np.pi*np.random.rand(n_samples)
        X_sin = []
        for i_sample in range(n_samples):
            tmp_x = []
            for i_channel in range(n_channel):
                tmp_x.append(np.sin(x/scale+2*np.pi*np.random.rand()))
            X_sin.append(tmp_x)
        X_sin = np.array(X_sin)
        Y_sin = np.array([i_class+1]*n_samples)
        all_X.append(X_sin)
        all_Y.append(Y_sin)

    # combine and shuffle
    all_X = np.concatenate(all_X)
    all_Y = np.concatenate(all_Y)
    shuffle_idx = np.random.permutation(all_Y.shape[0])
    all_X = all_X[shuffle_idx]
    all_Y = all_Y[shuffle_idx]

    # random pick some and plot
    if verbose:
        for _ in np.random.permutation(all_Y.shape[0])[:10]:
            fig = plt.figure()
            plt.plot(all_X[_,0,:])
            plt.title('Label: {0}'.format(all_Y[_]))

    return all_X, all_Y


#if __name__ == "__main__":
  #  read_data_physionet_2_clean_federated(m_clients=4)

In [24]:
!pip install tensorboardX



# Preprocessing function has scaling and encoding..

In [25]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import torch
def preprocess_data(file_path):
    # Load dataset
    df = pd.read_csv(file_path)

    # Handle missing values (drop rows with missing data)
    df = df.dropna()

    # Encode categorical columns
    categorical_cols = df.select_dtypes(include=['object']).columns
    label_encoders = {}
    for col in categorical_cols:
        le = LabelEncoder()
        df[col] = le.fit_transform(df[col])
        label_encoders[col] = le

    # Separate features and labels
    X = df.drop("target", axis=1)
    y = df["target"]

    # Normalize numerical features
    scaler = StandardScaler()
    X = scaler.fit_transform(X)

    # Split into train and test sets
    n_samples , n_features = X.shape
    n_channel = n_features
    n_length = 1

    X = X.reshape(n_samples, n_channel, n_length)



    return X ,y

In [26]:
from sklearn.preprocessing import StandardScaler, LabelEncoder
import pandas as pd
import numpy as np
from imblearn.over_sampling import SMOTE

def preprocess_data_with_smote(file_path):
    """
    Preprocess the input CSV data for ResNet1D with SMOTE.

    Args:
        file_path (str): Path to the CSV file.

    Returns:
        tuple: Processed data (X), labels (y).
    """
    # Load dataset
    df = pd.read_csv(file_path)

    # Handle missing values (drop rows with missing data)
    df = df.dropna()

    # Encode categorical columns
    categorical_cols = df.select_dtypes(include=['object']).columns
    label_encoders = {}
    for col in categorical_cols:
        le = LabelEncoder()
        df[col] = le.fit_transform(df[col])
        label_encoders[col] = le

    # Separate features and labels
    X = df.drop("target", axis=1).values
    y = df["target"].values

    # Normalize numerical features
    scaler = StandardScaler()
    X = scaler.fit_transform(X)

    # Apply SMOTE to balance the classes
    smote = SMOTE(random_state=42)
    X, y = smote.fit_resample(X, y)

    # Reshape data for ResNet1D
    n_samples, n_features = X.shape
    n_channel = n_features
    n_length = 1

    X = X.reshape(n_samples, n_channel, n_length)

    return X, y



In [27]:
!pip install imbalanced-learn




# Model Training and evaluating

In [28]:
import numpy as np
from sklearn.metrics import f1_score, classification_report
from sklearn.utils.class_weight import compute_class_weight
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from tqdm import tqdm
from imblearn.over_sampling import SMOTE
# Assuming the preprocess_data, MyDataset, and ResNet1D definitions are correct and loaded above this code

# Load and preprocess data
#data, label = preprocess_data(r'C:\Users\l_alm\resnet1d-master\content\customerTargeting.csv')
data, label = preprocess_data_with_smote(r'C:\Users\l_alm\resnet1d-master\content\customerTargeting.csv')
print(data.shape, Counter(label))
dataset = MyDataset(data, label)

# Split dataset and prepare DataLoaders
train_set, val_set = torch.utils.data.random_split(dataset, [5000, 1620])
train_loader = DataLoader(train_set, batch_size=64, shuffle=True)
test_loader = DataLoader(val_set, batch_size=64, shuffle=True)

# Initialize model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
n_classes = 3

model = ResNet1D(
    in_channels=70, #NO of features
    base_filters=64,#on the authors github repository he added a comment that 64 is for resnet1d
    kernel_size=1, #each feature processed independently
    stride=1,
    n_block=8,#to align with ResNet18
    groups=1,
    n_classes=n_classes,
    downsample_gap=1,#downsampling happens at every block ->  increases the efficiency of feature compression
    increasefilter_gap=8,
    verbose=False
)



model.to(device)

# Optimizer, loss function, and scheduler
optimizer = optim.Adam(model.parameters(), lr=1e-5, weight_decay=1e-4)  # Lower learning rate
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10)
class_weights = compute_class_weight('balanced', classes=np.unique(label), y=label)
class_weights = torch.tensor(class_weights, dtype=torch.float).to(device)

loss_func = torch.nn.CrossEntropyLoss(weight=class_weights)

#early stopping parameters
early_stopping_patience = 20
best_val_loss = float('inf')
patience_counter = 0

#training settings
n_epoch = 200

# Training and validation loop
for epoch in range(n_epoch):
    # Training
    model.train()
    train_loss = 0
    all_train_labels = []
    all_train_preds = []

    for batch in tqdm(train_loader, desc=f"Training Epoch {epoch+1}", leave=False):
        input_x, input_y = tuple(t.to(device) for t in batch)
        pred = model(input_x)
        loss = loss_func(pred, input_y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()


        train_loss += loss.item()
        all_train_labels.extend(input_y.cpu().numpy())
        all_train_preds.extend(pred.argmax(dim=1).cpu().numpy())

    # Compute training F1 score
    train_f1 = f1_score(all_train_labels, all_train_preds, average='weighted')
    print(f"Epoch {epoch+1}: Train Loss = {train_loss:.4f}, Train F1 = {train_f1:.4f}")

    # Validation
    model.eval()
    val_loss = 0
    all_val_labels = []
    all_val_preds = []

    with torch.no_grad():
        for batch in tqdm(test_loader, desc=f"Validation Epoch {epoch+1}", leave=False):
            input_x, input_y = tuple(t.to(device) for t in batch)
            pred = model(input_x)
            loss = loss_func(pred, input_y)
            val_loss += loss.item()
            all_val_labels.extend(input_y.cpu().numpy())
            all_val_preds.extend(pred.argmax(dim=1).cpu().numpy())

    # Compute validation F1 score
    val_f1 = f1_score(all_val_labels, all_val_preds, average='weighted')
    print(f"Epoch {epoch+1}: Val Loss = {val_loss:.4f}, Val F1 = {val_f1:.4f}")
    scheduler.step(val_loss)

    # Early stopping logic
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        patience_counter = 0
        torch.save(model.state_dict(), "best_model.pth")  # Save the best model
    else:
        patience_counter += 1

    if patience_counter >= early_stopping_patience:
        print("Early stopping triggered!")
        break



# Final evaluation
model.load_state_dict(torch.load("best_model.pth"))
model.eval()

all_pred_prob = []
with torch.no_grad():
    for batch in tqdm(test_loader, desc="Final Testing", leave=False):
        input_x, input_y = tuple(t.to(device) for t in batch)
        pred = model(input_x)
        all_pred_prob.append(pred.cpu().data.numpy())

all_pred_prob = np.concatenate(all_pred_prob)
all_pred = np.argmax(all_pred_prob, axis=1)

all_test_labels = []
for _, labels in test_loader:
    all_test_labels.extend(labels.numpy())

all_test_labels = np.array(all_test_labels)
print("Test Labels:", all_test_labels)
print(classification_report(all_pred, all_test_labels))
print('Training complete.')


[WinError 2] The system cannot find the file specified
  File "C:\Users\l_alm\miniconda3\envs\pytorch_env\lib\site-packages\joblib\externals\loky\backend\context.py", line 257, in _count_physical_cores
    cpu_info = subprocess.run(
  File "C:\Users\l_alm\miniconda3\envs\pytorch_env\lib\subprocess.py", line 505, in run
    with Popen(*popenargs, **kwargs) as process:
  File "C:\Users\l_alm\miniconda3\envs\pytorch_env\lib\subprocess.py", line 951, in __init__
    self._execute_child(args, executable, preexec_fn, close_fds,
  File "C:\Users\l_alm\miniconda3\envs\pytorch_env\lib\subprocess.py", line 1436, in _execute_child
    hp, ht, pid, tid = _winapi.CreateProcess(executable, args,


(9228, 70, 1) Counter({2: 3076, 1: 3076, 0: 3076})


ValueError: Sum of input lengths does not equal the length of the input dataset!