# Representation Learning

## Packages and Presets

In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import yaml
import math
from tqdm import tqdm, trange
import os

import matplotlib.pyplot as plt
import seaborn as sns   

import copy
from tqdm import tqdm

from sklearn.metrics import (
    f1_score, 
    balanced_accuracy_score, 
    confusion_matrix,
    ConfusionMatrixDisplay, 
    accuracy_score,
    classification_report
)
from torch.utils.data import Dataset, DataLoader

from sklearn.model_selection import train_test_split

from torch.utils.tensorboard import SummaryWriter
from imblearn.over_sampling import SMOTE

import warnings

from info_nce import InfoNCE

from torch import Tensor

pd.set_option('display.max_columns', None)

In [2]:
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

with open("representation_learning_config.yaml", "r") as file:
    config = yaml.safe_load(file)

## Data Loading and Preprocessing

In [3]:
# Load data
train_df = pd.read_csv(config["paths"]["mitbih_train"], header=None)
test_df = pd.read_csv(config["paths"]["mitbih_train"], header=None)

In [4]:
print(f"{train_df.shape=}")
print(f"{test_df.shape=}")

train_df.shape=(87554, 188)
test_df.shape=(87554, 188)


In [8]:
# relative frequency of each label class in %
train_df.iloc[:, -1].value_counts(normalize = True) * 100

187
0.0    82.772917
4.0     7.345181
2.0     6.610777
1.0     2.539005
3.0     0.732120
Name: proportion, dtype: float64

As we can see the train set is very imbalanced. As a consequence, we will once again make use of SMOTE oversampling.

In [None]:
X_train_full = train_df.iloc[:, :-1].to_numpy()
y_train_full = train_df.iloc[:, -1].to_numpy()

X_test = test_df.iloc[:, :-1].to_numpy()
y_test = test_df.iloc[:, -1].to_numpy()