In [7]:
# %load_ext autoreload
# %autoreload 2
!pip install torchsummary

 # Imports, Constants, Classes

In [8]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader, TensorDataset
from torchsummary import summary
from tqdm.notebook import tqdm

In [9]:
# Resolve the data directory: take it from the enfify package when that package
# can be imported, otherwise fall back to the Kaggle input dataset path.
try:
    from enfify import PROCESSED_DATA_DIR

    DATA_DIR = PROCESSED_DATA_DIR
except ImportError:
    DATA_DIR = Path("/kaggle/input/enf-datd-frequency-features")

# Seed the global torch and NumPy random number generators for reproducibility
SEED = 0
torch.manual_seed(SEED)
np.random.seed(SEED)

In [10]:
# Pick the compute device: the first CUDA GPU when one is available, else the CPU
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print("Using device", device)

In [11]:
# Hyperparameters
NUM_EPOCHS = 2000  # upper bound on the number of training epochs
BATCH_SIZE = 32  # mini-batch size used by every DataLoader
LEARNING_RATE = 1e-1  # step size handed to the optimizer
NOMINAL_ENF = 50.0  # value used to pad shorter sequences
TEST_SIZE = 0.2  # fraction held out by each train_test_split call

In [12]:
class OneDCNN(nn.Module):
    """1-D CNN classifier: Conv1d -> ReLU -> global max-pool -> Linear -> ReLU -> Linear.

    Input:  float tensor of shape (batch_size, 1, seq_len) with seq_len >= 130.
    Output: float tensor of shape (batch_size, 2) holding raw class scores (logits).

    The scores are deliberately NOT passed through softmax: nn.CrossEntropyLoss
    applies log-softmax itself, so feeding it probabilities would normalize twice
    and flatten the gradients. Use ``F.softmax(model(x), dim=1)`` when class
    probabilities are needed; the arg-max prediction is the same either way.
    """

    def __init__(self):
        super().__init__()

        # 75 filters of width 130 sliding over the single input channel
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=75, kernel_size=130, stride=1)
        # Global max over the time axis -> (batch_size, 75, 1). An adaptive pool gives
        # the same result as a fixed kernel of 2861-129 on length-2861 inputs, but
        # also works for any other sequence length.
        self.pool = nn.AdaptiveMaxPool1d(1)

        self.fc1 = nn.Linear(75, 128)  # dense layer: 75 -> 128
        self.fc2 = nn.Linear(128, 2)  # output layer: 128 -> 2 classes

    def forward(self, x):
        """Return raw class scores of shape (batch_size, 2) for input (batch_size, 1, seq_len)."""
        x = F.relu(self.conv1(x))  # (batch_size, 75, seq_len - 129)
        x = self.pool(x).squeeze(-1)  # (batch_size, 75)
        x = F.relu(self.fc1(x))  # (batch_size, 128)
        return self.fc2(x)  # (batch_size, 2) logits

class EarlyStopping:
    """Signal when the validation loss has stopped improving.

    Call the instance once per epoch with the current validation loss. A loss
    counts as an improvement only when it is at least ``min_delta`` below the
    best loss seen so far. After ``patience`` consecutive calls without an
    improvement, ``early_stop`` is set to True.

    Args:
        patience: number of consecutive non-improving calls tolerated.
        min_delta: minimum decrease of the loss that counts as an improvement.

    Attributes:
        counter: consecutive non-improving calls since the last improvement.
        best_loss: lowest loss accepted so far (None until the first valid loss).
        early_stop: True once ``counter`` has reached ``patience``.
    """

    def __init__(self, patience=10, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_loss = None
        self.early_stop = False

    def __call__(self, val_loss):
        """Update the stopping state with the latest validation loss."""
        # NaN is the only value that is not equal to itself. Every ordering
        # comparison with NaN is False, so without this check a diverged (NaN)
        # loss would be stored as the new best and stopping would never trigger.
        is_nan = val_loss != val_loss

        if self.best_loss is None and not is_nan:
            # First valid loss becomes the baseline
            self.best_loss = val_loss
        elif is_nan or val_loss > self.best_loss - self.min_delta:
            # No (sufficient) improvement this call
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_loss = val_loss
            self.counter = 0

 # Initializing

In [32]:
# Initialize the model
model = OneDCNN().to(device)

# Load numpy data
dsets = ["Carioca1", "Carioca2", "Synthetic", "WHU_ref"]
files = sorted(file for dset in dsets for file in (DATA_DIR / dset).glob("*.npy"))
if not files:
    raise FileNotFoundError(f"No .npy files found under {DATA_DIR} for datasets {dsets}")

# A file is labelled 1 when its name contains "tamp", otherwise 0
labels = [int("tamp" in file.stem) for file in files]

# Keep the sequences in a list: np.stack would require every file to have the
# same length, which would make the padding step below pointless.
data = [np.load(file) for file in files]

# Clip edges (drop the first and last 40 samples of every sequence)
data = [seq[40:-40] for seq in data]

# Pad sequences to a common length with the nominal ENF value
data_padded = [torch.tensor(seq, dtype=torch.float32) for seq in data]
data_padded = pad_sequence(data_padded, batch_first=True, padding_value=NOMINAL_ENF)

# Compute the median and the interquartile range (IQR) of every time series.
# NOTE: the statistics are taken over the padded array, so padding values take
# part in the median/IQR of sequences that were shorter than the longest one.
padded_np = data_padded.numpy()
data_median = np.median(padded_np, axis=1, keepdims=True)
q25, q75 = np.percentile(padded_np, [25, 75], axis=1, keepdims=True)
data_iqr = q75 - q25
# A series with zero IQR would cause a division by zero (NaN/inf inputs);
# scale such series by 1 so they are only median-centred.
data_iqr = np.where(data_iqr == 0, 1.0, data_iqr)

# Normalize the time series
data_normalized = (padded_np - data_median) / data_iqr

# Convert the normalized data back to a tensor
data_normalized = torch.tensor(data_normalized, dtype=torch.float32)

input_size = (1, data_normalized.shape[1])

# Split the data into training, validation, and test sets
X_train, X_test, y_train, y_test = train_test_split(
    data_normalized, labels, test_size=TEST_SIZE, random_state=0, stratify=labels
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=TEST_SIZE, random_state=0, stratify=y_train
)

# Convert data to tensors, add the channel dimension and move them to the device
X_train = torch.as_tensor(X_train, dtype=torch.float32).unsqueeze(1).to(device)
y_train = torch.tensor(y_train, dtype=torch.long).to(device)
X_val = torch.as_tensor(X_val, dtype=torch.float32).unsqueeze(1).to(device)
y_val = torch.tensor(y_val, dtype=torch.long).to(device)
X_test = torch.as_tensor(X_test, dtype=torch.float32).unsqueeze(1).to(device)
y_test = torch.tensor(y_test, dtype=torch.long).to(device)

# # Check label distribution
# unique, counts = np.unique(y_train.cpu(), return_counts=True)
# class_distribution = dict(zip(unique, counts))
# print(f"y_train distribution: {class_distribution}")
# unique, counts = np.unique(y_val.cpu(), return_counts=True)
# class_distribution = dict(zip(unique, counts))
# print(f"y_val distribution: {class_distribution}")
# unique, counts = np.unique(y_test.cpu(), return_counts=True)
# class_distribution = dict(zip(unique, counts))
# print(f"y_test distribution: {class_distribution}")

# Create TensorDatasets
X_train_dataset = TensorDataset(X_train, y_train)
X_val_dataset = TensorDataset(X_val, y_val)
X_test_dataset = TensorDataset(X_test, y_test)

# Data loaders (only the training data is shuffled)
X_train_loader = DataLoader(X_train_dataset, batch_size=BATCH_SIZE, shuffle=True)
X_val_loader = DataLoader(X_val_dataset, batch_size=BATCH_SIZE, shuffle=False)
X_test_loader = DataLoader(X_test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [33]:
# Draw the first normalized sequence as a quick visual check of the data
fig, ax = plt.subplots(figsize=(10, 2))
ax.plot(data_normalized[0])
ax.set_title("Sample Data Plot")
plt.show()


In [34]:
# Print a single batch from the DataLoader.
# The batch is bound to dedicated names so that the dataset-level `labels` list
# created while loading the data is not overwritten by a batch tensor.
sample_inputs, sample_labels = next(iter(X_train_loader))
print(f"Input batch shape: {sample_inputs.shape}")
print(f"Labels batch: {sample_labels}")


In [35]:
# Loss function and optimizer.
# NOTE: nn.CrossEntropyLoss applies log-softmax internally and expects raw,
# unnormalized class scores as its input.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=model.parameters(), lr=LEARNING_RATE)
# Alternative optimizer configurations (disabled):
# optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE, weight_decay=1e-5)
# optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)


In [36]:
# Show the torchsummary overview of the model for a single input of shape `input_size`
summary(model, input_size)

# Training

In [None]:
def _run_epoch(model, loader, criterion, optimizer=None):
    """Run one full pass over `loader` and return ``(mean_loss, accuracy)``.

    When `optimizer` is given, the model is put in training mode and its
    parameters are updated after every batch. Otherwise the model is put in
    evaluation mode and gradient tracking is disabled.

    The loss is averaged per sample rather than per batch, so a smaller final
    batch does not get a disproportionate weight. This assumes `criterion`
    returns the mean loss of the batch (the default reduction).
    """
    is_training = optimizer is not None
    model.train(is_training)

    loss_sum = 0.0
    n_correct = 0
    n_seen = 0
    with torch.set_grad_enabled(is_training):
        for batch_inputs, batch_labels in loader:
            if is_training:
                optimizer.zero_grad()
            outputs = model(batch_inputs)
            loss = criterion(outputs, batch_labels)
            if is_training:
                loss.backward()
                optimizer.step()

            n_batch = batch_labels.size(0)
            loss_sum += loss.item() * n_batch  # undo the per-batch mean
            n_correct += (outputs.argmax(dim=1) == batch_labels).sum().item()
            n_seen += n_batch

    return loss_sum / n_seen, n_correct / n_seen


# Initialize EarlyStopping object
early_stopping = EarlyStopping(patience=10, min_delta=0.001)

# Per-epoch history, used for the learning curves
train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []

# Training and validation loop
for epoch in tqdm(range(NUM_EPOCHS), desc="Training Progress"):
    train_loss, train_accuracy = _run_epoch(model, X_train_loader, criterion, optimizer)
    train_losses.append(train_loss)
    train_accuracies.append(train_accuracy)

    val_loss, val_accuracy = _run_epoch(model, X_val_loader, criterion)
    val_losses.append(val_loss)
    val_accuracies.append(val_accuracy)

    # Report the first epoch and then every 100th epoch
    if (epoch + 1) % 100 == 0 or epoch == 0:
        tqdm.write(f"Epoch {epoch + 1}/{NUM_EPOCHS}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Train Accuracy: {train_accuracy:.4f}, Val Accuracy: {val_accuracy:.4f}")

    # Check early stopping condition
    early_stopping(val_loss)
    if early_stopping.early_stop:
        # tqdm.write keeps the message from breaking the progress bar
        tqdm.write("Early stopping")
        break

# Testing

In [None]:
# Testing phase: evaluate the trained model once on the held-out test set
model.eval()
test_loss_sum = 0.0
correct = 0
total = 0

with torch.no_grad():
    for inputs, labels in X_test_loader:
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        n_batch = labels.size(0)
        # criterion returns the mean over the batch; weight it by the batch size
        # so a smaller final batch does not distort the reported loss.
        test_loss_sum += loss.item() * n_batch
        _, predicted = torch.max(outputs, 1)
        total += n_batch
        correct += (predicted == labels).sum().item()

test_loss = test_loss_sum / total
test_accuracy = correct / total

print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

In [None]:
# Learning curves: loss on the left panel, accuracy on the right panel
fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(10, 5))

ax_loss.plot(train_losses, label="Train Loss", alpha=0.5)
ax_loss.plot(val_losses, label="Validation Loss", alpha=0.5)
ax_loss.set_xlabel("Epoch")
ax_loss.set_ylabel("Loss")
ax_loss.legend()
ax_loss.set_title("Training and Validation Loss")

ax_acc.plot(train_accuracies, label="Train Accuracy", alpha=0.5)
ax_acc.plot(val_accuracies, label="Validation Accuracy", alpha=0.5)
ax_acc.set_xlabel("Epoch")
ax_acc.set_ylabel("Accuracy")
ax_acc.legend()
ax_acc.set_title("Training and Validation Accuracy")

plt.show()

# Persist the trained weights (state dict only) to the working directory
torch.save(model.state_dict(), "model.pth")
print("Model saved successfully!")