In [35]:
import torch
import torch.nn as nn
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import CIFAR10
from torchvision.models import resnet50
from torch.optim import Adam
import h5py
import imageio as img
import matplotlib.pyplot as plt
import numpy as np
from torch.utils.data import Dataset
class MyDataset(Dataset):
    def __init__(self, h5_file, transform=None):
        self.file = h5py.File(h5_file, 'r')
        self.transform = transform
        self.X = self.file['jet']
    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        image = self.X[idx]
        if self.transform:
            image = self.transform(image)
        return image
def augment(x):
    x = x.numpy()
    # Define the augmentation
    transform = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.GaussianBlur(kernel_size=3),
        transforms.RandomRotation(degrees=10),
        transforms.ToTensor()
    ])
    y_a = torch.stack([transform(transforms.ToPILImage()(channel)) for img in x for channel in img])
    y_b = torch.stack([transform(transforms.ToPILImage()(channel)) for img in x for channel in img])

    # Reshape the output to match the original shape
    y_a = y_a.view(*x.shape)
    y_b = y_b.view(*x.shape)
    return y_a, y_b
class ResBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        super(ResBlock, self).__init__()
        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride)

        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        identity = self.shortcut(x)
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x += identity
        x = self.relu(x)
        return x

class ResNet15(nn.Module):
    def __init__(self, num_classes):
        super(ResNet15, self).__init__()
        self.conv0 = nn.Conv2d(8, 120, kernel_size=125, stride=2, padding=3, bias=False)
        self.bn0 = nn.BatchNorm2d(120)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.res_blocks = nn.Sequential(
            ResBlock(120, 120),
            ResBlock(120, 120),
            ResBlock(120, 90, stride=2),
            ResBlock(90, 60),
            ResBlock(60, 30, stride=2),
            ResBlock(30, 30)
        )

        self.conv_final = nn.Conv2d(30, 16, kernel_size=3, stride=2, padding=1)
        self.bn_final = nn.BatchNorm2d(16)
        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(16, num_classes)

    def forward(self, x):
        x = self.conv0(x)
        x = self.bn0(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.res_blocks(x)
        x = self.conv_final(x)
        x = self.bn_final(x)
        x = self.relu(x)
        x = self.global_avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

# Create the model
model = ResNet15(num_classes=1)
print(model)


ResNet15(
  (conv0): Conv2d(8, 120, kernel_size=(125, 125), stride=(2, 2), padding=(3, 3), bias=False)
  (bn0): BatchNorm2d(120, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (res_blocks): Sequential(
    (0): ResBlock(
      (shortcut): Sequential()
      (conv1): Conv2d(120, 120, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn1): BatchNorm2d(120, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(120, 120, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn2): BatchNorm2d(120, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): ResBlock(
      (shortcut): Sequential()
      (conv1): Conv2d(120, 120, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn1): BatchNorm2d(120, eps=1e-05, momentum=0.1, affine=True, track_running_stat

# Lars Optimizer

In [2]:
import torch
from torch.optim import Optimizer

class LARS(Optimizer):
    def __init__(self, params, lr=0.1, trust_coef=0.001, eps=1e-8):
        defaults = dict(lr=lr, trust_coef=trust_coef, eps=eps)
        super(LARS, self).__init__(params, defaults)

    def step(self):
        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                global_lr = group['lr']
                velocity = global_lr * p.grad.data

                trust_coef = group['trust_coef']
                eps = group['eps']

                param_norm = torch.norm(p.data)
                grad_norm = torch.norm(p.grad.data)

                local_lr = trust_coef * param_norm / (grad_norm + param_norm * eps)
                adjusted_lr = min(local_lr, global_lr)

                velocity.add_(adjusted_lr, p.data)
                p.data.add_(-velocity)


# Barlow Twins Custom Loss Function

In [3]:

transform = transforms.Compose([

    transforms.ToTensor(),
])

# Wrap the SGD optimizer with LARS
optimizer = LARS(model.parameters(), lr=0.0001, trust_coef=0.001)
trainset = MyDataset('/kaggle/input/datqwe/Dataset_Specific_Unlabelled.h5',transform=transform)
trainloader = DataLoader(trainset, batch_size=64, shuffle=True)

output_dim = 512
lambda_=0.01
epoch=0
# f: encoder network # lambda: weight on the off-diagonal terms # N: batch size # D: dimensionality of the embeddings # # mm: matrix-matrix multiplication # off_diagonal: off-diagonal elements of a matrix # eye: identity matrix 
for i in range(2):
    epoch+=1
    epoch_loss = 0.0

    for x in trainloader: # load a batch with N samples # two randomly augmented versions of x 
        y_a, y_b = augment(x) # compute embeddings 
        z_a = model(y_a)  # NxD
        z_b = model(y_b)  # NxD
 
    # Normalize repr. along the batch dimension
        z_a_norm = (z_a - z_a.mean(0)) / z_a.std(0)  # NxD
        z_b_norm = (z_b - z_b.mean(0)) / z_b.std(0)  # NxD

    # Cross-correlation matrix
        c = torch.mm(z_a_norm.T, z_b_norm) / x.size(0)  # DxD

    # Loss
        c_diff = (c - torch.eye(output_dim)).pow(2)  # DxD

    # Multiply off-diagonal elems of c_diff by lambda
        c_diff_off_diag = c_diff - torch.diag(torch.diag(c_diff))
        c_diff_off_diag.mul_(lambda_)
        c_diff = torch.diag(torch.diag(c_diff)) + c_diff_off_diag

        loss = c_diff.sum()
        epoch_loss += loss.item()
    # Optimization step
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
    print(f'Epoch {epoch}, Loss: {epoch_loss / len(trainloader)}')



	add_(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add_(Tensor other, *, Number alpha) (Triggered internally at ../torch/csrc/utils/python_arg_parser.cpp:1519.)
  velocity.add_(adjusted_lr, p.data)


Epoch 1, Loss: 493.83897341339826
Epoch 2, Loss: 486.0946560272022


In [5]:
torch.save(model.state_dict(), 'res.pth')


In [7]:
import h5py
import imageio as img
import matplotlib.pyplot as plt
import numpy as np
# Open the HDF5 file
file = h5py.File('/kaggle/input/qweewq/Dataset_Specific_labelled.h5', 'r')
# Now you can read datasets from the fil
data1 = file['/jet']
y1 = file['/Y']
print(y1[1])

[0.]


In [11]:
test_x = data1[8000:]
train_x = data1[:8000]
test_y = y1[8000:]
train_y = y1[:8000]

In [12]:
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import torch
from torch import nn
from sklearn.metrics import roc_auc_score
from torch.utils.data import DataLoader
from transformers import AutoModel

class CustomDataset(Dataset):
    def __init__(self, data, targets):
        self.data = data
        self.targets = targets

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.targets[idx]



train_dataset = CustomDataset(torch.from_numpy(train_x).float(), torch.from_numpy(train_y).long())
test_dataset = CustomDataset(torch.from_numpy(test_x).float(), torch.from_numpy(test_y).long())

batch_size=64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


# Results with Self-training

In [None]:
import torch
criterion =nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters())


num_epochs=5
# Training loop
for epoch in range(num_epochs):
    epoch_loss=0.0
    for i, (data, labels) in enumerate(train_loader):
        data=data.permute(0, 3, 2, 1)
        # Forward pass
        outputs = model(data)

        loss = criterion(outputs, labels.float())
        epoch_loss += loss.item()
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item()}')




In [32]:
# Evaluation
model.eval()
with torch.no_grad():
    y_true = []
    y_pred = []
    for data, labels in test_loader:
        data=data.permute(0, 3, 2, 1)

        outputs = model(data)
        _, predicted = torch.max(outputs.data, 1)
        y_true.extend(labels.cpu().numpy())
        y_pred.extend(predicted.cpu().numpy())

print('ROC-AUC score: ', roc_auc_score(y_true, y_pred))

ROC-AUC score:  0.5


In [34]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np


mae = mean_absolute_error(y_true,y_pred)
mse = mean_squared_error(y_true,y_pred)
rmse = np.sqrt(mse)

print(f"Mean Absolute Error (MAE): {mae}")
print(f"Mean Squared Error (MSE): {mse}")
print(f"Root Mean Squared Error (RMSE): {rmse}")

Mean Absolute Error (MAE): 0.493
Mean Squared Error (MSE): 0.493
Root Mean Squared Error (RMSE): 0.7021395872616784


# Results without Self-Training

In [36]:
import torch
criterion =nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters())


num_epochs=5
# Training loop
for epoch in range(num_epochs):
    epoch_loss=0.0
    for i, (data, labels) in enumerate(train_loader):
        data=data.permute(0, 3, 2, 1)
        # Forward pass
        outputs = model(data)

        loss = criterion(outputs, labels.float())
        epoch_loss += loss.item()
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item()}')


# Evaluation
model.eval()
with torch.no_grad():
    y_true = []
    y_pred = []
    for data, labels in test_loader:
        data=data.permute(0, 3, 2, 1)

        outputs = model(data)
        _, predicted = torch.max(outputs.data, 1)
        y_true.extend(labels.cpu().numpy())
        y_pred.extend(predicted.cpu().numpy())

print('ROC-AUC score: ', roc_auc_score(y_true, y_pred))

Epoch [1/5], Step [125/125], Loss: 0.43758824467658997
Epoch [2/5], Step [125/125], Loss: 0.15018409490585327
Epoch [3/5], Step [125/125], Loss: 0.11488958448171616
Epoch [4/5], Step [125/125], Loss: 0.18253324925899506
Epoch [5/5], Step [125/125], Loss: 0.0474543496966362
ROC-AUC score:  0.5


In [37]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np


mae = mean_absolute_error(y_true,y_pred)
mse = mean_squared_error(y_true,y_pred)
rmse = np.sqrt(mse)

print(f"Mean Absolute Error (MAE): {mae}")
print(f"Mean Squared Error (MSE): {mse}")
print(f"Root Mean Squared Error (RMSE): {rmse}")

Mean Absolute Error (MAE): 0.493
Mean Squared Error (MSE): 0.493
Root Mean Squared Error (RMSE): 0.7021395872616784
