In [36]:
import h5py

# Open the .h5 file in read mode
file = h5py.File('train_catvnoncat.h5', 'r')

# List all top-level keys (datasets or groups)
print("Keys:", list(file.keys()))

# Explore a specific dataset
dataset = file['train_set_x']

print("Dataset shape:", dataset.shape)
# print("Dataset data:", dataset[:])  # Display the dataset content (optional)

train_x = file['train_set_x'][:]
train_y = file['train_set_y'][:]
num_spml, x, y, channels  = train_x.shape
new_train_x = train_x.reshape(num_spml, -1)

print("Number of samples:", num_spml)
print("Image size:", x, "x", y)
print("Number of channels:", channels)
print("New shape:", new_train_x.shape)
# Close the file after use
file.close()

Keys: ['list_classes', 'train_set_x', 'train_set_y']
Dataset shape: (209, 64, 64, 3)
Number of samples: 209
Image size: 64 x 64
Number of channels: 3
New shape: (209, 12288)


In [34]:
import torch
import numpy as np

with h5py.File('train_catvnoncat.h5', 'r') as train_file, h5py.File('test_catvnoncat.h5', 'r') as test_file:
    # Extract the dataset from train_catvnoncat.h5
    train_set_x = train_file['train_set_x'][:]
    train_set_y = train_file['train_set_y'][:]
    
    # Extract the dataset from test_catvnoncat.h5
    test_set_x = test_file['test_set_x'][:]
    test_set_y = test_file['test_set_y'][:]

    # Flatening the data
    new_train_set_x = train_set_x.reshape(train_set_x.shape[0], -1)
    new_test_set_x = test_set_x.reshape(test_set_x.shape[0], -1)

    # Normalizing the data
    new_train_set_x = new_train_set_x / 255.0 
    new_test_set_x = new_test_set_x / 255.0 


In [37]:
# Converting to pytorch tensors
x_train = torch.tensor(new_train_set_x, dtype=torch.float32)
y_train = torch.tensor(train_set_y, dtype=torch.float32).view(-1, 1)

x_test = torch.tensor(new_test_set_x, dtype=torch.float32)
y_test = torch.tensor(test_set_y, dtype=torch.float32).view(-1, 1)

torch.Size([209, 12288])


In [33]:
# a logistic regression model
class LogesticRegression(torch.nn.Module):
    def __init__(self, input_size):
        super(LogesticRegression, self).__init__()
        self.linear = torch.nn.Linear(input_size, 1)

    def forward(self, x):
        return torch.sigmoid(self.linear(x))

In [79]:
import torch.optim as optim

input_size = x_train.shape[1]
model = LogesticRegression(input_size)

criteria = torch.nn.BCELoss() # Binary Cross Entropy Loss
optimizer = optim.SGD(model.parameters(), lr=0.1) # Stochastic Gradient Descent

num_epochs = 4000
for epochs in range(num_epochs):
    optimizer.zero_grad()
    y_pred = model(x_train)
    loss = criteria(y_pred, y_train)
    loss.backward()
    optimizer.step()
    print(f'Epoch {epochs+1}/{num_epochs}, Loss: {loss.item()}')

Epoch 1/4000, Loss: 0.6552146673202515
Epoch 2/4000, Loss: 3.303086757659912
Epoch 3/4000, Loss: 64.27928924560547
Epoch 4/4000, Loss: 64.27488708496094
Epoch 5/4000, Loss: 64.2711410522461
Epoch 6/4000, Loss: 64.26753234863281
Epoch 7/4000, Loss: 64.26365661621094
Epoch 8/4000, Loss: 64.25993347167969
Epoch 9/4000, Loss: 64.2560806274414
Epoch 10/4000, Loss: 64.25226593017578
Epoch 11/4000, Loss: 64.24846649169922
Epoch 12/4000, Loss: 64.24464416503906
Epoch 13/4000, Loss: 64.2408218383789
Epoch 14/4000, Loss: 64.23703002929688
Epoch 15/4000, Loss: 64.23322296142578
Epoch 16/4000, Loss: 64.22943115234375
Epoch 17/4000, Loss: 64.22563934326172
Epoch 18/4000, Loss: 64.22186279296875
Epoch 19/4000, Loss: 64.21809387207031
Epoch 20/4000, Loss: 64.21434020996094
Epoch 21/4000, Loss: 64.21060180664062
Epoch 22/4000, Loss: 64.20689392089844
Epoch 23/4000, Loss: 64.20319366455078
Epoch 24/4000, Loss: 64.19952392578125
Epoch 25/4000, Loss: 64.1958999633789
Epoch 26/4000, Loss: 64.1923065185546

In [82]:
model.eval() # Set the model to evaluation mode(disable dropout and batch normalization)
with torch.no_grad():
    y_pred = model(x_test)
    y_pred = torch.round(y_pred)
    accuracy = (y_pred == y_test).sum().item() / y_test.shape[0]
    print(f'Accuracy: {accuracy*100}%')

Accuracy: 66.0%
