In [1]:
import torch
import torch.nn as nn  
import torchvision.transforms as transforms  
import os
import pandas as pd
import numpy as np
from PIL import Image
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
from PIL import Image
import shutil

In [2]:
parent_dir = "D:\Training\itHillel\Machine Learning\Lesson_12\celeba"
celeba_raw_folder = os.path.join("Celeba_raw", "raw")
img_dir = os.path.join(parent_dir, celeba_raw_folder, "img_align_celeba") + os.sep
out_dir = os.path.join(parent_dir, "celeba_preprocessed")

columns = ["Smiling"]


df = pd.read_csv(
    os.path.join(parent_dir, celeba_raw_folder, "list_attr_celeba.txt"),
    sep="\s+",
    skiprows=1,
    usecols=columns,
)


df.loc[df["Smiling"] == -1, "Smiling"] = 0


length = len(df)
data_node_1 = df.iloc[: int(length / 3)]
data_node_2 = df.iloc[int(length / 3) : int(length / 3) * 2]
data_node_3 = df.iloc[int(length / 3) * 2 :]


if not os.path.exists(os.path.join(out_dir, "data_node_1")):
    os.makedirs(os.path.join(out_dir, "data_node_1", "data"))
if not os.path.exists(os.path.join(out_dir, "data_node_2")):
    os.makedirs(os.path.join(out_dir, "data_node_2", "data"))
if not os.path.exists(os.path.join(out_dir, "data_node_3")):
    os.makedirs(os.path.join(out_dir, "data_node_3", "data"))


data_node_1.to_csv(os.path.join(out_dir, "data_node_1", "target.csv"), sep="\t")
data_node_2.to_csv(os.path.join(out_dir, "data_node_2", "target.csv"), sep="\t")
data_node_3.to_csv(os.path.join(out_dir, "data_node_3", "target.csv"), sep="\t")


for im in data_node_1.index:
    shutil.copy(img_dir + im, os.path.join(out_dir, "data_node_1", "data", im))
print("data for node 1 succesfully created")

for im in data_node_2.index:
    shutil.copy(img_dir + im, os.path.join(out_dir, "data_node_2", "data", im))
print("data for node 2 succesfully created")

for im in data_node_3.index:
    shutil.copy(img_dir + im, os.path.join(out_dir, "data_node_3", "data", im))
print("data for node 3 succesfully created")

data for node 1 succesfully created
data for node 2 succesfully created
data for node 3 succesfully created


In [3]:
class CelebADataset(Dataset):
    def __init__(self, partition_file_path, root_dir, transform=None):
        df = pd.read_csv(partition_file_path, sep="\t", index_col=0)
        self.root_dir = root_dir
        self.partition_file_path = partition_file_path
        self.img_names = df.index.values
        self.y = df['Smiling'].values
        self.transform = transform
        print("celeba dataset finished")

    def __getitem__(self, index):
        img = np.asarray(Image.open(os.path.join(self.root_dir, self.img_names[index])))
        img = transforms.ToTensor()(img)
        label = self.y[index]
        return img, label
    
    def __len__(self):
        return self.y.shape[0]

In [4]:
data_root = "D:\Training\itHillel\Machine Learning\Lesson_12\celeba\celeba_preprocessed"
trainset = CelebADataset(f"{data_root}/data_node_2/target.csv", f"{data_root}\data_node_2\data")
testset = CelebADataset(f"{data_root}/data_node_3/target.csv", f"{data_root}\data_node_3\data")

trainloader = DataLoader(trainset, shuffle=True)
testloader = DataLoader(testset, shuffle=True)

celeba dataset finished
celeba dataset finished


In [5]:
class Net(nn.Module):

    def __init__(self):
        super().__init__()

        self.conv1 = nn.Conv2d(3, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 32, 3, 1)
        self.conv3 = nn.Conv2d(32, 32, 3, 1)
        self.conv4 = nn.Conv2d(32, 32, 3, 1)
        self.dropout1 = nn.Dropout(0.25)
        self.dropout2 = nn.Dropout(0.5)

        self.fc1 = nn.Linear(3168, 128)
        self.fc2 = nn.Linear(128, 2)

    def forward(self, x):

        x = self.conv1(x)
        x = F.max_pool2d(x, 2)
        x = F.relu(x)

        x = self.conv2(x)
        x = F.max_pool2d(x, 2)
        x = F.relu(x)

        x = self.conv3(x)
        x = F.max_pool2d(x, 2)
        x = F.relu(x)

        x = self.conv4(x)
        x = F.max_pool2d(x, 2)
        x = F.relu(x)

        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)

        x = self.dropout2(x)
        x = self.fc2(x)
        output = F.log_softmax(x, dim=1)
        return output

In [6]:
model = Net()
print(model)

Net(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv4): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1))
  (dropout1): Dropout(p=0.25, inplace=False)
  (dropout2): Dropout(p=0.5, inplace=False)
  (fc1): Linear(in_features=3168, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=2, bias=True)
)


In [7]:
training_args = {
    'loader_args': {'batch_size': 32}, 
    'optimizer_args': {'lr': 1e-3},
    'epochs': 1, 
    'dry_run': False,  
    'batch_maxnum': 100 
}

In [8]:
loss = nn.NLLLoss()

optimizer = torch.optim.SGD(
    model.parameters(), lr=training_args["optimizer_args"]["lr"]
)
batch_size = training_args["loader_args"]["batch_size"]

In [9]:
def train_loop(dataloader, model):

    size = len(dataloader.dataset)
    model.train()

    for batch, (X, y) in enumerate(dataloader):

        pred = model(X)
        loss_res = loss(pred, y)
        loss_res.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 100 == 0:

            loss_res, current = loss_res.item(), batch * batch_size + len(X)
            print(f"loss: {loss_res:>7f}  [{current:>5d}/{size:>5d}]")

In [10]:
def test_loop(dataloader, model):
    
    model.train()
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [11]:
epochs = 5
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(trainloader, model)
print("Done!")

Epoch 1
-------------------------------


  img = torch.from_numpy(pic.transpose((2, 0, 1))).contiguous()


loss: 0.684366  [    1/   83]
Epoch 2
-------------------------------
loss: 0.686815  [    1/   83]
Epoch 3
-------------------------------
loss: 0.696116  [    1/   83]
Epoch 4
-------------------------------
loss: 0.697665  [    1/   83]
Epoch 5
-------------------------------
loss: 0.679308  [    1/   83]
Done!


In [12]:
epochs = 5
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    test_loop(testloader, model)
print("Done!")

Epoch 1
-------------------------------
Test Error: 
 Accuracy: 56.0%, Avg loss: 0.691869 

Epoch 2
-------------------------------
Test Error: 
 Accuracy: 42.9%, Avg loss: 0.694014 

Epoch 3
-------------------------------
Test Error: 
 Accuracy: 61.9%, Avg loss: 0.689092 

Epoch 4
-------------------------------
Test Error: 
 Accuracy: 48.8%, Avg loss: 0.692979 

Epoch 5
-------------------------------
Test Error: 
 Accuracy: 54.8%, Avg loss: 0.691499 

Done!
