In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms
import pandas as pd
from PIL import Image


# Training

In [2]:
class GlassesDataset(Dataset):
    """Labelled image dataset backed by a two-column DataFrame.

    Column 0 of ``dataframe`` is read as the image path and column 1 as the
    label. Both are accessed by position, not by column name.
    """

    def __init__(self, dataframe, transform=None):
        """Keep the sample table and the optional per-image transform."""
        self.transform = transform
        self.dataframe = dataframe

    def __len__(self):
        """Number of rows in the sample table."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``.

        If opening or converting the image fails, the error is printed and
        ``(None, None)`` is returned instead of raising.
        """
        path = str(self.dataframe.iloc[idx, 0])
        try:
            rgb = Image.open(path).convert('RGB')
        except Exception as err:
            print(f"Error loading image {path}: {err}")
            # NOTE(review): a DataLoader using the default collate function is
            # not expected to accept None samples — confirm how callers cope.
            return None, None

        target = int(self.dataframe.iloc[idx, 1])
        # A falsy transform (e.g. None) means the image is returned unchanged
        sample = self.transform(rgb) if self.transform else rgb
        return sample, target


In [3]:
# Preprocessing applied to every image: resize to 64x64, then convert to a tensor
transform = transforms.Compose(
    [transforms.Resize((64, 64)), transforms.ToTensor()]
)




In [4]:
# Build the training table: one image path and one `glasses` label per row.
# Image files are addressed as <train_directory><id>.png
train_directory = 'C:/Users/sanda/OneDrive - University of Central Florida/UCF/4_Spring_2024/Courses/STA6367 Statistical Methodology for Data Science II/Final Project/glasses/faces-spring-2020/faces-spring-2020/face-'
train_df = pd.read_csv('C:/Users/sanda/OneDrive - University of Central Florida/UCF/4_Spring_2024/Courses/STA6367 Statistical Methodology for Data Science II/Final Project/glasses/train.csv')
train_df['path'] = [train_directory + str(face_id) + '.png' for face_id in train_df['id']]
# Keep only what the dataset reads by position: path first, label second
train_df = train_df[['path', 'glasses']]

In [5]:
# Wrap the training table in a Dataset, then serve it in shuffled batches of 32
train_dataset = GlassesDataset(dataframe=train_df, transform=transform)
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=32)

# CNN 1

In [6]:

# Define the CNN architecture
class CNN(nn.Module):
    """Two-stage convolutional binary classifier for 3-channel 64x64 images.

    Each stage is a 5x5 convolution (padding 2, so height and width are kept),
    a ReLU and a 2x2 max-pool with stride 2 that halves height and width. For
    a 64x64 input the second stage therefore yields 32 feature maps of 16x16,
    which feed two fully connected layers. The final sigmoid squashes the
    single output to a value between 0 and 1.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=5, padding=2)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, padding=2)
        self.fc1 = nn.Linear(32 * 16 * 16, 500)  # 32 maps of 16x16 after two poolings of a 64x64 input
        self.fc2 = nn.Linear(500, 1)  # Output dimension is 1 for binary classification
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Map a batch of images to one probability per image.

        Args:
            x: tensor of shape (batch, 3, 64, 64); a single unbatched image of
               shape (3, 64, 64) is also accepted and treated as a batch of one.

        Returns:
            Tensor of shape (batch, 1) with values between 0 and 1.

        Raises:
            RuntimeError: if the spatial size is not 64x64, because the
                flattened features then no longer match ``fc1``.
        """
        if x.dim() == 3:
            # Unbatched image: add the batch axis explicitly
            x = x.unsqueeze(0)
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        # Flatten per sample. The previous view(-1, 32 * 16 * 16) silently
        # moved surplus features into the batch dimension for other input
        # sizes, returning more rows than there were images.
        x = x.flatten(start_dim=1)
        x = self.relu(self.fc1(x))
        return self.sigmoid(self.fc2(x))  # Sigmoid activation for binary classification




In [7]:

# Fresh network, binary cross-entropy on the sigmoid outputs, Adam with default settings
model = CNN()
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters())

# One entry per completed window of 100 mini-batches (mean loss over that window).
# Batches after the last full window of an epoch are never reported, because the
# accumulator is reset when the next epoch starts.
train_losses = []
for epoch in range(20):
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train_loader):
        # The loss is computed against float targets with a trailing axis of size 1
        targets = labels.float().unsqueeze(1)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if (i + 1) % 100 != 0:
            continue

        # Window complete: report and record its mean loss, then start a new window
        print(f'Epoch [{epoch + 1}/{20}], Step [{i + 1}/{len(train_loader)}], Loss: {running_loss / 100:.4f}')
        train_losses.append(running_loss / 100)
        running_loss = 0.0

print('Finished Training')



Epoch [1/20], Step [100/141], Loss: 0.5888
Epoch [2/20], Step [100/141], Loss: 0.3378
Epoch [3/20], Step [100/141], Loss: 0.3087
Epoch [4/20], Step [100/141], Loss: 0.2792
Epoch [5/20], Step [100/141], Loss: 0.2578
Epoch [6/20], Step [100/141], Loss: 0.2523
Epoch [7/20], Step [100/141], Loss: 0.2520
Epoch [8/20], Step [100/141], Loss: 0.2298
Epoch [9/20], Step [100/141], Loss: 0.2136
Epoch [10/20], Step [100/141], Loss: 0.1953
Epoch [11/20], Step [100/141], Loss: 0.1754
Epoch [12/20], Step [100/141], Loss: 0.1568
Epoch [13/20], Step [100/141], Loss: 0.1398
Epoch [14/20], Step [100/141], Loss: 0.1271
Epoch [15/20], Step [100/141], Loss: 0.1033
Epoch [16/20], Step [100/141], Loss: 0.0957
Epoch [17/20], Step [100/141], Loss: 0.0913
Epoch [18/20], Step [100/141], Loss: 0.0753
Epoch [19/20], Step [100/141], Loss: 0.0574
Epoch [20/20], Step [100/141], Loss: 0.0314
Finished Training


In [22]:
# Function to calculate accuracy
def calculate_accuracy(loader, model):
    """Return the fraction of samples in ``loader`` that ``model`` classifies correctly.

    A sample counts as predicted positive when the model output is > 0.5.

    Args:
        loader: iterable yielding ``(images, labels)`` batches.
        model: ``nn.Module`` producing one score per sample, shaped either
            ``(batch,)`` or ``(batch, 1)``.

    Returns:
        ``correct / total`` as a float, or ``0.0`` when the loader yields no
        samples (previously a ZeroDivisionError).
    """
    # Remember the current mode so evaluation does not leave the model in eval
    # mode if the caller goes on training afterwards.
    was_training = model.training
    model.eval()
    correct = 0
    total = 0
    try:
        with torch.no_grad():
            for images, labels in loader:
                outputs = model(images)
                # Compare as flat vectors: pairing a (batch,) output with
                # (batch, 1) labels would broadcast to (batch, batch) and
                # over-count matches.
                predicted = (outputs > 0.5).float().reshape(-1)
                targets = labels.reshape(-1)
                total += labels.size(0)
                correct += (predicted == targets).sum().item()
    finally:
        model.train(was_training)
    return correct / total if total else 0.0

# Score the trained model on the training loader and report the result
train_accuracy = calculate_accuracy(loader=train_loader, model=model)
print(f"Training Accuracy: {train_accuracy}")

Training Accuracy: 0.9971111111111111


# Test

In [19]:
class GlassesTestDataset(Dataset):
    """Unlabelled image dataset backed by a DataFrame.

    Column 0 of ``dataframe`` is read (by position) as the image path.
    """

    def __init__(self, dataframe, transform=None):
        """Keep the sample table and the optional per-image transform."""
        self.transform = transform
        self.dataframe = dataframe

    def __len__(self):
        """Number of rows in the sample table."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Return the image for row ``idx``.

        If opening or converting the image fails, the error is printed and
        ``None`` is returned instead of raising.
        """
        path = str(self.dataframe.iloc[idx, 0])
        try:
            rgb = Image.open(path).convert('RGB')
        except Exception as err:
            print(f"Error loading image {path}: {err}")
            # NOTE(review): a DataLoader using the default collate function is
            # not expected to accept None samples — confirm how callers cope.
            return None

        # A falsy transform (e.g. None) means the image is returned unchanged
        return self.transform(rgb) if self.transform else rgb


In [20]:

# Build the test table: one image path per row, addressed as <test_directory><id>.png
test_directory = 'C:/Users/sanda/OneDrive - University of Central Florida/UCF/4_Spring_2024/Courses/STA6367 Statistical Methodology for Data Science II/Final Project/glasses/faces-spring-2020/test/face-'
test_df = pd.read_csv('C:/Users/sanda/OneDrive - University of Central Florida/UCF/4_Spring_2024/Courses/STA6367 Statistical Methodology for Data Science II/Final Project/glasses/test.csv')
test_df['path'] = [test_directory + str(face_id) + '.png' for face_id in test_df['id']]
# Keep only the path column, which the test dataset reads by position
test_df = test_df[['path']]


In [21]:
test_dataset = GlassesTestDataset(test_df, transform=transform)

# Create data loader for testing; shuffle=False keeps predictions in test_df row order
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Switch to inference mode explicitly instead of relying on whichever cell ran
# before this one having done it.
model.eval()

# Make predictions on the test set
test_predictions = []

with torch.no_grad():
    for images in test_loader:
        outputs = model(images)
        predictions = torch.round(outputs)  # Round the probabilities to get binary predictions
        # Collect plain Python ints directly; no second conversion pass is needed
        test_predictions.extend(predictions.flatten().int().tolist())

# Print the predictions
print(test_predictions)


[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 

In [33]:
# Label table that the later cells compare against the test predictions;
# header=None makes pandas treat the first line as data rather than column names.
# NOTE(review): the variable is called `ytest` but the file is `ytrain.csv` —
# confirm this file really holds the test-set labels.
ytest = pd.read_csv('C:/Users/sanda/OneDrive - University of Central Florida/UCF/4_Spring_2024/Courses/STA6367 Statistical Methodology for Data Science II/Final Project/glasses/ytrain.csv', header=None)

In [27]:
# Label table as a NumPy array.
# NOTE(review): `matrix` is not referenced by any other cell visible here — possibly a leftover.
matrix = ytest.to_numpy()

In [34]:
# Flatten the label table into a 1-D array so it can be paired element-wise with the predictions
vector = ytest.to_numpy().flatten()

In [35]:
# Number of labels loaded; the accuracy below is only meaningful if this equals the number of predictions
len(vector)

500

In [43]:
# Percentage of test predictions that agree with the reference labels.
# zip() stops at the shorter input, so a length mismatch would score only a
# prefix while still dividing by the full prediction count. Fail loudly instead.
if len(test_predictions) != len(vector):
    raise ValueError(
        f"Cannot score {len(test_predictions)} predictions against {len(vector)} labels"
    )

# Count of positions where prediction and label agree
pred_vals = sum(1 for pred, true_val in zip(test_predictions, vector) if pred == true_val)

print(pred_vals/len(test_predictions)*100)


89.8


# CNN 2

In [41]:
import torch.nn.functional as F

class CNN(nn.Module):
    """Four-stage convolutional binary classifier for 3-channel 64x64 images.

    Every stage is a 5x5 convolution (padding 2), a ReLU and a 2x2 max-pool
    with stride 2. Channels grow 3 -> 16 -> 32 -> 64 -> 128 while each pooling
    halves height and width, so a 64x64 input reaches the classifier head as
    128 maps of 4x4. The head is two linear layers followed by a sigmoid.

    Note: defining this class rebinds the name ``CNN`` in the namespace.
    """

    def __init__(self):
        super().__init__()
        # Conv/pool pairs, registered stage by stage
        self.conv1 = nn.Conv2d(3, 16, kernel_size=5, padding=2)
        self.pool1 = nn.MaxPool2d(2, stride=2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=5, padding=2)
        self.pool2 = nn.MaxPool2d(2, stride=2)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=5, padding=2)
        self.pool3 = nn.MaxPool2d(2, stride=2)
        self.conv4 = nn.Conv2d(64, 128, kernel_size=5, padding=2)
        self.pool4 = nn.MaxPool2d(2, stride=2)
        # Classifier head: 128 maps of 4x4 -> 500 hidden units -> 1 output
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(128 * 4 * 4, 500)
        self.fc2 = nn.Linear(500, 1)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Run the four conv stages and the head; returns one sigmoid output per sample."""
        stages = (
            (self.conv1, self.pool1),
            (self.conv2, self.pool2),
            (self.conv3, self.pool3),
            (self.conv4, self.pool4),
        )
        for conv, pool in stages:
            x = pool(self.relu(conv(x)))
        hidden = self.relu(self.fc1(self.flatten(x)))
        return self.sigmoid(self.fc2(hidden))

# Fresh network, binary cross-entropy on the sigmoid outputs, Adam with default settings
model = CNN()
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters())

# One entry per completed window of 100 mini-batches (mean loss over that window).
# Batches after the last full window of an epoch are never reported, because the
# accumulator is reset when the next epoch starts.
train_losses = []
for epoch in range(20):
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train_loader):
        # The loss is computed against float targets with a trailing axis of size 1
        targets = labels.float().unsqueeze(1)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if (i + 1) % 100 != 0:
            continue

        # Window complete: report and record its mean loss, then start a new window
        print(f'Epoch [{epoch + 1}/{20}], Step [{i + 1}/{len(train_loader)}], Loss: {running_loss / 100:.4f}')
        train_losses.append(running_loss / 100)
        running_loss = 0.0

print('Finished Training')


Epoch [1/20], Step [100/141], Loss: 0.6537
Epoch [2/20], Step [100/141], Loss: 0.3789
Epoch [3/20], Step [100/141], Loss: 0.3100
Epoch [4/20], Step [100/141], Loss: 0.2960
Epoch [5/20], Step [100/141], Loss: 0.2880
Epoch [6/20], Step [100/141], Loss: 0.2702
Epoch [7/20], Step [100/141], Loss: 0.2577
Epoch [8/20], Step [100/141], Loss: 0.2375
Epoch [9/20], Step [100/141], Loss: 0.2204
Epoch [10/20], Step [100/141], Loss: 0.2111
Epoch [11/20], Step [100/141], Loss: 0.1949
Epoch [12/20], Step [100/141], Loss: 0.1826
Epoch [13/20], Step [100/141], Loss: 0.1719
Epoch [14/20], Step [100/141], Loss: 0.1459
Epoch [15/20], Step [100/141], Loss: 0.1237
Epoch [16/20], Step [100/141], Loss: 0.1062
Epoch [17/20], Step [100/141], Loss: 0.1011
Epoch [18/20], Step [100/141], Loss: 0.0680
Epoch [19/20], Step [100/141], Loss: 0.0551
Epoch [20/20], Step [100/141], Loss: 0.0752
Finished Training


In [42]:
test_dataset = GlassesTestDataset(test_df, transform=transform)

# Create data loader for testing; shuffle=False keeps predictions in test_df row order
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Switch to inference mode explicitly instead of relying on whichever cell ran
# before this one having done it.
model.eval()

# Make predictions on the test set
test_predictions = []

with torch.no_grad():
    for images in test_loader:
        outputs = model(images)
        predictions = torch.round(outputs)  # Round the probabilities to get binary predictions
        # Collect plain Python ints directly; no second conversion pass is needed
        test_predictions.extend(predictions.flatten().int().tolist())

# Print the predictions
print(test_predictions)


[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 

In [43]:
# Label table that the later cells compare against the test predictions;
# header=None makes pandas treat the first line as data rather than column names.
# NOTE(review): the variable is called `ytest` but the file is `ytrain.csv` —
# confirm this file really holds the test-set labels.
ytest = pd.read_csv('C:/Users/sanda/OneDrive - University of Central Florida/UCF/4_Spring_2024/Courses/STA6367 Statistical Methodology for Data Science II/Final Project/glasses/ytrain.csv', header=None)

In [44]:
# Flatten the label table into a 1-D array so it can be paired element-wise with the predictions
vector = ytest.to_numpy().flatten()

In [45]:
# Percentage of test predictions that agree with the reference labels.
# zip() stops at the shorter input, so a length mismatch would score only a
# prefix while still dividing by the full prediction count. Fail loudly instead.
if len(test_predictions) != len(vector):
    raise ValueError(
        f"Cannot score {len(test_predictions)} predictions against {len(vector)} labels"
    )

# Count of positions where prediction and label agree
pred_vals = sum(1 for pred, true_val in zip(test_predictions, vector) if pred == true_val)

print(pred_vals/len(test_predictions)*100)


89.8
