In [1]:
import sys
import os
sys.path.append(os.path.join(os.getcwd(), '..','..', 'dataset'))
import torch
import torch.nn as nn
import torchvision.models as models
from torchvision import transforms
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score
from TwitterImageDataset import TwitterImageDataset

# Image preprocessing: resize to 224x224, convert to a tensor, and normalize
# each channel with the mean/std values below (the ImageNet statistics that
# torchvision's pretrained classification weights are documented to expect).
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize for GoogLeNet
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load dataset
dataset = TwitterImageDataset("../../AutoDep_Master", transform=transform)

# Split dataset into train (80%) and test (20%).
# A dedicated, seeded generator makes the split identical on every
# "Restart & Run All", so the test accuracy is comparable between runs and
# test samples never drift into the training set across kernel restarts.
SPLIT_SEED = 42
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size  # remainder, so the two sizes always sum to len(dataset)
train_dataset, test_dataset = torch.utils.data.random_split(
    dataset,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# DataLoaders: reshuffle the training data each epoch; keep the test order fixed.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Load GoogLeNet initialised with torchvision's ImageNet-1k weights.
# The explicit `weights=` enum replaces the deprecated `pretrained=True` flag
# (which newer torchvision releases warn about) and selects the same checkpoint.
model = models.googlenet(weights=models.GoogLeNet_Weights.IMAGENET1K_V1)

# Replace the final fully connected layer with a fresh 2-way classifier head.
# The new layer keeps the original input width and is randomly initialised.
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, 2)  # 2 classes: Control (0) / Diagnosed (1)

# Move model to GPU if available, otherwise stay on CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Loss and optimizer: cross-entropy over the two logits; Adam updates every
# parameter of the network (model.parameters() is passed whole, nothing is
# frozen) with a small learning rate.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.00001)

print('Training has started...')

# Run a fixed number of passes over the training loader and, after each pass,
# report the loss averaged over the number of batches.
num_epochs = 30
for epoch in range(num_epochs):
    model.train()  # switch to training mode before the forward passes below
    running_loss = 0.0

    for images, labels in train_loader:
        # Place the batch on the same device as the model.
        images = images.to(device)
        labels = labels.to(device)

        # Reset gradients accumulated by the previous step.
        optimizer.zero_grad()

        # Forward pass and loss for this batch.
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backpropagate and apply the parameter update.
        loss.backward()
        optimizer.step()

        # Accumulate the scalar loss value for the epoch summary.
        running_loss += loss.item()

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader)}")

# Evaluation: switch to eval mode and collect predictions over the test loader.
model.eval()
all_preds = []
all_labels = []

# Gradients are not needed for scoring, so disable autograd tracking.
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        # Predicted class = index of the largest logit in each row.
        preds = outputs.argmax(dim=1)

        # Bring results back to host memory before accumulating them.
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Fraction of test samples whose predicted class equals the true label.
accuracy = accuracy_score(all_labels, all_preds)
print(f"Test Accuracy: {accuracy:.4f}")


  Referenced from: <367D4265-B20F-34BD-94EB-4F3EE47C385B> /Users/ardakabadayi/Desktop/Code/Syracuse/CIS662(IntroToMl)/env/lib/python3.12/site-packages/torchvision/image.so
  warn(


../../AutoDep_Master/control_group/users
../../AutoDep_Master/diagnosed_group/users


Downloading: "https://download.pytorch.org/models/googlenet-1378be20.pth" to /Users/ardakabadayi/.cache/torch/hub/checkpoints/googlenet-1378be20.pth
100%|████████████████████████████████| 49.7M/49.7M [00:00<00:00, 54.4MB/s]


Training has started...


KeyboardInterrupt: 