In [13]:
import pandas as pd
import matplotlib.pyplot as plt
import os
from sklearn.model_selection import train_test_split

import numpy as np


import torch
import torch.nn as nn
import torchvision
from torchvision.transforms import v2
from torch.utils.data import Dataset

#Get rid of SettingWithCopyWarnings
pd.options.mode.chained_assignment = None  


In [14]:
PNW = pd.read_csv('../../Data/processedData/PNW/PNW.csv')
PNW = PNW.dropna()
PNW["flowering"] = PNW['reproductiveCondition'].isin(['flowering','flowering|fruiting','flowering|fruiting|flower budding','flowering|flower budding']).astype(int)
PNW["fruiting"] = PNW['reproductiveCondition'].isin(['fruiting','flowering|fruiting','flowering|fruiting|flower budding','fruiting|flower budding']).astype(int)

photoPath = "../../../PNW_Angiosperms/labeled_photos"

In [15]:
#I only have 49,999 so I'm missing one photo. The following code drops that row from the dataframe
for fname in PNW['file_name']: 
    if not os.path.isfile(photoPath + "/" + fname):
        PNW = PNW[PNW['file_name'] != fname]

In [16]:
photos = PNW[["file_name", "flowering"]].reset_index(drop=True)
photos_train, photos_val = train_test_split(list(photos.index), test_size=.25)
p_train= photos.iloc[photos_train]
p_val = photos.iloc[photos_val]

In [17]:
# Define relevant variables for the ML task
batch_size = 64
num_classes = 10
learning_rate = 0.001
num_epochs = 20

# Device will determine whether to run the training on GPU or CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [18]:
#Some of our images are .pngs, we drop the alpha. 
train_transform = v2.Compose([
    #Rely on the v2 transforms from torchvision.transforms.v2

    #Use tensors instead of PIL images

    #Use torch.uint8 dtype, especially for resizing
                                v2.ToPILImage(),
                                v2.ToTensor(),
                                v2.RandomAffine(degrees=(-180,180), translate=(0,.1), scale=(.9,1)),
                                v2.RandomHorizontalFlip(p=0.5),
                                v2.RandomVerticalFlip(p=0.5),
                                v2.ColorJitter(brightness=.3, hue=.01),
                                v2.Resize ( (256,256) , interpolation=2 ),
                                v2.CenterCrop(224),
                                v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
                               #transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),

                            ])
val_transform = v2.Compose([
                                #transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
                                v2.ToPILImage(),
                                v2.ToTensor(),
                                torchvision.transforms.Resize ( (224,224) , interpolation=2 ),
                                v2.Resize ( (256,256) , interpolation=2 ),
                                v2.CenterCrop(224),
                                v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
                            ])
target_transform = v2.Compose([
                                v2.Lambda(lambda x: torch.tensor(x).long()),
                            ])                         



In [19]:
import os
import pandas as pd
from torchvision.io import read_image

class CustomImageDataset(Dataset):
    def __init__(self, label_frame, img_dir, transform=None, target_transform=None):
        self.img_labels = label_frame
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
        image = read_image(img_path)
        label = self.img_labels.iloc[idx, 1]
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label

In [20]:
train_data = CustomImageDataset(p_train, photoPath, train_transform, target_transform)
val_data = CustomImageDataset(p_val, photoPath, val_transform, target_transform)

In [21]:
from torch.utils.data import DataLoader

train_dataloader = DataLoader(train_data, batch_size=64, shuffle=True)
val_dataloader = DataLoader(val_data, batch_size=64, shuffle=True)

In [22]:
# Display image and label.
train_features, train_labels = next(iter(train_dataloader))
print(f"Feature batch shape: {train_features.size()}")
print(f"Labels batch shape: {train_labels.size()}")
img = train_features[0].squeeze()
label = train_labels[0]
plt.imshow(img.permute(1,2,0))
plt.show()
print(f"Label: {label}")

Feature batch shape: torch.Size([64, 3, 224, 224])
Labels batch shape: torch.Size([64])


Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).


KeyboardInterrupt: 

In [None]:
#model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet18', pretrained=True)
criterion = torch.nn.CrossEntropyLoss()  
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

NameError: name 'model' is not defined

In [None]:
def train(model, num_epochs, lr):
    optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)
    for epoch in range(num_epochs):
        train_loss = 0.0
        train_correct = 0
        
        #Turn the model into training mode
        model.train()

        for data, target in train_dataloader:
            optimizer.zero_grad()
            data = data.to(device)
            output = model(data)
            target=target.to(device)   
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            train_loss += loss.item() * data.size(0)
            _, pred = torch.max(output, 1)
            train_correct += (pred == target).sum().item()

        train_loss /= len(train_dataloader.dataset)
        train_acc = 100.0 * train_correct / len(train_dataloader.dataset)

        test_loss = 0.0
        test_correct = 0
        model.eval()

        with torch.no_grad():
            for data, target in val_dataloader:
                data = data.to(device)
                target = target.to(device)
                output = model(data)
                loss = criterion(output, target)

                test_loss += loss.item() * data.size(0)
                _, pred = torch.max(output, 1)
                test_correct += (pred == target).sum().item()

        test_loss /= len(val_dataloader.dataset)
        test_acc = 100.0 * test_correct / len(val_dataloader.dataset)

        print(f'Epoch {epoch+1}: Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%')

In [None]:
#model.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [None]:
#train(model, 40, .001)

Epoch 1: Train Loss: 0.6263, Train Acc: 72.47%, Test Loss: 0.4390, Test Acc: 77.67%
Epoch 2: Train Loss: 0.4373, Train Acc: 77.61%, Test Loss: 0.4178, Test Acc: 79.22%
Epoch 3: Train Loss: 0.4206, Train Acc: 78.65%, Test Loss: 0.4070, Test Acc: 79.73%
Epoch 4: Train Loss: 0.4069, Train Acc: 79.41%, Test Loss: 0.3939, Test Acc: 80.54%
Epoch 5: Train Loss: 0.3989, Train Acc: 80.15%, Test Loss: 0.3865, Test Acc: 81.02%
Epoch 6: Train Loss: 0.3928, Train Acc: 80.69%, Test Loss: 0.3944, Test Acc: 80.14%
Epoch 7: Train Loss: 0.3856, Train Acc: 81.07%, Test Loss: 0.3789, Test Acc: 81.54%
Epoch 8: Train Loss: 0.3807, Train Acc: 81.20%, Test Loss: 0.3901, Test Acc: 80.78%
Epoch 9: Train Loss: 0.3762, Train Acc: 81.47%, Test Loss: 0.3893, Test Acc: 80.34%
Epoch 10: Train Loss: 0.3704, Train Acc: 82.00%, Test Loss: 0.3837, Test Acc: 81.75%
Epoch 11: Train Loss: 0.3664, Train Acc: 82.24%, Test Loss: 0.3819, Test Acc: 81.50%
Epoch 12: Train Loss: 0.3657, Train Acc: 82.34%, Test Loss: 0.3797, Test A

In [None]:
model50 = torch.hub.load('pytorch/vision:v0.10.0', 'resnet50', pretrained=True)
criterion = torch.nn.CrossEntropyLoss()  
optimizer = torch.optim.SGD(model50.parameters(), lr=0.001, momentum=0.9)
model50.to(device)
train(model50, 40, .001)

Using cache found in C:\Users\Sean Haight/.cache\torch\hub\pytorch_vision_v0.10.0


Epoch 1: Train Loss: 0.5838, Train Acc: 73.92%, Test Loss: 0.4099, Test Acc: 79.80%
Epoch 2: Train Loss: 0.4186, Train Acc: 78.73%, Test Loss: 0.3876, Test Acc: 81.33%
Epoch 3: Train Loss: 0.3986, Train Acc: 80.02%, Test Loss: 0.3766, Test Acc: 82.15%
Epoch 4: Train Loss: 0.3839, Train Acc: 81.09%, Test Loss: 0.3703, Test Acc: 82.26%
Epoch 5: Train Loss: 0.3748, Train Acc: 81.63%, Test Loss: 0.3850, Test Acc: 81.14%
Epoch 6: Train Loss: 0.3658, Train Acc: 82.16%, Test Loss: 0.3601, Test Acc: 83.02%
Epoch 7: Train Loss: 0.3606, Train Acc: 82.44%, Test Loss: 0.3777, Test Acc: 81.28%
Epoch 8: Train Loss: 0.3528, Train Acc: 83.15%, Test Loss: 0.3659, Test Acc: 82.70%
Epoch 9: Train Loss: 0.3455, Train Acc: 83.39%, Test Loss: 0.3624, Test Acc: 83.10%
Epoch 10: Train Loss: 0.3406, Train Acc: 83.61%, Test Loss: 0.3588, Test Acc: 82.94%
Epoch 11: Train Loss: 0.3357, Train Acc: 83.99%, Test Loss: 0.3665, Test Acc: 82.76%
Epoch 12: Train Loss: 0.3288, Train Acc: 84.21%, Test Loss: 0.3677, Test A

In [27]:
model50 = torch.hub.load('pytorch/vision:v0.10.0', 'resnet50', pretrained=True)
criterion = torch.nn.CrossEntropyLoss()  
optimizer = torch.optim.SGD(model50.parameters(), lr=0.001, momentum=0.9)



# Modify the final fully connected layer to include dropout
#num_ftrs = model50.fc.in_features
#model50.fc = nn.Sequential(# Add dropout with probability 0.1
#    nn.Linear(num_ftrs, 2)  # Modify output size according to your task
#)
model50.to(device)
train(model50, 20, .001)


Using cache found in C:\Users\Sean Haight/.cache\torch\hub\pytorch_vision_v0.10.0


KeyboardInterrupt: 