In [None]:
# tqdm supplies the progress bars used by the training and inference loops in later cells.
!pip install tqdm




In [None]:
# Mount Google Drive (Colab only); every dataset path in this notebook lives under /content/drive/.
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [None]:
import numpy as np
import time
import shutil
import os
import random
start_time = time.time()

import pandas as pd
import csv
from torchvision import transforms
from torchvision import datasets
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch
import requests
import urllib.request
from tqdm import tqdm
from PIL import Image


from torchvision import datasets, models, transforms
import torch.nn as nn


# Relative label / image locations.
# NOTE(review): none of these three constants is read anywhere in the visible notebook
# (loadCSV's parameter of the same name shadows LABELS_FILE_PATH) — confirm they are still needed.
LABELS_FILE_PATH = "filteredFieldorNotLabels.csv"
IMAGE_FOLDER_PATH = "filtered/"
RICE_FOLDER_PATH = "Rice/"

# Root folder on the mounted Drive; later cells build their dataset, label and checkpoint paths from it.
imagesRoot = '/content/drive/MyDrive/GSV-CropType-Thailand/images/'

def loadCSV(LABELS_FILE_PATH):
    """Read the label CSV and return parallel lists of filenames and labels.

    The first row is treated as a header and is only printed. For every other
    non-empty row, column 0 is the image name with everything up to and
    including the first '-' removed (a name without '-' is kept whole, because
    str.find returns -1), and column 1 is the raw label string.

    Args:
        LABELS_FILE_PATH: path of the CSV file to read.

    Returns:
        (filenames, labels): two lists of equal length.
    """
    filenames = []
    labels = []
    # newline='' is the csv module's documented way to open files so that
    # quoted fields containing line breaks are parsed correctly.
    with open(LABELS_FILE_PATH, newline='') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        header = next(csv_reader, None)
        if header is not None:
            print(f'Column names are {", ".join(header)}')
        for row in csv_reader:
            if not row:
                # csv.reader yields [] for blank lines; there is nothing to label.
                continue
            filename = row[0]
            idx = filename.find('-')
            filenames.append(filename[idx+1:])
            labels.append(row[1])
    # Report the number of label rows; the header row is not a label.
    print(f'Loaded CSV with {len(labels)} labels.')
    return filenames, labels

def encodeLabels(labels, classes=None):
    """Map label strings to integer class ids.

    Args:
        labels: iterable of label strings; '' marks an unlabelled sample.
        classes: optional dict from label string to class id. Defaults to
            {'Field': 0, 'Not-Field': 1}.

    Returns:
        A list with one id per label. Unlabelled samples ('') get id 2 and
        their index is printed.

    Raises:
        KeyError: for a non-empty label that is missing from `classes`.
    """
    if classes is None:
        classes = {'Field': 0, 'Not-Field': 1}
    newLabels = []
    for i, l in enumerate(labels):
        if l != '':
            newLabels.append(classes[l])
        else:
            # Unlabelled row: encode as 2 and print its index so it can be traced.
            newLabels.append(2)
            print(i)
    return newLabels


def _loadImagesWithLabel(folder, names, label, transform):
    """Load every readable image named in `names` from `folder` as [tensor, label] pairs."""
    samples = []
    for fil in names:
        path = os.path.join(folder, fil)
        try:
            im = Image.open(path)

            image_array = np.array(im)

            if image_array.shape[2] == 4:
                image_array = image_array[:, :, :3]  # Remove the alpha channel if present
            image = Image.fromarray(image_array)
            cropped_image = image.resize((600, 600))
        except Exception as err:
            # Best-effort loading: unreadable files and images whose array has no
            # channel axis are skipped, but the skip is reported instead of hidden.
            print(f"Skipping {path}: {err!r}")
            continue
        samples.append([transform(cropped_image), label])
    return samples


def getImagesFromFieldNotField(folderPath, batchSize):
    """Build train / test / validation DataLoaders from the field and notField folders.

    Images under `folderPath`/field/ get label 0 and images under
    `folderPath`/notField/ get label 1. Each image is resized to 600x600,
    converted to a tensor and normalised with the mean/std given below.
    The samples are split randomly: 60% train, and the remainder is halved
    into test and validation.

    Args:
        folderPath: directory containing the `field/` and `notField/` sub-folders.
        batchSize: batch size for all three loaders.

    Returns:
        (train, test, val) DataLoaders, in that order.
    """
    fieldDir = os.path.join(folderPath, "field")
    nfieldDir = os.path.join(folderPath, "notField")
    fieldnames = os.listdir(fieldDir)
    nfieldnames = os.listdir(nfieldDir)
    print("LOADING")
    testTransform = transforms.Compose([
        transforms.Resize((600, 600)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    data = _loadImagesWithLabel(fieldDir, fieldnames, 0, testTransform)
    # The notField folder must be walked with its OWN listing; iterating the
    # field names here would only pick up files that exist in both folders.
    data += _loadImagesWithLabel(nfieldDir, nfieldnames, 1, testTransform)

    print("Making splits")
    nTrain = int(0.6*len(data))
    train, tes = torch.utils.data.random_split(data, [nTrain, len(data)-nTrain])
    nTest = int(0.5*len(tes))
    test, val = torch.utils.data.random_split(tes, [nTest, len(tes)-nTest])

    train = DataLoader(train, batch_size = batchSize, shuffle=True)
    test = DataLoader(test, batch_size = batchSize, shuffle=True)
    val = DataLoader(val, batch_size = batchSize, shuffle=True)
    return train, test, val

def uploadImages(folderPath, filenames, labels, batchSize):
    """Load the labelled images found in `folderPath` and split them 60/20/20.

    Only samples whose encoded label is 0 or 1 are kept. The split is
    positional (no shuffling): the first 60% go to train, the next 20% to
    validation and the rest to test.

    Args:
        folderPath: directory holding the image files (expected to end with '/').
        filenames: image names, parallel to `labels`.
        labels: raw label strings, encoded with encodeLabels.
        batchSize: batch size for all three loaders.

    Returns:
        (train, val, test, j) where j is the number of images loaded.
    """
    # Local import: the import cell only does `from torchvision import ...`,
    # which does not bind the name `torchvision` used below.
    import torchvision

    data = []

    # encodeLabels takes the class mapping as its second argument; passing None
    # leaves it on its built-in Field / Not-Field encoding.
    labels = encodeLabels(labels, None)
    fs = os.listdir(folderPath)
    print(fs[0:10])
    print(len(filenames))
    # Set lookup keeps the membership test O(1) per filename.
    files = set(fs)
    j = 0
    for i, fil in enumerate(filenames):
        # NOTE(review): membership is checked on the first 60 characters but the
        # file is read under the full name — confirm names never exceed 60 chars.
        if fil[0:60] in files and labels[i] in (0, 1):
            j += 1
            im = torchvision.io.read_image(folderPath + fil)
            data.append([im.to(torch.float), labels[i]])

    splitIdxval = round(len(data)*0.6)
    splitIdxtest = round(len(data)*0.8)

    train = DataLoader(data[:splitIdxval], batch_size = batchSize)
    val = DataLoader(data[splitIdxval:splitIdxtest], batch_size = batchSize)
    test = DataLoader(data[splitIdxtest:], batch_size = batchSize)

    return train, val, test, j

def train_imshow():
    """Show up to five images from one training batch, titled with their class names.

    Reads the module-level `trainloader`; takes no arguments and returns nothing.
    """
    classes = {'Field': 0, 'Not-Field': 1}
    # The batch carries integer labels, so invert the name -> id mapping for titles.
    classNames = {idx: name for name, idx in classes.items()}
    # Python 3 iterators have no .next() method; use the built-in next().
    images, labels = next(iter(trainloader))
    print(labels)
    fig, axes = plt.subplots(figsize=(10, 4), ncols=5)
    # A batch can hold fewer than five images.
    for i in range(min(5, len(images))):
        ax = axes[i]
        ax.imshow(images[i].permute(1, 2, 0))
        ax.title.set_text(classNames[int(labels[i])])
    plt.show()


In [None]:
def _fetchLabelledPano(pano, bearing, label, saveLoc, dirLists):
  """Fetch one Street View image into the sub-folder for `label`; return True if getStreet was called."""
  subFolder = {'Field': 'field/', 'Not-Field': 'notField/'}.get(label)
  if subFolder is None:
    # Unknown / empty label: there is no class folder to save into, so skip it
    # (and skip the metadata request as well).
    return False
  meta = getMeta(pano, bearing)
  if meta == "404":
    return False
  getStreet(pano, saveLoc + subFolder, bearing, meta, dirLists[subFolder])
  return True


def getTrainIms(filenames, labels):
  """Download the labelled Street View training images into fieldOrNot/field and fieldOrNot/notField.

  Pass 1 handles names whose characters 29-31 are not "GSV": the bearing is
  parsed from the name with setBearing and remembered under the key f[:29].
  Pass 2 handles the "GSV" names whose key was seen in pass 1 and fetches the
  bearing returned by getBearing (stored bearing + 180, modulo 360).
  Samples labelled anything other than 'Field' / 'Not-Field' are skipped.

  Args:
    filenames: image names, parallel to `labels`.
    labels: raw label strings ('Field' / 'Not-Field').
  """
  outFolder = "fieldOrNot/"
  saveLoc = imagesRoot + outFolder
  # Snapshot of what is already on disk; getStreet uses it to skip existing files.
  dirLists = {
    'field/': os.listdir(saveLoc+"field/"),
    'notField/': os.listdir(saveLoc+"notField/"),
  }

  fBearingPairs = {}
  j = 0
  for i, f in enumerate(filenames):
    if f[29:32] != "GSV":
      fBearingPairs, bearing = setBearing(f, filenames, fBearingPairs)
      if _fetchLabelledPano(f[7:29], bearing, labels[i], saveLoc, dirLists):
        j += 1

  # NOTE(review): `fof` is printed below but nothing ever increments it.
  fof = 0
  for i, f in enumerate(filenames):
    if f[29:32] == "GSV" and f[:29] in fBearingPairs:
      bearing = getBearing(f, filenames, fBearingPairs)
      if _fetchLabelledPano(f[7:29], bearing, labels[i], saveLoc, dirLists):
        j += 1

  print("total= ",  j)
  print("fof=", fof)

def getBearing(f, filenames, fBearingPairs):
  """Return the bearing stored for key f[:29], turned by 180 degrees and taken modulo 360.

  `filenames` is accepted but not used.
  """
  locationKey = f[:29]
  turned = float(fBearingPairs[locationKey]) + 180
  return turned % 360

def setBearing(f, filenames, fBearingPairs):
  """Record the bearing text embedded in `f` under the key f[:29].

  The bearing is the part of f[29:] up to and including one character after
  the first '.'. `filenames` is accepted but not used.

  Returns:
    (fBearingPairs, bearing): the same dict, updated in place, and the bearing string.
  """
  locationKey, tail = f[:29], f[29:]
  cut = tail.find(".") + 2
  bearingText = tail[:cut]
  fBearingPairs[locationKey] = bearingText
  return fBearingPairs, bearingText

def getMeta(pano, bearing):
  """Query the Street View metadata endpoint for `pano` and build the image's base file name.

  Args:
    pano: panorama id sent as the `pano` query parameter.
    bearing: only embedded in the returned name; the request itself uses heading=0.

  Returns:
    '<date><pano>&bear<bearing>&GSVLat<lat>&GSVLon<lng>' when the response
    status is 'OK', otherwise the string "404".

  Reads the module-level API key `KEY`.
  """
  params = {
    "size": "640x640",
    "pano": str(pano),
    "fov": "80",
    "heading": "0",
    "pitch": "0",
    "key": KEY,
  }
  # Let requests encode the query string, and always set a timeout so one
  # stalled connection cannot hang the whole download loop.
  res = requests.get("https://maps.googleapis.com/maps/api/streetview/metadata", params=params, timeout=30)
  resJson = res.json()
  if resJson['status'] ==  'OK':
    return resJson['date']+str(pano)+"&bear"+str(bearing) + "&GSVLat"+str(resJson["location"]["lat"])+"&GSVLon"+str(resJson["location"]["lng"])
  return "404"
def getStreet(pano, SaveLoc, bearing, meta, dirlist):
  """Download the Street View image for `pano` at heading `bearing` to SaveLoc/<meta>.jpg.

  Nothing is downloaded when '<meta>.jpg' is already listed in `dirlist`.
  Reads the module-level API key `KEY`.
  """
  link = "".join([
    "https://maps.googleapis.com/maps/api/streetview?size=640x640&pano=",
    str(pano),
    "&fov=70&heading=",
    str(bearing),
    "&pitch=0&key=",
    KEY,
  ])
  fi = meta + ".jpg"
  if fi in dirlist:
    return
  target = os.path.join(SaveLoc, fi)
  urllib.request.urlretrieve(link, target)

In [None]:

# Mini-batch size shared by the three DataLoaders built below.
BATCH_SIZE = 32
# `labels` holds the raw label strings from column 1 of the CSV (not encoded ids).
filenames, labels = loadCSV(imagesRoot + "fieldOrNotLabels-filtered-tight.csv")
# getImagesFromFieldNotField returns its loaders in the order (train, test, val).
trainloader, testloader, valloader = getImagesFromFieldNotField(imagesRoot + "fieldOrNot/", BATCH_SIZE)

Column names are image, choice, created_at, annotation_id, id, lead_time, updated_at, annotator
Loaded CSV with 815 labels.
LOADING
Making splits


In [None]:
def classBalance(labels):
  """Print how many labels fall into the field and non-field classes.

  Accepts encoded ids (0 = field, 1 = not-field) as well as the raw CSV
  strings ('Field' / 'Not-Field'); any other value is ignored.

  Args:
    labels: iterable of labels.
  """
  field = 0
  nfield = 0
  for l in labels:
    if l == 0 or l == 'Field':
      field += 1
    elif l == 1 or l == 'Not-Field':
      nfield += 1

  print('Field Images : {}, Non-field Images : {}'.format(field,nfield))

# `labels` is the list returned by loadCSV in the data-loading cell, so that cell has to run before this one.
classBalance(labels)

NameError: ignored

In [None]:
def make_train_step(model, optimizer, loss_fn):
  """Build a closure that performs one optimisation step on a batch.

  Args:
    model: network to train.
    optimizer: optimizer holding the parameters to update.
    loss_fn: loss applied to the raw model output (logits) and the targets.

  Returns:
    train_step(x, y) -> (loss, acc): the detached batch loss tensor and the
    batch accuracy computed by accuracy() on the sigmoid of the logits.
  """
  def train_step(x,y):
    # Enter train mode BEFORE the forward pass. The validation loop leaves the
    # model in eval mode, so calling model.train() after model(x) would run the
    # first batch of every later epoch in eval mode.
    model.train()
    # Start from clean gradients.
    optimizer.zero_grad()
    #make prediction
    yhat = model(x)
    #compute loss
    loss = loss_fn(yhat,y)

    loss.backward()
    optimizer.step()

    # Metrics need no autograd graph; detaching lets callers store the results
    # without keeping the graph referenced.
    yhatsig = torch.sigmoid(yhat.detach())
    acc = accuracy(yhatsig, y)

    return loss.detach(), acc
  return train_step

def accuracy(preds, labels):
    """Return the fraction of predictions that equal their label after rounding.

    Args:
        preds: tensor with one probability per sample (e.g. shape (N, 1)).
        labels: targets with one value per prediction.

    Returns:
        A Python float in [0, 1]; 0.0 for an empty batch.
    """
    if len(preds) == 0:
        return 0.0
    # Compare the whole batch at once instead of looping sample by sample.
    rounded = torch.round(preds).reshape(-1)
    targets = torch.as_tensor(labels, device=rounded.device).reshape(-1)
    correct = int((rounded == targets).sum())
    return correct/len(preds)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = models.resnet50(pretrained=True)

# Replace the final layer with a single-output head for the binary field / not-field task.
nr_filters = model.fc.in_features  #number of input features of last layer
model.fc = nn.Linear(nr_filters, 1)

model = model.to(device)
from torch.nn.modules.loss import BCEWithLogitsLoss
from torch.optim import lr_scheduler

# Binary cross-entropy on raw logits: the model itself has no sigmoid layer.
loss_fn = BCEWithLogitsLoss()

# Only the parameters of the new head are handed to the optimizer.
# NOTE(review): nothing here sets requires_grad=False on the rest of the network —
# confirm whether the backbone was meant to be frozen explicitly.
optimizer = torch.optim.Adam(model.fc.parameters(), lr=0.01, betas=(0.9, 0.999))

# Closure that runs one optimisation step per batch.
train_step = make_train_step(model, optimizer, loss_fn)

import copy

from tqdm import tqdm

# Per-batch histories.
losses = []
val_losses = []
accs = []
val_accs = []
# Per-epoch histories (the "test" lists hold validation results).
epoch_train_losses = []
epoch_test_losses = []
epoch_train_accs = []
epoch_test_accs = []

n_epochs = 20
early_stopping_tolerance = 4   # epochs in a row without a new best validation accuracy
early_stopping_threshold = 1.0 # stop as soon as validation accuracy reaches this value

# Must live outside the epoch loop: resetting it every epoch would cap it at 1
# and the tolerance could never be reached.
early_stopping_counter = 0

for epoch in range(n_epochs):
  epoch_loss = 0
  epoch_acc = 0
  print(epoch)
  print(len(trainloader))
  for i ,data in tqdm(enumerate(trainloader), total = len(trainloader)): #iterate over batches
    x_batch , y_batch = data

    x_batch = x_batch.to(device) #move to gpu
    y_batch = y_batch.unsqueeze(1).float() #convert target to same nn output shape
    y_batch = y_batch.to(device) #move to gpu

    loss, acc = train_step(x_batch, y_batch)
    # Store plain floats so the histories do not keep tensors (and their
    # device memory) alive for the whole run.
    loss = loss.item()
    epoch_acc += acc/len(trainloader)
    epoch_loss += loss/len(trainloader)
    losses.append(loss)
    accs.append(acc)

  epoch_train_losses.append(epoch_loss)
  epoch_train_accs.append(epoch_acc)

  print('\nEpoch : {}, train loss : {}, train acc : {}'.format(epoch+1,epoch_loss, epoch_acc))

  # Validation: eval mode, no gradients.
  model.eval()
  cum_loss = 0
  cum_acc = 0
  with torch.no_grad():
    for x_batch, y_batch in valloader:
      x_batch = x_batch.to(device)
      y_batch = y_batch.unsqueeze(1).float() #convert target to same nn output shape
      y_batch = y_batch.to(device)

      yhat = model(x_batch)
      yhatsig = torch.sigmoid(yhat)

      val_loss = loss_fn(yhat,y_batch).item()
      cum_loss += val_loss/len(valloader)
      val_losses.append(val_loss)

      val_acc = accuracy(yhatsig, y_batch)
      cum_acc += val_acc/len(valloader)
      val_accs.append(val_acc)

  epoch_test_losses.append(cum_loss)
  epoch_test_accs.append(cum_acc)

  print('Epoch : {}, val loss : {} val acc: {}'.format(epoch+1,cum_loss, cum_acc))

  best_loss = min(epoch_test_losses)
  best_acc = max(epoch_test_accs)
  #save best model
  # state_dict() returns references to the live parameters, so the snapshot has
  # to be deep-copied or it would silently follow every later update.
  if cum_acc >= best_acc:
    best_acc_model_wts = copy.deepcopy(model.state_dict())
    early_stopping_counter = 0
  else:
    early_stopping_counter += 1
  if cum_loss <= best_loss:
    best_loss_model_wts = copy.deepcopy(model.state_dict())

  #early stopping
  if (early_stopping_counter >= early_stopping_tolerance) or (best_acc >= early_stopping_threshold):
    print("\nTerminating: early stopping")
    break #terminate training

#load best model
model.load_state_dict(best_acc_model_wts)


# Test Section

In [None]:
from sklearn.metrics import precision_score, recall_score

# print('Labels', np.array(labels).shape)


# NOTE(review): this wraps the DataLoader object itself in an array rather than
# its batches — looks like leftover debugging output; confirm it is still wanted.
print(np.array(testloader))

def make_train_step(model, optimizer, loss_fn):
  """Build a closure that performs one optimisation step on a batch.

  This cell re-defines make_train_step; the training cell earlier in the
  notebook defines a function of the same name, which this one replaces.

  Args:
    model: network to train.
    optimizer: optimizer holding the parameters to update.
    loss_fn: loss applied to the raw model output (logits) and the targets.

  Returns:
    train_step(x, y) -> (loss, acc): the detached batch loss tensor and the
    batch accuracy computed by accuracy() on the sigmoid of the logits.
  """
  def train_step(x,y):
    # Enter train mode BEFORE the forward pass; evaluation loops leave the
    # model in eval mode, so model.train() after model(x) would be too late.
    model.train()
    # Start from clean gradients.
    optimizer.zero_grad()
    #make prediction
    yhat = model(x)
    #compute loss
    loss = loss_fn(yhat,y)

    loss.backward()
    optimizer.step()

    # Metrics need no autograd graph.
    yhatsig = torch.sigmoid(yhat.detach())
    acc = accuracy(yhatsig, y)

    return loss.detach(), acc
  return train_step

def accuracy(preds, labels):
    """Return the fraction of predictions that equal their label after rounding.

    This cell re-defines accuracy; the training cell earlier in the notebook
    defines a function of the same name, which this one replaces.

    Args:
        preds: tensor with one probability per sample (e.g. shape (N, 1)).
        labels: targets with one value per prediction.

    Returns:
        A Python float in [0, 1]; 0.0 for an empty batch.
    """
    if len(preds) == 0:
        return 0.0
    # Compare the whole batch at once instead of looping sample by sample.
    rounded = torch.round(preds).reshape(-1)
    targets = torch.as_tensor(labels, device=rounded.device).reshape(-1)
    correct = int((rounded == targets).sum())
    return correct/len(preds)

from torch.nn.modules.loss import BCEWithLogitsLoss
from torch.optim import lr_scheduler

#loss
loss_fn = BCEWithLogitsLoss() #binary cross entropy with sigmoid, so no need to use sigmoid in the model

#optimizer
optimizer = torch.optim.Adam(model.fc.parameters(), lr=0.01, betas=(0.9, 0.999))

#train step
train_step = make_train_step(model, optimizer, loss_fn)

from tqdm import tqdm

# NOTE(review): these assignments wipe the histories recorded by the training
# cell; from here on val_losses / val_accs / epoch_test_* hold TEST results.
losses = []
val_losses = []
accs = []
val_accs = []
epoch_train_losses = []
epoch_test_losses = []
epoch_train_accs = []
epoch_test_accs = []

n_epochs = 1
early_stopping_tolerance = 4
early_stopping_threshold = 1.0
y_true = []   # one float (0.0 / 1.0) per test sample
y_pred = []   # one sigmoid probability per test sample
for epoch in range(n_epochs):
  epoch_loss = 0
  epoch_acc = 0
  print(epoch)
  print(len(trainloader))

  cum_loss = 0
  cum_acc = 0
  #model to eval mode; evaluation doesn't require gradients
  model.eval()
  with torch.no_grad():
    for x_batch, y_batch in testloader:
      x_batch = x_batch.to(device)
      y_batch = y_batch.unsqueeze(1).float() #convert target to same nn output shape
      y_batch = y_batch.to(device)

      yhat = model(x_batch)
      yhatsig = torch.sigmoid(yhat)
      # Flatten the (N, 1) batches so both lists hold plain scalars.
      y_true.extend(y_batch.cpu().numpy().ravel().tolist())
      y_pred.extend(yhatsig.cpu().numpy().ravel().tolist())
      val_loss = loss_fn(yhat,y_batch)
      cum_loss += val_loss/len(testloader)
      val_losses.append(val_loss.item())

      val_acc = accuracy(yhatsig, y_batch)
      cum_acc += val_acc/len(testloader)
      val_accs.append(val_acc)

  epoch_test_losses.append(cum_loss)
  epoch_test_accs.append(cum_acc)

  print('Epoch : {}, test loss : {} test acc: {}'.format(epoch+1,cum_loss, cum_acc))

  # Threshold the probabilities at 0.5. Casting them straight to int truncates
  # every value below 1.0 to 0, which made precision and recall meaningless.
  y_true_labels = np.array(y_true).astype(int)
  y_pred_labels = (np.array(y_pred) > 0.5).astype(int)
  print(np.count_nonzero(y_pred_labels))
  print(y_true.count(1))
  precision = precision_score(y_true_labels, y_pred_labels)
  recall = recall_score(y_true_labels, y_pred_labels)
  print("Precision", precision)
  print("Recall", recall)

  # The best-weights / early-stopping bookkeeping copied from the training loop
  # is intentionally not repeated here: a single evaluation pass has nothing to
  # stop early, and re-assigning best_acc_model_wts / best_loss_model_wts would
  # overwrite the checkpoints captured during training.

In [None]:
def testModels(model, testloader):
  """Print the mean loss and accuracy of `model` over `testloader`.

  Relies on the module-level `device`, `loss_fn` and `accuracy`. Each batch
  contributes its loss / accuracy divided by the number of batches.
  """
  totalLoss, totalAcc = 0, 0
  with torch.no_grad():
    for inputs, targets in testloader:
      inputs = inputs.to(device)
      # Targets become float column vectors to match the model's output shape.
      targets = targets.unsqueeze(1).float().to(device)

      # Evaluation mode for the forward pass.
      model.eval()
      logits = model(inputs)
      probabilities = torch.sigmoid(logits)

      batchLoss = loss_fn(logits, targets)
      totalLoss += batchLoss/len(testloader)

      batchAcc = accuracy(probabilities, targets)
      totalAcc += batchAcc/len(testloader)

    print('Test loss : {} test acc: {}'.format(totalLoss, totalAcc))

# model.load_state_dict(best_acc_model_wts)
# Evaluates whichever weights the global `model` currently holds on the test split.
testModels(model, testloader)


# Inference Section

In [None]:
def saveModel(model, PATH):
  """Write the model's state_dict (its weights, not the module object) to PATH."""
  weights = model.state_dict()
  torch.save(weights, PATH)


def loadModel(PATH):
  """Rebuild the single-output ResNet-18 and load the weights stored at PATH.

  Args:
    PATH: file written by saveModel (a state_dict).

  Returns:
    The model in eval mode. No `.to(device)` is applied here; callers move it
    to the device they need.
  """
  # load_state_dict below replaces every weight, so downloading the ImageNet
  # checkpoint first would be wasted work.
  model = models.resnet18(pretrained=False)
  nr_filters = model.fc.in_features
  model.fc = nn.Linear(nr_filters, 1)
  # map_location lets a checkpoint saved from a GPU session load on a CPU-only runtime.
  model.load_state_dict(torch.load(PATH, map_location="cpu"))
  model.eval()
  return model

# Checkpoint file under imagesRoot; loading it rebinds the global `model` that later cells use.
PATH = imagesRoot + "fieldOrNot-ResNet18-87%.pt"
model = loadModel(PATH)

# saveModel(model, PATH)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 178MB/s]


In [None]:


# Folder whose file names are the candidates for classification.
folderPath = imagesRoot+"Thailand16/"
filteredFilenames = os.listdir(folderPath)
print(len(filteredFilenames))

In [None]:
outFolderPath = imagesRoot +'ThailandFieldOrNot/'
# Predicted class id -> output sub-folder.
classes = {0: 'field/', 1: 'notField/'}
# Names already written to either output folder by an earlier run.
out = os.listdir(outFolderPath+'field/')
print(len(out))
out += os.listdir(outFolderPath+'notField/')

# Set lookup keeps the filter linear instead of scanning `out` once per file.
alreadyClassified = set(out)
allfiles = [x for x in filteredFilenames if x not in alreadyClassified]
print(len(allfiles))


1036


In [None]:
import torch
import torchvision
from torch.utils.data import DataLoader, Dataset
import numpy as np
from PIL import Image
from tqdm import tqdm
import os
import concurrent.futures

class CustomDataset(Dataset):
    """Dataset over the '.jpg' files (matched case-insensitively) of one folder.

    Each item is (image, filename), where image is the result of
    torchvision.io.read_image cast to float.
    """

    def __init__(self, folder_path, filenames):
        self.folder_path = folder_path
        # Keep only names that end in '.jpg', ignoring case.
        self.filenames = list(filter(lambda name: name.lower().endswith('.jpg'), filenames))

    def __len__(self):
        return len(self.filenames)

    def __getitem__(self, idx):
        name = self.filenames[idx]
        pixels = torchvision.io.read_image(os.path.join(self.folder_path, name))
        return pixels.to(torch.float), name

def save_image(out_folder, class_folder, filename, image):
    """Save `image` (an array passed to PIL's Image.fromarray) as out_folder/class_folder/filename."""
    destination = os.path.join(out_folder, class_folder, filename)
    Image.fromarray(image).save(destination)

def saveModelPreds(folderPath, outFolderPath, filenames, classes, numSaved=5):
    """Classify the images in `folderPath` and save each into its predicted class folder.

    Uses the module-level `model`. Images are read through CustomDataset in
    batches of 32, the rounded sigmoid output selects the sub-folder from
    `classes`, and the image is re-saved under outFolderPath/<class folder>/.

    Args:
        folderPath: directory the images are read from.
        outFolderPath: directory containing the class sub-folders.
        filenames: names of the files to classify (non-'.jpg' names are dropped
            by CustomDataset).
        classes: dict from predicted class id (0 / 1) to sub-folder name.
        numSaved: accepted for compatibility; not used.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    dataset = CustomDataset(folderPath, filenames)
    imsLoader = DataLoader(dataset, batch_size=32)

    model.eval()
    outs = 0
    model.to(device)
    with torch.no_grad(), concurrent.futures.ThreadPoolExecutor() as executor:
        for x_batch, fils in tqdm(imsLoader, total=len(imsLoader)):
            x_batch = x_batch.to(device)
            yhat = model(x_batch)
            yhatsig = torch.sigmoid(yhat).cpu().numpy()
            classFolders = [classes[int(index)] for index in np.rint(yhatsig[:, 0])]
            # Back to height x width x channel uint8 arrays for PIL.
            im_batch = x_batch.permute(0, 2, 3, 1).cpu().numpy().astype(np.uint8)

            # Hand the saves to the thread pool; calling save_image directly
            # would run them one by one and leave the executor idle.
            futures = [
                executor.submit(save_image, outFolderPath, classFolders[j], fils[j], im_batch[j])
                for j in range(len(x_batch))
            ]
            for future in futures:
                # result() re-raises any error from the worker, so a failed
                # save is not silently counted as classified.
                future.result()
                outs += 1

    print("Images Classified:", outs)

outFolderPath = imagesRoot +'ThailandFieldOrNot/'
# NOTE(review): images are read from "Thailand15/" here, while `allfiles` was built
# from the listing of "Thailand16/" in an earlier cell — confirm the folders are meant to differ.
folderPath = imagesRoot+"Thailand15/"

# Predicted class id -> output sub-folder.
classes = {0: 'field/', 1: 'notField/'}
# saveModelPreds(folderPath, outFolderPath, filteredFilenames, classes)
saveModelPreds(folderPath, outFolderPath, allfiles, classes)



100%|██████████| 48/48 [05:10<00:00,  6.46s/it]

Images Classified: 1518



