In [None]:
## Dataset should be arranged in two folders:
##### Dataroot
#####  |
#####  ├── images (jpeg/png RGB images)
#####  └── labels (label files in YOLO format; each label file must have the same basename as its corresponding image file)

##### The code must be run from the project root directory (for example: ~/work/yolov5)

> Run the notebook one section at a time only. 
>
> WARNING: Running the entire notebook at once will cause errors !!!

## 1. Training

In [None]:
# dataset parameters
# Root of the yolov5 checkout; every absolute path in this cell is derived from it,
# so relocating the project only requires changing this one value.
yoloRoot = "/home/varun/work/yolov5"
dataRoot = yoloRoot + "/data/labData/corn-n-soy"
trainRelativePath = "images"  # training image folder, relative to dataRoot
valRelativePath = "images"    # validation image folder, relative to dataRoot
yamlPath = yoloRoot + "/data/labData/corn-n-soy.yaml"  # dataset yaml generated by this notebook
classList = ["weed"]

# training parameters (interpolated into the train.py command line)
imgSize = 640
batchSize = 32
epochs = 300
weightPath = yoloRoot + "/yolov5s.pt"
device = "0"
optimizer = "Adam" #(choose from 'SGD', 'Adam', 'AdamW')
workers = 16
projectSaveDir = yoloRoot + "/runs/acres22young/train"
projectName = "corn-n-soy-v1" # name of the project (relative to projectSaveDir)
savePeriod = 10
cacheRAM = True   # when True, "--cache ram" is added to the training command
useWandb = True   # selects the "wandb online" / "wandb offline" hint printed with the run instructions
condaEnvName = "yv5"

In [None]:
import os

## Setups train command
# pN is handed to train.py as its --project argument.
pN = os.path.join(projectSaveDir, projectName)
# Base command shared by both variants.
trainString = f"python train.py --img {imgSize} --batch {batchSize} --epochs {epochs} --data {yamlPath} --weights {weightPath} --device {device} --optimizer {optimizer} --workers {workers} --project {pN} --save-period {savePeriod}"
# Only the RAM-caching variant carries the extra flag.
if cacheRAM:
    trainString += " --cache ram"

In [None]:
## Create yaml file
# The header comments record the project name and the exact training command;
# the remaining lines are the dataset keys: train/val folders, class count and
# the "index: name" mapping of the classes.
yamlLines = [
    "# YAML file for Project: " + projectName,
    "# Training Command:",
    "# " + trainString,
    "train: " + os.path.join(dataRoot, trainRelativePath),
    "val: " + os.path.join(dataRoot, valRelativePath),
    "nc: " + str(len(classList)),
    "names:",
]
for classIndex, className in enumerate(classList):
    yamlLines.append("  " + str(classIndex) + ": " + className)

with open(yamlPath, "w") as f:
    f.writelines(yamlLine + "\n" for yamlLine in yamlLines)

In [None]:
# create train run script
# The training command is written to a small shell script so it can be started
# by hand (the printed hints suggest doing so inside a tmux session).
trainScriptName = "runTrain_" + projectName + ".sh"
with open(trainScriptName, "w") as f:
    f.write("#!/bin/bash\n")
    f.write(trainString)

# The lines below are only instructions for the user; the notebook does not run them.
print("Run the following commands to start training:")
print("tmux new -s " + projectName)
# Use the environment name from the parameters cell instead of a hard-coded "yv5".
# NOTE(review): "ca" is presumably a shell alias for activating a conda env -- confirm.
print("ca " + condaEnvName)
print("wandb online" if useWandb else "wandb offline")
print("sh " + trainScriptName)

# 2. Testing

In [None]:
# --- which trained model to load ---
projectName = "corn-n-soy-v1"  # training project whose weights are read
exp = "exp"                    # run sub-folder inside that project

# --- which test images to run on ---
testCrop = 'soy'  # 'corn' or 'soy'
day = 'day0'      # day0, day3, day5

# --- detection thresholds (used as --conf-thres / --iou-thres in the detect command) ---
confThres = 0.25
iouThres = 0.45

# --- derived input paths ---
weightsDir = f"runs/acres22young/train/{projectName}/{exp}/weights/best.pt"
testImgsDir = f"split/{testCrop}/{day}/*"

# --- where the detection results go ---
projectRootDir = "/home/varun/work/yolov5/runs/acres22young/detect/"
# Result folder name: training project + run, followed by the test crop and day.
detectSaveName = f"{projectName}-{exp}-{testCrop}-{day}"

In [None]:
# create detect run script
# The detect.py command is written to a shell script so it can be started by hand.
testString = f"python detect.py --weights {weightsDir} --source '{testImgsDir}' --save-txt --project {projectRootDir} --name {detectSaveName} --conf-thres {confThres} --iou-thres {iouThres}"
detectScriptName = "runDetect_" + detectSaveName + ".sh"
with open(detectScriptName, "w") as f:
    f.write("#!/bin/bash\n")
    f.write(testString)

# Instructions for the user only. The message used to say "start training"
# although this section launches detection.
print("Ensure you are in the root directory of yolov5, then run the following commands to start detection:")
print()
print("tmux new -s " + detectSaveName)
print("ca yv5")
print("sh " + detectScriptName)

# 3. Counting and validating with ground truth task

This is part of a research effort in collaboration with Dr. Young in 2022.
Dataset: images collected at ACRES in Summer of 2022

In [5]:
# All inputs and outputs of this section live under one experiment folder.
# The paths are relative, so they resolve against the current working directory.
experimentRoot = 'runs/acres22young'

# Files to read
rootDataFolder = experimentRoot + '/dataSideProject'
weedDensitySheet = 'Total Weed Density by Specie'  # same sheet name in both workbooks
cornCountYoung = 'ACRE-069-CORN_Weed Counts and Application Info.xlsx'
sheetCornCountYoung = weedDensitySheet
soyCountYoung = 'ACRE-131-SOYBEAN_Weed Counts and Application Info.xlsx'
sheetSoyCountYoung = weedDensitySheet

# Folder in which output should be saved
save_folder = experimentRoot + '/reports'
reportNameCorn = "cornCountACRES69.xlsx"
reportNameSoy = "soyCountACRES131.xlsx"

# parameters for getting detection results in sheet
cornDayList = ["day0", "day3", "day5"]
soyDayList = ["day0"]
# Each entry must match the prefix of a detection result folder: <trialName>-<crop>-<day>
trialNameList = ["corn-n-soy-v1-exp", "onlyCorn-v1-exp6", "onlySoy-v1-exp"] # check the name of the folder in which detection results are saved
cropList = ["corn", "soy"]
detectResultsFolder = experimentRoot + "/detect"

In [6]:
import os
import re
import collections
import numpy as np
import itertools
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import pandas as pd

# Handling File Paths
# Join on the basename so that re-running this cell does not prepend
# rootDataFolder a second time to an already-joined path.
cornCountYoung = os.path.join(rootDataFolder, os.path.basename(cornCountYoung))
soyCountYoung = os.path.join(rootDataFolder, os.path.basename(soyCountYoung))

#Creating save_folder
# makedirs also creates missing parent folders and does nothing when the folder
# already exists; the previous listdir-based check raised if the parent was missing.
os.makedirs(save_folder, exist_ok=True)

## 3.1 Getting weed counts in soy and corn

In [7]:
def processDataframes(crop, dayList, reportName, columnsToKeep, dfYoung):
    """Build one per-plot detection-count sheet for every day in dayList.

    For each day and each trial in the module-level trialNameList, the label files in
    <detectResultsFolder>/<trial>-<crop>-<day>/labels are read and their non-empty lines
    are summed per plot (presumably one line per detection in the txt output -- TODO confirm).
    Plots without any counted line keep an empty (NaN) cell.
    For "day0" and "day5" the counts are merged with the manual counts from dfYoung,
    difference and percent-error columns are added, and box plots are written.
    Every day's table is saved as a sheet named after the day in <save_folder>/<reportName>.

    Args:
        crop: crop name used in folder names, plot titles and file names (e.g. "corn").
        dayList: iterable of day labels such as "day0".
        reportName: file name of the Excel workbook inside the module-level save_folder.
        columnsToKeep: manual-count columns to carry into the report; a copy is passed on.
        dfYoung: DataFrame with the manual counts (needs "DAA" and the columnsToKeep columns).

    Returns:
        The table built for the last day, or None when dayList is empty.

    Raises:
        ValueError: if a label file name has no parsable plot number or names an unknown plot.
    """
    columns = ['Plot']
    columns.extend(trialNameList)
    dfDay = pd.DataFrame(columns=columns)
    # Plots 101-112, 201-212 and 301-312.
    dfDay['Plot'] = [block * 100 + number for block in (1, 2, 3) for number in range(1, 13)]
    knownPlots = set(dfDay['Plot'])

    df = None
    for day in dayList:
        df = dfDay.copy()
        for trialName in trialNameList:
            detectResultsPath = os.path.join(detectResultsFolder, trialName+"-"+crop+"-"+day, "labels")
            # Accumulate in plain integers instead of reading the running total back
            # from the frame, which relied on an `is np.nan` identity check.
            detectionsPerPlot = collections.Counter()
            for fileName in os.listdir(detectResultsPath):
                # The plot number sits between "plot" and "Tile" in the file name.
                try:
                    plotNumber = int(fileName.split("Tile")[0].split("plot")[1])
                except (IndexError, ValueError) as err:
                    raise ValueError(f"Cannot read a plot number from label file name {fileName!r}") from err
                if plotNumber not in knownPlots:
                    raise ValueError(f"Label file {fileName!r} refers to unknown plot {plotNumber}")
                with open(os.path.join(detectResultsPath, fileName)) as f:
                    detectionsPerPlot[plotNumber] += sum(1 for line in f if line.strip())
            for plotNumber, count in detectionsPerPlot.items():
                # Only write real counts; a plot whose files were all empty stays NaN.
                if count:
                    df.loc[df['Plot'] == plotNumber, trialName] = count
        # Only these two days are compared against the manual counts.
        if day == "day0" or day == "day5":
            df = combineManualCount(df, dfYoung, day, list(columnsToKeep))
            df = addDifferenceColumns(df)
            df = addPercentDifferenceColumns(df)
            createPlots(df, save_folder, reportName, crop, day)
        saveDataframe(df, save_folder, reportName, day)

    return df

def addDifferenceColumns(df, trialNames=None):
    """Add a "Diff: <trial>" column (trial count minus manual count) for every trial.

    Args:
        df: DataFrame holding a "Manual Count" column and one column per trial.
            It is modified in place.
        trialNames: names of the trial columns; defaults to the module-level
            trialNameList so existing calls keep working.

    Returns:
        The same DataFrame, with the new columns added.
    """
    if trialNames is None:
        trialNames = trialNameList
    for trial in trialNames:
        df["Diff: " + trial] = df[trial] - df["Manual Count"]
    return df

def addPercentDifferenceColumns(df, trialNames=None):
    """Add an "Error %: <trial>" column for every trial.

    The value is (Diff / Manual Count) * 100, with two special cases:
      * a missing difference gives NaN;
      * a manual count of 0 with a known difference gives +inf.

    Missing differences are detected with notna() rather than `is np.nan`: the identity
    check does not match NaNs produced by arithmetic, so a missing difference on a plot
    with a manual count of 0 used to be reported as inf.

    Args:
        df: DataFrame holding "Manual Count" and the "Diff: <trial>" columns.
            It is modified in place.
        trialNames: names of the trials; defaults to the module-level trialNameList.

    Returns:
        The same DataFrame, with the new columns added.
    """
    if trialNames is None:
        trialNames = trialNameList
    manualCount = pd.to_numeric(df["Manual Count"])
    zeroManual = manualCount == 0
    for trial in trialNames:
        difference = pd.to_numeric(df["Diff: " + trial])
        # Blank out zero denominators first so the division itself never yields +/-inf.
        percentDiff = difference.div(manualCount.where(~zeroManual)) * 100
        # A known difference against a manual count of 0 is reported as +inf.
        percentDiff = percentDiff.mask(zeroManual & difference.notna(), np.inf)
        df["Error %: " + trial] = percentDiff
    return df

def combineManualCount(df, dfYoung, day, columnsToKeep, trialNames=None):
    """Merge the manual counts for one day with the detection counts.

    Args:
        df: detection counts with a "Plot" column and one column per trial.
        dfYoung: manual counts; must contain "DAA" and every column in columnsToKeep.
        day: day label such as "day0"; its number selects the rows with that DAA value.
        columnsToKeep: manual-count columns to keep (including "Plot" and "TOTAL").
            The list is no longer extended in place.
        trialNames: names of the trial columns; defaults to the module-level trialNameList.

    Returns:
        A new DataFrame with the kept columns followed by the trial columns, where
        "TOTAL" is renamed to "Manual Count" and "Plot" to "Plot Number". Plots that
        appear in only one of the inputs are kept (outer merge).
    """
    if trialNames is None:
        trialNames = trialNameList
    dayNumber = int(day.replace('day', ''))
    manualCounts = (
        dfYoung.query(f"DAA=={dayNumber}")[list(columnsToKeep)]
        .sort_values(by='Plot')
        .reset_index(drop=True)
        .astype({'Plot': 'int16'})
    )
    # Give both sides the same key dtype before merging.
    detectedCounts = df.astype({'Plot': 'int16'})
    merged = pd.merge(manualCounts, detectedCounts, on='Plot', how='outer')

    outputColumns = list(columnsToKeep) + list(trialNames)
    # rename() returns a new frame, avoiding an in-place edit on a column slice.
    return merged[outputColumns].rename(columns={'TOTAL':'Manual Count', 'Plot':'Plot Number'})


def saveDataframe(df, save_folder, fileName, day):
    """Write df, without its index, to the sheet named `day` of <save_folder>/<fileName>.

    If the workbook already exists it is opened in append mode and a sheet with the
    same name is replaced; otherwise a new workbook is created.
    """
    workbookPath = os.path.join(save_folder, fileName)
    writerOptions = {"engine": "openpyxl"}
    if os.path.exists(workbookPath):
        # Keep the other sheets and overwrite only the one for this day.
        writerOptions.update(mode='a', if_sheet_exists="replace")
    with pd.ExcelWriter(workbookPath, **writerOptions) as writer:
        df.to_excel(writer, sheet_name=day, index=False)

def _saveBoxplot(data, columnRegex, title, xLabel, xLimit, outPath):
    """Draw one horizontal boxplot of the columns matching columnRegex and save it to outPath."""
    plt.figure(figsize=(20,10))
    ax = sns.boxplot(data=data.filter(regex=columnRegex), orient="h", palette="colorblind")
    ax.set_title(title)
    ax.set_xlabel(xLabel)
    ax.set_ylabel("Trial Name")
    ax.set_xlim(-xLimit, xLimit)
    plt.savefig(outPath)
    # Close the figure so repeated calls do not accumulate open figures.
    plt.close()


def _isIntegerLike(value):
    """Return True for Python/NumPy integers and for floats without a fractional part."""
    if isinstance(value, (int, np.integer)):
        return True
    # NaN and inf are not integers, so is_integer() rejects them.
    return isinstance(value, (float, np.floating)) and float(value).is_integer()


def _saveGroupBoxplots(df, column, groups, savePath, baseTitle, filePrefix):
    """Save an error and a difference boxplot for every (value, titleTag, fileTag) group.

    Rows are selected with df[column] == value. The folder savePath is only created
    when there is at least one group to plot.
    """
    for value, titleTag, fileTag in groups:
        os.makedirs(savePath, exist_ok=True)
        subset = df[df[column] == value]
        title = baseTitle+" | "+titleTag
        _saveBoxplot(subset, "Error.*", title, "Error %", 100,
                     os.path.join(savePath, filePrefix+"_error_"+fileTag+".png"))
        _saveBoxplot(subset, "Diff.*", title, "Count Difference", 500,
                     os.path.join(savePath, filePrefix+"_difference_"+fileTag+".png"))


def createPlots(df, save_folder, fileName, crop, day):
    """Write box plots of the "Error ..." and "Diff ..." columns of df as PNG files.

    Produces, inside save_folder:
      * <crop>_<day>_error.png and <crop>_<day>_difference.png for the whole table;
      * "Graphs Based on Size/":       one pair of plots per non-missing 'Size' value;
      * "Graphs Based on Crop Stage/": one pair per non-missing '<Crop> Stage' value;
      * "Graphs Based on Treatment/":  one pair per integer-valued 'Treatment'.

    Args:
        df: table containing the "Error %: ..." and "Diff: ..." columns plus
            'Size', '<Crop> Stage' and 'Treatment'.
        save_folder: output folder for the images.
        fileName: not used; kept so existing calls stay valid.
        crop: crop name, e.g. "corn"; its capitalized form selects the stage column.
        day: day label, e.g. "day0".

    Changes from the earlier copy-pasted loops: treatments stored as NumPy integers or
    as whole-number floats are no longer skipped, and a missing (NaN) crop stage is
    skipped instead of raising a TypeError.
    """
    baseTitle = crop.capitalize()+" | "+day.capitalize()
    filePrefix = crop+"_"+day

    # Plots over all plots of the table.
    _saveBoxplot(df, "Error.*", baseTitle, "Error %", 100,
                 os.path.join(save_folder, filePrefix+"_error.png"))
    _saveBoxplot(df, "Diff.*", baseTitle, "Count Difference", 600,
                 os.path.join(save_folder, filePrefix+"_difference.png"))

    # Plots split by weed size.
    sizeGroups = [
        (size, "Size "+str(int(size)), "size_"+str(int(size)))
        for size in df['Size'].unique() if not pd.isna(size)
    ]
    _saveGroupBoxplots(df, 'Size', sizeGroups,
                       os.path.join(save_folder, "Graphs Based on Size"), baseTitle, filePrefix)

    # Plots split by crop growth stage ("/" is replaced because it cannot be used in a file name).
    stageColumn = crop.capitalize()+' Stage'
    stageGroups = [
        (stage, "Stage "+str(stage), "stage_"+crop+'_'+str(stage).replace("/","-"))
        for stage in df[stageColumn].unique() if not pd.isna(stage)
    ]
    _saveGroupBoxplots(df, stageColumn, stageGroups,
                       os.path.join(save_folder, "Graphs Based on Crop Stage"), baseTitle, filePrefix)

    # Plots split by treatment; non-numeric or missing treatments are skipped.
    treatmentGroups = [
        (treatment, "Treatment "+str(int(treatment)), "treatment_"+str(int(treatment)))
        for treatment in df['Treatment'].unique() if _isIntegerLike(treatment)
    ]
    _saveGroupBoxplots(df, 'Treatment', treatmentGroups,
                       os.path.join(save_folder, "Graphs Based on Treatment"), baseTitle, filePrefix)

### 3.2 Corn Processing

In [None]:
##### FOR CORN #####
# Columns taken from the manual-count workbook for the corn report.
columnsToKeep = ['Plot','Treatment','Corn Stage', 'Size','TOTAL']
dfYoung = pd.read_excel(cornCountYoung, sheet_name=sheetCornCountYoung)
# Builds and saves one report sheet per day in cornDayList; df is the last day's table.
df = processDataframes(
    crop="corn",
    dayList=cornDayList,
    reportName=reportNameCorn,
    columnsToKeep=columnsToKeep,
    dfYoung=dfYoung,
)

### 3.3 Soy Processing

In [8]:
##### FOR SOY #####
# Columns taken from the manual-count workbook for the soy report.
columnsToKeep = ['Plot','Treatment','Soy Stage','Size','TOTAL']
dfYoung = pd.read_excel(soyCountYoung, sheet_name=sheetSoyCountYoung)
# Builds and saves one report sheet per day in soyDayList; df is the last day's table.
df = processDataframes(
    crop="soy",
    dayList=soyDayList,
    reportName=reportNameSoy,
    columnsToKeep=columnsToKeep,
    dfYoung=dfYoung,
)

### 3.4 Training Data Statistics

- Create an excel sheet
    - Number of training images in each plot
    - Number of training instances per plot
    - Growth stage, size and treatment corresponding to each plot

In [49]:
# Get training data Statistics
# This cell builds paths with os.path before this section's import cell runs,
# so it imports os itself to work on a fresh kernel.
import os

# dataset parameters
# Kept under its own name so it is not confused with dataRoot below, which points
# at a different folder.
labelDataRoot = "/home/varun/work/yolov5/data/labData"
trainRelativePath = "labels"
trainPathCorn = os.path.join(labelDataRoot, "corn", trainRelativePath)
trainPathSoy = os.path.join(labelDataRoot, "soy", trainRelativePath)

# Files to read
# NOTE: dataRoot and trainRelativePath are also set in the training section; running
# this cell overwrites them, so re-run that section's parameter cell before reusing it.
dataRoot = 'runs/acres22young/dataSideProject'
cornCountYoung = 'ACRE-069-CORN_Weed Counts and Application Info.xlsx'
cornCountYoung = os.path.join(dataRoot, cornCountYoung)
sheetCornCountYoung = 'Total Weed Density by Specie'
soyCountYoung = 'ACRE-131-SOYBEAN_Weed Counts and Application Info.xlsx'
soyCountYoung = os.path.join(dataRoot, soyCountYoung)
sheetSoyCountYoung = 'Total Weed Density by Specie'

#Folder in which output should be saved
save_folder = 'runs/acres22young/reports'
reportName = "trainDataStats.xlsx"

In [50]:
import glob
import pandas as pd
import numpy as np

In [51]:
##### FOR CORN #####
crop = "corn"
day = "day0"
df = pd.DataFrame(columns=['Plot','Number of Training Images','Number of Weeds'])
# Plots 101-112, 201-212 and 301-312.
df['Plot'] = [block * 100 + number for block in (1, 2, 3) for number in range(1, 13)]

# iterate over plot list
for idx, plot in enumerate(df['Plot']):
    # Get number of training images: one label file per image of this plot
    fileList = glob.glob(trainPathCorn+"/*plot"+str(plot)+"*.txt")
    df.loc[idx, 'Number of Training Images'] = len(fileList)

    # Get number of weeds: every non-empty line of a label file counts as one.
    # Counting in a plain integer avoids the fragile `is np.nan` identity check.
    weedCount = 0
    for fileName in fileList:
        with open(fileName) as f:
            weedCount += sum(1 for line in f if line.strip())
    # Plots without any counted line keep an empty (NaN) cell.
    if weedCount:
        df.loc[idx, "Number of Weeds"] = weedCount

# combine with young data
columnsToKeep = ['Plot','Treatment','Corn Stage', 'Size','TOTAL']
dfYoung = pd.read_excel(cornCountYoung, sheet_name=sheetCornCountYoung)
# Kept separate from `day` so that `day` stays the string label.
daysAfterApplication = int(day.replace('day', ''))
plotCountYoung = (
    dfYoung.query(f"DAA=={daysAfterApplication}")[columnsToKeep]
    .sort_values(by='Plot')
    .reset_index(drop=True)
    .astype({'Plot': 'int16'})
)
# Give both sides the same key dtype before merging.
plotCountVarun = df.astype({'Plot': 'int16'})
dfTemp = pd.merge(plotCountYoung, plotCountVarun, on='Plot', how='outer')

reportColumns = columnsToKeep + ['Number of Training Images','Number of Weeds']
# rename() returns a new frame, avoiding an in-place edit on a column slice.
dfTemp = dfTemp[reportColumns].rename(columns={'TOTAL':'Manual Count', 'Plot':'Plot Number'})

# Drop plots for which no training image was found.
df = dfTemp[dfTemp['Number of Training Images'] != 0]

# save to excel
sN = crop+"-Day"+str(daysAfterApplication)
reportPath = os.path.join(save_folder, reportName)
# Make sure the output folder exists before writing the workbook.
os.makedirs(save_folder, exist_ok=True)
if os.path.exists(reportPath):
    # Append to the existing workbook, replacing only this sheet.
    with pd.ExcelWriter(reportPath, if_sheet_exists="replace", mode='a', engine='openpyxl') as writer:
        df.to_excel(writer, sheet_name=sN, index=False)
else:
    with pd.ExcelWriter(reportPath, engine='openpyxl') as writer:
        df.to_excel(writer, sheet_name=sN, index=False)

In [52]:
##### FOR SOY #####
crop = "soy"
day = "day0"
df = pd.DataFrame(columns=['Plot','Number of Training Images','Number of Weeds'])
# Plots 101-112, 201-212 and 301-312.
df['Plot'] = [block * 100 + number for block in (1, 2, 3) for number in range(1, 13)]

# iterate over plot list
for idx, plot in enumerate(df['Plot']):
    # Get number of training images: one label file per image of this plot
    fileList = glob.glob(trainPathSoy+"/*plot"+str(plot)+"*.txt")
    df.loc[idx, 'Number of Training Images'] = len(fileList)

    # Get number of weeds: every non-empty line of a label file counts as one.
    # Counting in a plain integer avoids the fragile `is np.nan` identity check.
    weedCount = 0
    for fileName in fileList:
        with open(fileName) as f:
            weedCount += sum(1 for line in f if line.strip())
    # Plots without any counted line keep an empty (NaN) cell.
    if weedCount:
        df.loc[idx, "Number of Weeds"] = weedCount

# combine with young data
columnsToKeep = ['Plot','Treatment','Soy Stage', 'Size','TOTAL']
dfYoung = pd.read_excel(soyCountYoung, sheet_name=sheetSoyCountYoung)
# Kept separate from `day` so that `day` stays the string label.
daysAfterApplication = int(day.replace('day', ''))
plotCountYoung = (
    dfYoung.query(f"DAA=={daysAfterApplication}")[columnsToKeep]
    .sort_values(by='Plot')
    .reset_index(drop=True)
    .astype({'Plot': 'int16'})
)
# Give both sides the same key dtype before merging.
plotCountVarun = df.astype({'Plot': 'int16'})
dfTemp = pd.merge(plotCountYoung, plotCountVarun, on='Plot', how='outer')

reportColumns = columnsToKeep + ['Number of Training Images','Number of Weeds']
# rename() returns a new frame, avoiding an in-place edit on a column slice.
dfTemp = dfTemp[reportColumns].rename(columns={'TOTAL':'Manual Count', 'Plot':'Plot Number'})

# Drop plots for which no training image was found.
df = dfTemp[dfTemp['Number of Training Images'] != 0]

# save to excel
sN = crop+"-Day"+str(daysAfterApplication)
reportPath = os.path.join(save_folder, reportName)
# Make sure the output folder exists before writing the workbook.
os.makedirs(save_folder, exist_ok=True)
if os.path.exists(reportPath):
    # Append to the existing workbook, replacing only this sheet.
    with pd.ExcelWriter(reportPath, if_sheet_exists="replace", mode='a', engine='openpyxl') as writer:
        df.to_excel(writer, sheet_name=sN, index=False)
else:
    with pd.ExcelWriter(reportPath, engine='openpyxl') as writer:
        df.to_excel(writer, sheet_name=sN, index=False)