In [1]:
import os
import pandas as pd
import glob

# Set path to results

In [2]:
# Directory holding the detection results; "PATH_TO" is presumably a placeholder
# that has to be replaced with the local location before running (TODO confirm).
results_path = "PATH_TO/covEcho/runs/detect/exp"
# The per-frame label files are listed from the "labels" sub-directory.
labels_path = os.path.join(results_path, "labels")

# Data preparations

Load input, labels and results structured into a single df

In [3]:
# Result images: every .jpg found directly in the results directory
images_with_labels = [name for name in os.listdir(results_path) if name.endswith(".jpg")]

# Label files: every .txt found in the labels sub-directory
label_list = [name for name in os.listdir(labels_path) if name.endswith(".txt")]


Label df

In [4]:
# Make label dataframe
labels = pd.DataFrame(label_list, columns=["label_file"])

# Extract video name and frame from the label file name.
# Only the first underscore is used as separator, so the part before it must be the
# frame number and everything after it (minus the extension) becomes the video name.
labels_split = labels.label_file.str.split("_", expand=True, n=1)
labels_split.columns = ["Frame", "video_name"]
labels_split.Frame = labels_split.Frame.astype(int)
# Strip only the trailing ".txt". The pattern is an explicit, anchored regex so the
# result does not depend on the pandas-version-specific default of `regex`
# (where "." used to act as a wildcard) and text inside the name is never touched.
labels_split.video_name = labels_split.video_name.str.replace(r"\.txt$", "", regex=True)

labels = pd.concat([labels, labels_split], axis=1)
# Add complete file path
labels["label_file_path"] = labels.label_file.apply(lambda x: os.path.join(labels_path, x))

  


Image df

In [5]:
# Make images dataframe
images = pd.DataFrame(images_with_labels, columns=["image_file"])

# Extract video name and frame from the image file name.
# Only the first underscore is used as separator, so the part before it must be the
# frame number and everything after it (minus the extension) becomes the video name.
images_split = images.image_file.str.split("_", expand=True, n=1)
images_split.columns = ["Frame", "video_name"]
images_split.Frame = images_split.Frame.astype(int)
# Strip only the trailing ".jpg". The pattern is an explicit, anchored regex so the
# result does not depend on the pandas-version-specific default of `regex`
# (where "." used to act as a wildcard) and text inside the name is never touched.
images_split.video_name = images_split.video_name.str.replace(r"\.jpg$", "", regex=True)

images = pd.concat([images, images_split], axis=1)
# Add complete file path
images["image_file_path"] = images.image_file.apply(lambda x: os.path.join(results_path, x))

  


In [6]:
# Pair each result image with its label file via (Frame, video_name); the default
# inner join keeps only frames present in both tables. Rows are ordered per video.
result_files = (
    images
    .merge(labels, on=["Frame", "video_name"])
    .sort_values(by=["video_name", "Frame"])
)

Input files

In [7]:
input_files_path = "/itet-stor/mrichte/covlus_bmicnas02/maastricht_image_dataset/**/*.jpg"
input_files = glob.glob(input_files_path, recursive=True)

# NOTE: `input` shadows the Python builtin of the same name; the name is kept here
# because the following merge cell refers to it.
input = pd.DataFrame(input_files, columns=["input_file_path"])

# Extract columns from file path
# The nine names below match a path with exactly nine "/"-separated parts, i.e. a file
# three directories below the dataset folder:
#   <root parts 1-4>/<dataset>/<Patient ID>/<Bluepoint>/<5>/<input_file>
# Files found at any other depth will not fit this layout.
input_split = input.input_file_path.str.split("/", expand=True)
input_split.columns = ["1", "2", "3", "4", "dataset", "Patient ID", "Bluepoint", "5", "input_file"]
input_split = input_split.drop(columns=["1", "2", "3", "4", "5"])
input = pd.concat([input, input_split], axis=1)

# Extract video name and frame from file name (split on the first underscore only)
input_split = input.input_file.str.split("_", expand=True, n=1)
input_split.columns = ["Frame", "video_name"]
input_split.Frame = input_split.Frame.astype(int)
# Strip only the trailing ".jpg". The pattern is an explicit, anchored regex so the
# result does not depend on the pandas-version-specific default of `regex`
# (where "." used to act as a wildcard) and text inside the name is never touched.
input_split.video_name = input_split.video_name.str.replace(r"\.jpg$", "", regex=True)

input = pd.concat([input, input_split], axis=1)


  app.launch_new_instance()


In [8]:
# Attach the original input image to every result row via (Frame, video_name);
# the default inner join drops rows without a counterpart. Rows are ordered per video.
files = (
    result_files
    .merge(input, on=["Frame", "video_name"])
    .sort_values(by=["video_name", "Frame"])
)

Clinical data

In [9]:
clinical_data_path = "/itet-stor/mrichte/covlus_bmicnas02/clinical_data.csv"

# Keep only the identifier and the COVID-19 pneumonia diagnosis, renamed so that the
# identifier column carries the same name ("Patient ID") as in the file table.
clinical_data = (
    pd.read_csv(clinical_data_path)[["Video ID", "clin_diagn#COVID19_pneumonia"]]
    .rename(columns={"Video ID": "Patient ID", "clin_diagn#COVID19_pneumonia": "COVID19"})
)

In [None]:
# Add the clinical diagnosis to every file row (inner join on "Patient ID").
# NOTE(review): "Patient ID" in `files` is a string taken from the file path; confirm
# the CSV identifier column has a compatible dtype, otherwise the join cannot match.
df = files.merge(clinical_data, on="Patient ID")

The DataFrame `df` now contains the paths to all result files. Next, we read in the CSV result file for each frame.

In [11]:
# Iterate over rows in label_file_path and read csv
# Each label file is parsed as a space-separated table without a header row, and the
# resulting DataFrame is stored as a single object in the "label_csv" column.
# NOTE(review): the column count is presumably taken from the first line of each file;
# confirm that the trailing score rows never have more fields than the detection rows.
df["label_csv"] = df.label_file_path.apply(lambda x: pd.read_csv(x, header=None, sep=" "))

In [None]:
# Iterate over rows in label_csv and extract the scores as well as the bounding box coordinates for each detected class
yolo_quality_score = []
yolo_quality = []
yolo_severity_score = []
yolo_detections = []

# Only these three columns are needed, so iterate over them directly instead of
# materialising every row as a Series with `iterrows`.
for label_table, video_name, frame in zip(df["label_csv"], df["video_name"], df["Frame"]):
    # Extract quality and severity score from the last two rows of the label table.
    # NOTE(review): the quality label is read from the second-to-last row while the
    # quality score is read from the last row - confirm against the label file format.
    yolo_quality_score.append(label_table.iloc[-1][3])
    yolo_quality.append(label_table.iloc[-2][0])
    yolo_severity_score.append(label_table.iloc[-2][3])

    # Everything above the two score rows is one detection per row.
    # `.copy()` gives an independent frame, so the column assignments below do not
    # write into a slice of the stored label table (avoids SettingWithCopyWarning).
    detections = label_table.iloc[:-2].copy()
    detections.columns = ["class", "confidence", "x", "y", "w", "h"]
    detections = detections.astype(
        {"class": int, "confidence": float, "x": float, "y": float, "w": float, "h": float}
    )

    # Calculate area of bounding box
    detections["area"] = detections["w"] * detections["h"]

    # Keep the join keys so the detections can be merged back onto df later
    detections["video_name"] = video_name
    detections["Frame"] = frame

    yolo_detections.append(detections)

In [13]:
# Attach the per-frame scores collected in the loop; the lists are in df row order.
for score_column, score_values in (
    ("yolo_quality_score", yolo_quality_score),
    ("yolo_quality", yolo_quality),
    ("yolo_severity_score", yolo_severity_score),
):
    df[score_column] = score_values

In [15]:
# Stack the per-frame detection tables into one long table, then add the frame-level
# columns of df to every detection via (Frame, video_name).
# NOTE(review): df still carries the "label_csv" column of nested DataFrames, which is
# therefore repeated on every detection row - consider dropping it if it is not needed.
yolo_detection_df = pd.concat(yolo_detections).merge(df, on=["Frame", "video_name"])

In [None]:
# Add the class names from the yolo net

# The position in this list is the numeric class id used in the label files.
class_names = [
    "Airbronchograms",  # 0
    "Alines",           # 1
    "Blines",           # 2
    "Bpatch",           # 3
    "Consolidations",   # 4
    "Pleura",           # 5
    "Rib",              # 6
    "Shadow",           # 7
]

# Translate each numeric class id into its name by list lookup
yolo_detection_df["class_name"] = yolo_detection_df["class"].apply(class_names.__getitem__)

In [None]:
# Save to csv if needed
# An existing file is never overwritten; a message is printed instead.
for frame_to_save, csv_name, exists_message in (
    (yolo_detection_df, "yolo_detection_df.csv", "Yolo file already exists"),
    (df, "df.csv", "df file already exists"),
):
    if os.path.exists(csv_name):
        print(exists_message)
    else:
        frame_to_save.to_csv(csv_name, index=False)

### Load yolo_detection_df and df if they already exist

In [128]:
import pandas as pd
import os

# Restore the detection table from its csv export; only report when it is missing.
if not os.path.exists("yolo_detection_df.csv"):
    print("Yolo file does not exist")
else:
    yolo_detection_df = pd.read_csv("yolo_detection_df.csv")

# Restore the frame-level table from its csv export; only report when it is missing.
if not os.path.exists("df.csv"):
    print("df file does not exist")
else:
    df = pd.read_csv("df.csv")