# Modell-Trainer
Dieses Notebook kann verwendet werden, um das Modell zu trainieren.

# Requirements

In [None]:
%%capture
%pip install ultralytics
%pip install numpy
%pip install pandas
%pip install scipy
%pip install glob2
%pip install Pillow
%pip install ipywidgets

# Imports

In [None]:
import glob
import ipywidgets as widgets
import json
import matplotlib.cm as cm
import matplotlib.patches as patches
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd

from IPython.display import display
from ipywidgets import Layout
from PIL import Image
import shutil
from pathlib import Path
from tqdm import tqdm
from ultralytics import YOLO


# Preview data

In [None]:
# Load the class-id -> label-name mapping: line i of the label-map file holds
# the name of class id i. The path is built with os.path.join so it resolves
# on every platform (a raw backslash path only works on Windows).
label_mapping_file = os.path.join(
    "datasets",
    "UNIMIB2016",
    "orig",
    "labels_segmentation",
    "labelmap._darknet.labels",
)
with open(label_mapping_file, "r") as file:
    label_mapping = {i: line.rstrip() for i, line in enumerate(file)}


def get_plot(name):
    """Draw image ``name`` with its labelled segmentation polygons on top.

    Args:
        name: File stem (no extension) shared by the ``.jpg`` image in
            ``orig/images`` and the ``.txt`` label file in
            ``orig/labels_segmentation``.

    Returns:
        The ``matplotlib.pyplot`` module, so callers can call ``.show()`` on
        the result to render the figure that was just created.
    """
    base_dir = os.path.join("datasets", "UNIMIB2016", "orig")
    img_file = os.path.join(base_dir, "images", f"{name}.jpg")
    label_file = os.path.join(base_dir, "labels_segmentation", f"{name}.txt")

    def parse_line(line):
        """Split one label line into ``(class_id, [(x, y), ...])``."""
        segments = line.strip().split()
        label_id = int(segments[0])
        coordinates = list(map(float, segments[1:]))

        # Group the flat coordinate list into (x, y) pairs
        vertices = list(zip(coordinates[::2], coordinates[1::2]))
        return label_id, vertices

    # Close the image file as soon as it has been handed to matplotlib
    with Image.open(img_file) as img:
        width, height = img.size
        fig, ax = plt.subplots(1)
        ax.imshow(img)

    # Blank lines carry no annotation and would break parse_line
    with open(label_file, "r") as f:
        lines = [line for line in f if line.strip()]

    for i, line in enumerate(lines):
        class_id, polygon_points = parse_line(line)
        if not polygon_points:
            # A class id without vertices cannot be drawn
            continue

        # Label coordinates are fractions of the image size; scale to pixels
        polygon_points = [(x * width, y * height) for x, y in polygon_points]

        poly = patches.Polygon(
            polygon_points, fill=True, alpha=0.5, color=cm.tab10(i % 10)
        )
        # Add the patch exactly once: adding it twice draws it twice and
        # makes the half-transparent fill darker than intended.
        ax.add_patch(poly)

        # Write the class name at the mean of the polygon's vertices; fall
        # back to the raw id if the label map has no entry for it.
        centroid = np.mean(polygon_points, axis=0)
        ax.text(
            centroid[0],
            centroid[1],
            label_mapping.get(class_id, str(class_id)),
            color="black",
        )
    return plt


In [None]:
# Get list of image and label files
image_dir = "datasets/UNIMIB2016/orig/images/"
label_dir = "datasets/UNIMIB2016/orig/labels_segmentation/"
image_files = sorted(os.listdir(image_dir))
label_files = sorted(os.listdir(label_dir))


# Create widgets (the output area is created once; a second, identical
# assignment only produced a throw-away widget)
output = widgets.Output()
dropdown = widgets.Dropdown(
    options=image_files, value=image_files[0], description="Bild"
)
text = widgets.Text(value=image_files[0], description="Bildtext")


def update_widget(change):
    """Observer callback: pass the widget's new value on to ``update_image``."""
    selected_name = change.new
    update_image(selected_name)


def update_image(name):
    """Show image ``name`` and keep the dropdown and the text box in sync.

    Args:
        name: Image file name including extension. Names that are not one of
            the dropdown's options are ignored.
    """
    # The text box can hold arbitrary (e.g. half-typed) input. Such a value is
    # not a valid dropdown option, and assigning it would raise an error.
    if name not in dropdown.options:
        return

    dropdown.value = name
    text.value = name

    # Remove the previously rendered figure
    output.clear_output()

    # Strip only the extension, so names that contain dots stay intact
    stem = os.path.splitext(name)[0]

    # Display the image with polygons
    with output:
        get_plot(stem).show()


# Re-render whenever the value of either widget changes, then draw the
# image that is initially selected.
dropdown.observe(update_widget, names="value")
text.observe(update_widget, names="value")
update_image(text.value)

# Display widgets
# NOTE(review): 20151130_114034 and 20151130_122541 were pasted into this
# comment; presumably image names kept for manual inspection - confirm.
display(widgets.HBox([dropdown, text]), output)


In [None]:
def get_labels(dir, files):
    """Read the class names annotated in each label file.

    Args:
        dir: Directory containing the label files (with or without a
            trailing separator).
        files: File names inside ``dir``. Only ``.txt`` files are read, so a
            label-map file in the same listing is skipped by name instead of
            by position.

    Returns:
        Dict mapping each processed file name to the list of class names of
        its annotations (one entry per non-blank line, looked up in
        ``label_mapping``).
    """

    def parse_line(line):
        # The first token of a label line is the numeric class id
        label_id = int(line.split()[0])
        return label_mapping[label_id]

    output = {}
    for file in files:
        # Dropping "the last file" only worked when a label-map file sorted
        # last; in directories without one it silently lost a real label file.
        if not file.lower().endswith(".txt"):
            continue
        with open(os.path.join(dir, file)) as f:
            classes = [parse_line(line) for line in f if line.strip()]
        output[file] = classes
    return output


In [None]:
class FilterForToLittleOccurences():
    """Find food categories that appear in too few label files.

    Attributes:
        df: One row per label file; column ``name`` holds the file name and
            every other column is a boolean "file contains this category".
        categories: Categories that occur in fewer than ``min_occurrences``
            files.
        files_to_filter_out: Label file names containing at least one of
            ``categories``.
    """

    def __init__(self, min_occurrences=4):
        """Build the occurrence table and derive the rare categories/files.

        Args:
            min_occurrences: Categories present in fewer files than this are
                treated as too rare. Defaults to 4.
        """
        self.df = self.get_df_with_labels_in_header()
        self.categories = self.get_food_that_have_less_than_4_occurences(
            self.df, min_occurrences
        )
        self.files_to_filter_out = (
            self.get_files_with_food_that_have_too_little_occurences(
                self.df, self.categories
            )
        )
        # Files are deliberately not moved here; call
        # self.move_files(self.files_to_filter_out) explicitly to do so.

    def get_df_with_labels_in_header(self):
        """Return the boolean file x category table for all label files.

        Reads the module-level ``label_dir`` / ``label_files`` via
        ``get_labels``.
        """
        labels = get_labels(label_dir, label_files)

        # Sorted so the column order is the same on every run
        categories = sorted({food for foods in labels.values() for food in foods})

        data = {"name": list(labels.keys())}
        for category in categories:
            data[category] = [category in foods for foods in labels.values()]

        return pd.DataFrame(data)

    def get_food_that_have_less_than_4_occurences(self, df, min_occurrences=4):
        """Return the category columns of ``df`` set in too few rows.

        Args:
            df: Table as built by ``get_df_with_labels_in_header`` (first
                column is the file name, the rest are boolean columns).
            min_occurrences: Exclusive lower bound on the number of files a
                category must appear in to be kept. Defaults to 4.

        Returns:
            List of column names whose count is below ``min_occurrences``.
        """
        counts = df.iloc[:, 1:].sum(axis=0)
        return counts[counts < min_occurrences].index.to_list()

    def get_files_with_food_that_have_too_little_occurences(self, df, categories):
        """Return the ``name`` of every row with one of ``categories`` set."""
        if not categories:
            return []
        return df[df[categories].any(axis=1)]["name"].to_list()

    def move_files(self, files):
        """Move the given label files and their images to ``sorted_out``.

        Args:
            files: Label file names; the image is expected to have the same
                stem with a ``.jpg`` extension.
        """
        root = os.path.join("datasets", "UNIMIB2016")
        images_dir = os.path.join(root, "orig", "images")
        labels_dir = os.path.join(root, "orig", "labels_segmentation")
        sorted_out_images = os.path.join(root, "sorted_out", "images")
        sorted_out_labels = os.path.join(root, "sorted_out", "labels")

        # Create the target directories if they don't exist
        os.makedirs(sorted_out_images, exist_ok=True)
        os.makedirs(sorted_out_labels, exist_ok=True)

        for file in files:
            shutil.move(
                os.path.join(images_dir, Path(file).stem + ".jpg"),
                sorted_out_images,
            )
            shutil.move(os.path.join(labels_dir, file), sorted_out_labels)


# Show the categories that occur in fewer than 4 label files. Instantiating
# the class reads every label file via get_labels; no files are moved.
FilterForToLittleOccurences().categories

In [None]:
# One row per label file: its name and its class names joined into one string
labels = get_labels(label_dir, label_files)
df = pd.DataFrame(
    {
        "name": labels.keys(),
        "meals": list(map(", ".join, labels.values())),
    }
)

## Look at labels

In [None]:
# Every distinct class name that occurs in at least one label file
filtered_labels = set().union(*labels.values())

In [None]:
# Reference set of class names, compared against the names actually found in
# the label files (`filtered_labels`).
# NOTE(review): presumably the complete UNIMIB2016 class list - confirm the
# source. Both "rosbeef" and "roastbeef" are listed as separate entries.
all_labels={"patate/pure",
"pasta_mare_e_monti",
"pizza",
"budino",
"mandarini",
"pasta_zafferano_e_piselli",
"arrosto",
"yogurt",
"pane",
"torta_salata_spinaci_e_ricotta",
"rosbeef",
"pizzoccheri",
"arancia",
"carote",
"fagiolini",
"pesce_(filetto)",
"spinaci",
"torta_cioccolato_e_pere",
"cotoletta",
"patatine_fritte",
"scaloppine",
"insalata_2_(uova mais)",
"insalata_mista",
"pasta_sugo",
"riso_sugo",
"minestra",
"pasta_bianco",
"mele",
"riso_bianco",
"pere",
"pasta_tonno_e_piselli",
"medaglioni_di_carne",
"pasta_ricotta_e_salsiccia",
"piselli",
"merluzzo_alle_olive",
"finocchi_in_umido",
"torta_ananas",
"passato_alla_piemontese",
"pasta_sugo_vegetariano",
"pasta_tonno",
"cibo_bianco_non_identificato",
"guazzetto_di_calamari",
"stinco_di_maiale",
"strudel",
"zucchine_impanate",
"zucchine_umido",
"roastbeef",
"crema_zucca_e_fagioli",
"lasagna_alla_bolognese",
"finocchi_gratinati",
"pasta_pancetta_e_zucchine",
"rucola",
"orecchiette_(ragu)",
"arrosto_di_vitello",
"pasta_e_ceci",
"torta_crema",
"torta_salata_(alla_valdostana)",
"pasta_cozze_e_vongole",
"banane",
"pasta_pesto_besciamella_e_cornetti",
"pasta_e_fagioli",
"torta_salata_rustica_(zucchine)",
"bruscitt",
"focaccia_bianca",
"pesce_2_(filetto)",
"torta_crema_2",
"pasta_sugo_pesce",
"polpette_di_carne",
"salmone_(da_menu_sembra_spada_in_realta)",
"cavolfiore",
"torta_salata_3",
"minestra_lombarda",
"patate/pure_prosciutto"}

In [None]:
# Class names from the reference set that never occur in the label files
all_labels.difference(filtered_labels)

In [None]:
# Same table as before, but "meals" keeps each file's class names as a list
df = pd.DataFrame({"name": labels.keys(), "meals": list(labels.values())})


In [None]:
import os

# Label directory of the validation split. Despite its name,
# `files_to_filter_out` simply holds every file name in that directory, sorted.
folder = "datasets/UNIMIB2016/val/labels/"
files_to_filter_out = sorted(os.listdir(folder))


def get_label_counts(dir, files):
    """Count how often each class name is annotated across ``files``.

    Args:
        dir: Directory of the label files (passed through to ``get_labels``).
        files: Label file names (passed through to ``get_labels``).

    Returns:
        DataFrame with columns ``name`` and ``counts``, sorted ascending by
        ``counts`` with a fresh 0..n-1 index.
    """
    counts = {}
    for foods in get_labels(dir, files).values():
        for food in foods:
            counts[food] = counts.get(food, 0) + 1

    table = pd.DataFrame({"name": counts.keys(), "counts": counts.values()})
    return table.sort_values("counts").reset_index(drop=True)


# Class frequencies in the validation split (rarest classes first)
get_label_counts(folder, files_to_filter_out)


# Split Data


In [None]:
import os
import shutil
import numpy as np


def split_data_in_train_test_val(seed=None):
    """Copy the original images/labels into train (70%), val (15%), test (15%).

    Each image is paired with the label file that has the same stem and a
    ``.txt`` extension. Pairing by name, rather than by position in two
    separately sorted listings, keeps images and labels together even when
    the label directory holds extra files (e.g. a label map) or a label is
    missing. Images without a label file are skipped with a warning.

    Args:
        seed: Optional integer seed for a reproducible shuffle. With the
            default ``None`` the global NumPy random state is used.
    """
    import warnings

    root = os.path.join("datasets", "UNIMIB2016")
    images_dir = os.path.join(root, "orig", "images")
    labels_dir = os.path.join(root, "orig", "labels_segmentation")

    # Create the train, val, test directories if they don't exist
    for split in ("train", "val", "test"):
        os.makedirs(os.path.join(root, split, "images"), exist_ok=True)
        os.makedirs(os.path.join(root, split, "labels"), exist_ok=True)

    # Build (image, label) pairs by shared file stem
    available_labels = set(os.listdir(labels_dir))
    pairs = []
    unlabeled = []
    for image_file in sorted(os.listdir(images_dir)):
        label_file = os.path.splitext(image_file)[0] + ".txt"
        if label_file in available_labels:
            pairs.append((image_file, label_file))
        else:
            unlabeled.append(image_file)
    if unlabeled:
        warnings.warn(
            f"Skipping {len(unlabeled)} image(s) without a label file, "
            f"e.g. {unlabeled[0]}"
        )

    # Shuffle the pairs
    indices = np.arange(len(pairs))
    rng = np.random if seed is None else np.random.RandomState(seed)
    rng.shuffle(indices)

    # Split the shuffled indices 70 / 15 / 15
    train_end = int(0.7 * len(indices))
    val_end = int(0.85 * len(indices))
    split_indices = {
        "train": indices[:train_end],
        "val": indices[train_end:val_end],
        "test": indices[val_end:],
    }

    for split, chosen in split_indices.items():
        target_images = os.path.join(root, split, "images")
        target_labels = os.path.join(root, split, "labels")
        for i in chosen:
            image_file, label_file = pairs[i]
            shutil.copy(os.path.join(images_dir, image_file), target_images)
            shutil.copy(os.path.join(labels_dir, label_file), target_labels)


# split_data_in_train_test_val()


# Rotate

In [None]:
def rotate_images_in_folder(folder):
    """Rotate every ``*.jpg`` in ``folder`` by 180 degrees, in place.

    Each file is overwritten with its rotated version, so running the
    function a second time turns the images back to their original
    orientation.

    Args:
        folder: Directory whose ``.jpg`` files are rotated (not recursive).
    """
    for file in tqdm(glob.glob(os.path.join(folder, "*.jpg"))):
        # Close the source file before writing to the same path
        with Image.open(file) as image:
            image_rot = image.rotate(180)
        # Save the rotated image under the same file name
        image_rot.save(file)


# rotate_images_in_folder("datasets/UNIMIB2016/orig/images")


# Build Model

In [None]:
def get_model(model_path=None):
    """Create a YOLO model.

    Args:
        model_path: Optional path handed to ``YOLO``. When it is empty or
            ``None``, the pretrained ``yolov8m-seg.pt`` weights are used
            (recommended for training).

    Returns:
        The ``YOLO`` instance.
    """
    # Alternative: YOLO("yolov8n.yaml") would build a new model from scratch.
    weights = model_path if model_path else "yolov8m-seg.pt"
    return YOLO(weights)


# Start from the default pretrained segmentation weights
model = get_model()

# Train Model

In [None]:
def train_model(model, epochs, data):
    """Train ``model`` on the segmentation task.

    Args:
        model: Object exposing ``train(**kwargs)``.
        epochs: Number of training epochs.
        data: Dataset configuration passed through as ``data``.

    The remaining settings are fixed: ``task="segment"``, ``workers=8`` and
    ``batch=-1``. The result of ``model.train`` is not returned.
    """
    train_kwargs = {
        "data": data,
        "epochs": epochs,
        "task": "segment",
        "workers": 8,
        "batch": -1,
    }
    model.train(**train_kwargs)


# Train for 100 epochs using the dataset description in config.yaml
train_model(model, epochs=100, data="config.yaml")


# Evaluate Model 

In [None]:
def evaluate_model(model):
    """Evaluate ``model`` on the validation set and return ``model.val()``'s result."""
    return model.val()

# Validate the current `model`; the returned metrics are shown as cell output
evaluate_model(model)


# Load Model

In [None]:
def load_model(pt_file):
    """Return a ``YOLO`` model constructed from the weights file ``pt_file``."""
    return YOLO(pt_file)

# Load the best weights of training run "train10". The path is joined with
# os.path.join so it also resolves on non-Windows systems.
model = load_model(
    os.path.join("runs", "segment", "train10", "weights", "best.pt")
)


In [None]:
# Export the loaded model; no arguments are given, so the library's default
# export settings apply.
model.export()


# Predict with Model

In [None]:
def predict(model, image_file):
    """Run ``model`` on a single image file.

    Args:
        model: Object exposing ``predict(source=..., save=...)``.
        image_file: Path of the image to open.

    Returns:
        Whatever ``model.predict`` returns; nothing is saved to disk
        (``save=False``).
    """
    # Keep the image open only for the duration of the prediction so the
    # file handle is released afterwards.
    with Image.open(image_file) as im1:
        results = model.predict(source=im1, save=False)
    return results


# Render Result

In [None]:
def get_plot(name):
    """Predict on validation image ``name`` and return the rendered result.

    Note: this rebinds the name ``get_plot``; from here on it refers to the
    prediction preview.

    Args:
        name: File stem (no extension) of a ``.jpg`` in the validation
            image directory.

    Returns:
        PIL image of the first result's plot, shrunk in place to fit within
        500x400 pixels.
    """
    img_file = os.path.join("datasets", "UNIMIB2016", "val", "images", f"{name}.jpg")
    result = predict(model, img_file)
    # Named `preview` rather than `plt` so the matplotlib.pyplot alias is not
    # shadowed inside this function.
    preview = Image.fromarray(result[0].plot())
    preview.thumbnail((500, 400))
    return preview


# List the validation images to browse. The path is joined with os.path.join
# so it also resolves on non-Windows systems.
# Note: image_dir, image_files, output and dropdown are rebound here and no
# longer refer to the objects of the data-preview cell.
image_dir = os.path.join("datasets", "UNIMIB2016", "val", "images")
image_files = sorted(os.listdir(image_dir))

# Create widgets
output = widgets.Output()
dropdown = widgets.Dropdown(options=image_files)
button_next = widgets.Button(description=">", layout=Layout(width="auto"))
button_prev = widgets.Button(description="<", layout=Layout(width="auto"))


def update_image(name):
    """Replace the output area's content with the prediction for ``name``.

    Args:
        name: Image file name including its extension.
    """
    # Drop whatever was rendered before
    output.clear_output()

    # Everything displayed inside this block ends up in the output widget
    with output:
        stem, _extension = os.path.splitext(name)
        display(get_plot(stem))


def on_button_next_clicked(b):
    """Select the next dropdown entry, wrapping from the last to the first.

    Args:
        b: The clicked button (unused).
    """
    # Nothing to step through when the dropdown is empty or has no selection
    if not dropdown.options or dropdown.index is None:
        return
    dropdown.index = (dropdown.index + 1) % len(dropdown.options)


def on_button_prev_clicked(b):
    """Select the previous dropdown entry, wrapping from the first to the last.

    Args:
        b: The clicked button (unused).
    """
    # Nothing to step through when the dropdown is empty or has no selection
    if not dropdown.options or dropdown.index is None:
        return
    dropdown.index = (dropdown.index - 1) % len(dropdown.options)


# Re-render on every dropdown change; the buttons only move the selection,
# which in turn triggers the dropdown observer.
dropdown.observe(lambda change: update_image(change["new"]), names="value")
button_next.on_click(on_button_next_clicked)
button_prev.on_click(on_button_prev_clicked)

# Display widgets


In [None]:
# Show the navigation row (previous / dropdown / next) above the output area
display(widgets.HBox([button_prev, dropdown, button_next]), output)
