# Start


Quick fix if the `tensorflow` module is not found:


In [None]:
!pip install tensorflow

In [None]:
import os
import shutil

import kagglehub
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from PIL import Image

## Import data


In [None]:
def _index_image_files(file_names):
    """Pair every image prefix with its unmasked and masked file names.

    Only ``.jpg`` entries are considered: the returned file names are always
    rebuilt with a ``.jpg`` extension, so any other directory entry would
    yield a row pointing at a file that does not exist.

    Args:
        file_names: Iterable of bare file names (no directory part).

    Returns:
        A list of dicts with the keys ``image_id``, ``Without_Mask`` and
        ``With_Mask``, ordered by prefix. A variant that is not present in
        ``file_names`` is stored as ``None``.
    """
    mask_suffix = "-with-mask"
    without_mask_prefixes = set()
    with_mask_prefixes = set()
    for file_name in file_names:
        stem, extension = os.path.splitext(file_name)
        if extension != ".jpg":
            continue
        if stem.endswith(mask_suffix):
            with_mask_prefixes.add(stem[: -len(mask_suffix)])
        else:
            without_mask_prefixes.add(stem)

    return [
        {
            # Drop the first three characters of the prefix; resize_images
            # prepends "img" again when it rebuilds the file names.
            "image_id": prefix[3:] + ".jpg",
            "Without_Mask": f"{prefix}.jpg" if prefix in without_mask_prefixes else None,
            "With_Mask": f"{prefix}{mask_suffix}.jpg" if prefix in with_mask_prefixes else None,
        }
        for prefix in sorted(without_mask_prefixes | with_mask_prefixes)
    ]


def import_raw_data(
    handle="ruchi798/periocular-detection",
    export_path="../data/periocular/raw",
):
    """Download the dataset with kagglehub and export it to ``export_path``.

    Writes ``bbox.csv``, ``landmarks.csv`` and ``images.csv`` (one row per
    image prefix with its unmasked / masked file name) into ``export_path``
    and copies the image files into ``<export_path>/images``.

    Args:
        handle: Dataset handle passed to ``kagglehub.dataset_download``.
        export_path: Directory that receives the CSV files and the
            ``images`` sub-directory. Created if it does not exist.
    """
    # Load dataset
    path = kagglehub.dataset_download(handle=handle)

    # Define path to files (the CSV names are looked up with two spaces
    # after "Periocular")
    bbox_file = os.path.join(path, "Periocular  - bbox.csv")
    landmarks_file = os.path.join(path, "Periocular  - landmarks.csv")
    images_dir = os.path.join(path, "Dataset", "Dataset")

    # Load data
    bbox_df = pd.read_csv(bbox_file)
    landmarks_df = pd.read_csv(landmarks_file)

    # Explicit columns keep the header in images.csv even when no image
    # files were found.
    df = pd.DataFrame(
        _index_image_files(os.listdir(images_dir)),
        columns=["image_id", "Without_Mask", "With_Mask"],
    )

    # Export data
    target_dir = os.path.join(export_path, "images")
    os.makedirs(target_dir, exist_ok=True)  # also creates export_path itself

    bbox_df.to_csv(os.path.join(export_path, "bbox.csv"), index=False)
    landmarks_df.to_csv(os.path.join(export_path, "landmarks.csv"), index=False)
    df.to_csv(os.path.join(export_path, "images.csv"), index=False)

    # Only file-system failures are reported and tolerated (shutil.Error is
    # an OSError subclass); anything else is a programming error and
    # propagates.
    try:
        shutil.copytree(images_dir, target_dir, dirs_exist_ok=True)
    except OSError as e:
        print(f"Error copying files: {e}")
    else:
        print(f"Successfully copied files to {target_dir}")


import_raw_data()

## Definitions


In [None]:
# Reload the three tables that import_raw_data() exported, in the order
# bbox, landmarks, images.
df_bbox, df_landmarks, df_images = (
    pd.read_csv("../data/periocular/raw/bbox.csv"),
    pd.read_csv("../data/periocular/raw/landmarks.csv"),
    pd.read_csv("../data/periocular/raw/images.csv"),
)

# 4.1


## A


-   EDA a data preprocessing pre Vami vybrané charakteristiky z datasetu


### EDA


#### Table content


In [None]:
df_bbox.info()

In [None]:
df_bbox.head()

In [None]:
df_bbox.duplicated().sum()

In [None]:
df_landmarks.info()

In [None]:
df_landmarks.head()

In [None]:
df_landmarks.duplicated().sum()

In [None]:
df_images.info()

In [None]:
df_images.head()

In [None]:
df_images.duplicated().sum()

In [None]:
def show_image(file_path, title, images_dir="../data/periocular/raw/images"):
    """Draw one image on the current matplotlib axes with a title and no axis.

    Args:
        file_path: Image file name, relative to ``images_dir``.
        title: Title shown above the image.
        images_dir: Directory that contains the image; defaults to the raw
            export folder used throughout this notebook.
    """
    # The context manager closes the underlying file once the image has been
    # handed to imshow, instead of leaving one open handle per displayed image.
    with Image.open(os.path.join(images_dir, file_path)) as img:
        plt.imshow(img)
    plt.title(title)
    plt.axis("off")


# Preview the first three rows: unmasked image in the left panel, masked
# image in the right panel; a panel is skipped when its file name is missing.
panels = (("Without_Mask", "Without Mask"), ("With_Mask", "With Mask"))
for _, row in df_images.head(3).iterrows():
    plt.figure(figsize=(10, 5))
    for position, (column, caption) in enumerate(panels, start=1):
        if pd.isnull(row[column]):
            continue
        plt.subplot(1, 2, position)
        show_image(row[column], caption)
    plt.show()

#### Analysis of attributes


In [None]:
df_bbox.describe()

In [None]:
df_landmarks.describe()

#### Correlations


In [None]:
# Pairwise correlations of the bbox columns; the first column is skipped.
matica = df_bbox.iloc[:, 1:].corr()
# Hide the diagonal and the upper triangle so every pair is shown only once.
mask = np.triu(np.ones_like(matica, dtype=bool))
plt.figure(figsize=(10, 5))
sns.heatmap(matica, mask=mask, annot=True, cmap="coolwarm", vmin=-1, vmax=1)
plt.xticks(rotation=45, ha="right")
plt.show()

In [None]:
# Pairwise correlations of the landmark columns; the first column is skipped.
matica = df_landmarks.iloc[:, 1:].corr()
# Hide the diagonal and the upper triangle so every pair is shown only once.
mask = np.triu(np.ones_like(matica, dtype=bool))
plt.figure(figsize=(10, 5))
sns.heatmap(matica, mask=mask, annot=True, cmap="coolwarm", vmin=-1, vmax=1)
plt.xticks(rotation=45, ha="right")
plt.show()

# The correlation matrix is not needed after plotting.
del matica

-   There are 35 null values in images
-   There are no duplicates


#### Histograms


In [None]:
# One histogram per bbox column on a 2x2 grid.
df_bbox.plot(kind="hist", bins=50, subplots=True, layout=(2, 2), figsize=(10, 5))
# plt.show() renders the figure; plt.plot() with no arguments draws nothing
# and only echoes an empty list as the cell output.
plt.show()

In [None]:
# One histogram per landmark column on a 5x2 grid.
df_landmarks.plot(kind="hist", bins=100, subplots=True, layout=(5, 2), figsize=(15, 20))
# plt.show() renders the figure; plt.plot() with no arguments draws nothing
# and only echoes an empty list as the cell output.
plt.show()

#### Findings


-   There are 35 null values in images
-   There are no duplicates
-   Many features are not Gaussian; some are, but with very low variance


### Data preprocessing


#### Removing null values


In [None]:
# dropna() returns a new frame, so the result has to be assigned back;
# otherwise df_images still contains the rows with missing file names.
df_images = df_images.dropna().reset_index(drop=True)
df_images

#### Outliers


In [None]:
# Assigning plt.figsize only sets an unused attribute on the pyplot module;
# the size has to be passed to plt.figure() to take effect.
plt.figure(figsize=(10, 5))
sns.boxplot(data=df_bbox.iloc[:, 1:], orient="h")
plt.show()

-   Outliers are expected, since the face in each picture can differ in position, size, etc.
-   Therefore we will not remove them.


In [None]:
# Assigning plt.figsize only sets an unused attribute on the pyplot module;
# the size has to be passed to plt.figure() to take effect.
plt.figure(figsize=(10, 5))
sns.boxplot(data=df_landmarks.iloc[:, 1:], orient="h")
plt.show()

-   There are very few outliers, which is also expected, since people's facial features differ.
-   Therefore we will not remove them.


#### Combining Data Frames


In [None]:
# Join the three tables on image_id and keep only complete rows.
# dropna() must run before the file-name columns are removed, so that images
# missing one of the two variants are filtered out as well.
# The former reset_index(drop=True) call discarded its result and would have
# been overridden by set_index anyway, so it is omitted.
df_combined = (
    df_bbox.merge(df_landmarks, on="image_id")
    .merge(df_images, on="image_id")
    .dropna()
    .drop(columns=["Without_Mask", "With_Mask"])
    .set_index("image_id")
)
df_combined

#### Resizing images


In [None]:
def resize_images(
    images,
    target_size,
    input_dir="../data/periocular/raw/images",
    output_dir="../data/periocular/resized",
):
    """Resize the unmasked and the masked image of every entry and save them.

    For each label in ``images.index`` the extension is stripped and the
    files ``img<stem>.jpg`` and ``img<stem>-with-mask.jpg`` are read from
    ``input_dir``, resized to ``target_size`` and written under the same
    file name into ``output_dir``.

    Args:
        images: Object whose ``index`` holds the image ids (file names such
            as the ``image_id`` values used in this notebook).
        target_size: ``(width, height)`` tuple passed to ``Image.resize``.
        input_dir: Directory the source images are read from.
        output_dir: Directory the resized images are written to. Created if
            it does not exist.
    """
    os.makedirs(output_dir, exist_ok=True)
    for image_id in images.index:
        # splitext removes only the extension, so stems that contain dots
        # are kept intact.
        stem = os.path.splitext(image_id)[0]
        for suffix in ("", "-with-mask"):
            file_name = f"img{stem}{suffix}.jpg"
            # The context manager closes each source file after saving
            # instead of keeping two open handles per image id.
            with Image.open(os.path.join(input_dir, file_name)) as img:
                img.resize(target_size).save(os.path.join(output_dir, file_name))


resize_images(df_combined, target_size=(256, 256))

#### Splitting Data


In [None]:
# df_train, df_test = train_test_split(df_combined, test_size=0.2, random_state=42)

## B


-   Zdôvodnite výber ML/DL metód vzhľadom na Vami vybraný dataset pre 4.2


# 4.2


## A


-   Modelujete Vami vybrané charakteristiky pomocou vhodných ML/DL
    metód. Výsledok modelovania je najlepší model.


## B


-   Zhodnotíte Váš prístup a získaný výsledok
