In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
import os
import cv2
import shutil
from functions import rotate_image, crop_image, blur_image, change_color, adjust_color_balance
import random

In [4]:
# Load the dataset index; later cells read its 'path', 'sickness' and 'target' columns.
df = pd.read_csv("./dataset/SB-FishDisease.csv")

In [5]:
# Number of rows per value of the 'target' column.
df['target'].value_counts()

target
disease    268
healthy    155
Name: count, dtype: int64

In [6]:
# 80/20 split, stratified on the fine-grained 'sickness' label with a fixed seed.
train_df, test_df = train_test_split(
    df,
    test_size=0.2,
    stratify=df['sickness'],
    random_state=42,
)

# Print the per-class row counts of each split under its heading.
for split_heading, split_frame in (
    ("Träningsuppsättning:", train_df),
    ("\nTestuppsättning:", test_df),
):
    print(split_heading)
    print(split_frame['sickness'].value_counts())

Träningsuppsättning:
sickness
healthy_fish                         113
bacterial_gill disease                43
fungal_diseases_saprolegniasis        37
bacterial_diseases _aeromoniasis      37
bacterial_red disease                 36
viral_diseases_white_tail_disease     33
parasitic_diseases                    28
healthy_gills                         11
Name: count, dtype: int64

Testuppsättning:
sickness
healthy_fish                         28
bacterial_gill disease               11
bacterial_diseases _aeromoniasis     10
fungal_diseases_saprolegniasis        9
bacterial_red disease                 9
viral_diseases_white_tail_disease     8
parasitic_diseases                    7
healthy_gills                         3
Name: count, dtype: int64


In [8]:
# Display the training split returned by train_test_split.
train_df

Unnamed: 0,path,sickness,target
119,./dataset/SB-FishDisease/Bacterial Red disease...,bacterial_red disease,disease
225,./dataset/SB-FishDisease/Healthy Fish\Healthy_...,healthy_fish,healthy
56,./dataset/SB-FishDisease/Bacterial gill diseas...,bacterial_gill disease,disease
109,./dataset/SB-FishDisease/Bacterial Red disease...,bacterial_red disease,disease
396,./dataset/SB-FishDisease/Viral diseases White ...,viral_diseases_white_tail_disease,disease
...,...,...,...
162,./dataset/SB-FishDisease/Fungal diseases Sapro...,fungal_diseases_saprolegniasis,disease
340,./dataset/SB-FishDisease/Healthy Gills\healthy...,healthy_gills,healthy
156,./dataset/SB-FishDisease/Fungal diseases Sapro...,fungal_diseases_saprolegniasis,disease
408,./dataset/SB-FishDisease/Viral diseases White ...,viral_diseases_white_tail_disease,disease


In [9]:
def augment_and_save(image_path, output_folder, augmented_df, row, save_folder, data_label):
    """Write an image and its augmented variants to disk and record them.

    The source image is written to ``output_folder/save_folder`` as
    ``<name>_original<ext>``; the result of every entry in the augmentation
    list below is written next to it as ``<name>_augmented_<idx><ext>``.
    One metadata row per written file is appended to ``augmented_df``.

    Args:
        image_path: Path of the source image, read with ``cv2.imread``.
        output_folder: Root output directory.
        augmented_df: DataFrame the new metadata rows are appended to.
        row: Mapping (e.g. a DataFrame row) providing ``'sickness'`` and
            ``'target'``; both values are copied onto every new row.
        save_folder: Sub-folder of ``output_folder`` that receives the files.
        data_label: Value stored in the ``'data'`` column of every new row.

    Returns:
        A new DataFrame consisting of ``augmented_df`` followed by the new
        rows, with the index renumbered (``ignore_index=True``).

    Raises:
        OSError: If the source image cannot be read, or if ``cv2.imwrite``
            reports that an output file was not written.
    """
    # cv2.imread signals failure by returning None instead of raising; stop
    # here with a clear message rather than failing later inside imwrite.
    original_image = cv2.imread(image_path)
    if original_image is None:
        raise OSError(f"Could not read image: {image_path!r}")

    file_name, file_extension = os.path.splitext(os.path.basename(image_path))

    # Make sure the destination exists so the function also works when the
    # folder was not created beforehand.
    target_dir = os.path.join(output_folder, save_folder)
    os.makedirs(target_dir, exist_ok=True)

    # (function, keyword arguments) pairs; the position in this list becomes
    # the numeric suffix of the augmented file name.
    augmentation_functions = [
        (rotate_image, {'angle': 30}),
        # (crop_image, {}),
        (blur_image, {'k_size': 5}),
        (change_color, {'flag': cv2.COLOR_BGR2GRAY}),
        (adjust_color_balance, {'red_factor': 1.0, 'green_factor': 0.8, 'blue_factor': 0.8}),
    ]

    def _write_and_describe(image, suffix):
        # Write one image and return its metadata row.
        out_path = os.path.join(target_dir, f'{file_name}_{suffix}{file_extension}')
        # imwrite returns False when nothing was written; without this check
        # the manifest would point at a file that does not exist.
        if not cv2.imwrite(out_path, image):
            raise OSError(f"Could not write image: {out_path!r}")
        return {
            'path': out_path,
            'sickness': row['sickness'],
            'target': row['target'],
            'data': data_label,
        }

    new_rows = [_write_and_describe(original_image, 'original')]
    for idx, (aug_func, params) in enumerate(augmentation_functions):
        augmented_image = aug_func(original_image, **params)
        new_rows.append(_write_and_describe(augmented_image, f'augmented_{idx}'))

    return pd.concat([augmented_df, pd.DataFrame(new_rows)], ignore_index=True)


In [10]:
# Root directory that receives the split images.
output_folder = './dataset/splitted_and_augmented_data'

# One sub-folder per split; exist_ok=True keeps the cell safe to re-run.
for split_name in ('train', 'test'):
    os.makedirs(os.path.join(output_folder, split_name), exist_ok=True)

In [11]:
# Start from an empty manifest that already has the four columns every row carries.
save_folder = 'train'
augmented_df = pd.DataFrame(columns=['path', 'sickness', 'target', 'data'])

# augment_and_save returns the manifest extended with this image's rows,
# so the name is rebound on every pass.
for row_label, row in train_df.iterrows():
    augmented_df = augment_and_save(
        row['path'],
        output_folder,
        augmented_df,
        row,
        save_folder,
        data_label='train',
    )

In [12]:
# Inspect the manifest built by the training loop (rows labelled 'train').
augmented_df

Unnamed: 0,path,sickness,target,data
0,./dataset/splitted_and_augmented_data\train\Ba...,bacterial_red disease,disease,train
1,./dataset/splitted_and_augmented_data\train\Ba...,bacterial_red disease,disease,train
2,./dataset/splitted_and_augmented_data\train\Ba...,bacterial_red disease,disease,train
3,./dataset/splitted_and_augmented_data\train\Ba...,bacterial_red disease,disease,train
4,./dataset/splitted_and_augmented_data\train\Ba...,bacterial_red disease,disease,train
...,...,...,...,...
1685,./dataset/splitted_and_augmented_data\train\Vi...,viral_diseases_white_tail_disease,disease,train
1686,./dataset/splitted_and_augmented_data\train\Vi...,viral_diseases_white_tail_disease,disease,train
1687,./dataset/splitted_and_augmented_data\train\Vi...,viral_diseases_white_tail_disease,disease,train
1688,./dataset/splitted_and_augmented_data\train\Vi...,viral_diseases_white_tail_disease,disease,train


In [13]:
# Copy the test images unchanged (no augmentation) into the 'test' folder and
# add one manifest row per copied file.
save_folder = 'test'

# Manifest rows for the copied test images (variable name kept from the
# original cell even though nothing here is augmented).
augmented_images = []
for index, row in test_df.iterrows():
    image_path = row['path']

    # Same naming scheme as the training originals: <name>_original<ext>.
    file_name, file_extension = os.path.splitext(os.path.basename(image_path))
    original_image_path = os.path.join(output_folder, save_folder, f'{file_name}_original{file_extension}')

    shutil.copyfile(image_path, original_image_path)
    augmented_images.append({
        'path': original_image_path,
        'sickness': row['sickness'],
        'target': row['target'],
        'data': 'test',
    })

# Drop test rows left over from an earlier run of this cell before appending,
# so re-running the cell does not duplicate the test set in the manifest.
augmented_df = pd.concat(
    [augmented_df[augmented_df['data'] != 'test'], pd.DataFrame(augmented_images)],
    ignore_index=True,
)

In [14]:
# Destination of the manifest CSV, inside the split-data output directory.
output_csv_path = './dataset/splitted_and_augmented_data/augmented_data.csv'

In [15]:
# index=False keeps the DataFrame's row index out of the CSV.
augmented_df.to_csv(output_csv_path, index=False)

: 