In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
import os
import cv2
import shutil
from functions import rotate_image, crop_image, blur_image, change_color, adjust_color_balance
import random

In [2]:
# Load the image manifest; later cells rely on its 'path', 'sickness' and 'target' columns.
# NOTE(review): the frame displays further down show an 'Unnamed: 0' column, which looks
# like a leftover index from an earlier to_csv call — confirm whether it should be dropped.
df = pd.read_csv("./dataset/cropped_images/augmented_data.csv")

In [3]:
# Class balance of the binary 'target' label before splitting.
df['target'].value_counts()

target
disease    213
healthy    134
Name: count, dtype: int64

In [4]:
# Hold out 20 % of the rows for testing. Stratifying on the fine-grained 'sickness'
# label keeps every class represented in both splits; the fixed seed makes the
# split reproducible.
train_df, test_df = train_test_split(
    df,
    test_size=0.2,
    stratify=df['sickness'],
    random_state=42,
)

# Print the per-class row counts of each split under its heading.
for heading, split in (("Träningsuppsättning:", train_df), ("\nTestuppsättning:", test_df)):
    print(heading)
    print(split['sickness'].value_counts())

Träningsuppsättning:
sickness
healthy_fish                         107
bacterial_diseases _aeromoniasis      37
fungal_diseases_saprolegniasis        37
bacterial_red disease                 35
viral_diseases_white_tail_disease     33
parasitic_diseases                    28
Name: count, dtype: int64

Testuppsättning:
sickness
healthy_fish                         27
bacterial_diseases _aeromoniasis     10
bacterial_red disease                 9
fungal_diseases_saprolegniasis        9
viral_diseases_white_tail_disease     8
parasitic_diseases                    7
Name: count, dtype: int64


In [5]:
# Display the training split for a visual sanity check.
train_df

Unnamed: 0.1,Unnamed: 0,path,sickness,target
30,30,./dataset/cropped_images/cropped_Bacterial_dis...,bacterial_diseases _aeromoniasis,disease
221,225,./dataset/cropped_images/cropped_Healthy_Fish_...,healthy_fish,healthy
297,305,./dataset/cropped_images/cropped_Parasitic_dis...,parasitic_diseases,disease
329,337,./dataset/cropped_images/cropped_Viral_disease...,viral_diseases_white_tail_disease,disease
251,256,./dataset/cropped_images/cropped_Healthy_Fish_...,healthy_fish,healthy
...,...,...,...,...
37,37,./dataset/cropped_images/cropped_Bacterial_dis...,bacterial_diseases _aeromoniasis,disease
281,289,./dataset/cropped_images/cropped_Parasitic_dis...,parasitic_diseases,disease
95,96,./dataset/cropped_images/cropped_Fungal_diseas...,fungal_diseases_saprolegniasis,disease
36,36,./dataset/cropped_images/cropped_Bacterial_dis...,bacterial_diseases _aeromoniasis,disease


In [6]:
# Display the held-out test split for a visual sanity check.
test_df

Unnamed: 0.1,Unnamed: 0,path,sickness,target
72,73,./dataset/cropped_images/cropped_Bacterial_Red...,bacterial_red disease,disease
321,329,./dataset/cropped_images/cropped_Viral_disease...,viral_diseases_white_tail_disease,disease
199,202,./dataset/cropped_images/cropped_Healthy_Fish_...,healthy_fish,healthy
286,294,./dataset/cropped_images/cropped_Parasitic_dis...,parasitic_diseases,disease
115,116,./dataset/cropped_images/cropped_Fungal_diseas...,fungal_diseases_saprolegniasis,disease
...,...,...,...,...
28,28,./dataset/cropped_images/cropped_Bacterial_dis...,bacterial_diseases _aeromoniasis,disease
270,278,./dataset/cropped_images/cropped_healthy_fish_...,healthy_fish,healthy
188,191,./dataset/cropped_images/cropped_Healthy_Fish_...,healthy_fish,healthy
74,75,./dataset/cropped_images/cropped_Bacterial_Red...,bacterial_red disease,disease


In [7]:
def augment_and_save(image_path, output_folder, augmented_df, row, save_folder, data_label):
    """Write an image plus its augmented variants to disk and log them in a manifest.

    The source image is saved as ``<name>_original<ext>``. Each entry of the
    augmentation table is then applied to the *original* image and saved as
    ``<name>_augmented_<idx><ext>``. Entries flagged for it additionally get a
    grayscale copy saved as ``<name>_augmented_<idx>_bw<ext>``. With the current
    table this yields 8 files per input (1 original, 5 variants, 2 grayscale copies).

    Args:
        image_path: Path of the source image to read.
        output_folder: Root directory of the generated dataset.
        augmented_df: Manifest DataFrame to extend; it is not modified in place.
        row: Mapping (e.g. a DataFrame row) providing the 'sickness' and 'target'
            labels copied onto every generated record.
        save_folder: Sub-directory of ``output_folder`` to write into; created
            if it does not exist yet.
        data_label: Value stored in the 'data' column of every record.

    Returns:
        A new DataFrame: ``augmented_df`` followed by one row per written file
        (columns 'path', 'sickness', 'target', 'data'), with a fresh integer index.

    Raises:
        FileNotFoundError: If ``image_path`` cannot be read as an image.
        OSError: If OpenCV reports that an output file could not be written.
    """
    original_image = cv2.imread(image_path)
    if original_image is None:
        # cv2.imread reports a missing/undecodable file by returning None rather
        # than raising; fail here with a message that names the offending path.
        raise FileNotFoundError(f'Could not read image: {image_path}')

    file_name, file_extension = os.path.splitext(os.path.basename(image_path))

    target_dir = os.path.join(output_folder, save_folder)
    os.makedirs(target_dir, exist_ok=True)

    # (function, keyword arguments, also save a grayscale copy of the result?)
    # The helpers come from the project's `functions` module.
    augmentation_functions = [
        (rotate_image, {'angle': 30}, True),
        (rotate_image, {'angle': 60}, True),
        (blur_image, {'k_size': 5}, False),
        (change_color, {'flag': cv2.COLOR_BGR2GRAY}, False),
        (adjust_color_balance, {'red_factor': 1.0, 'green_factor': 0.8, 'blue_factor': 0.8}, False),
    ]

    augmented_images = []

    def _save(image, suffix):
        """Write `image` as <file_name>_<suffix><ext> and record its manifest row."""
        out_path = os.path.join(target_dir, f'{file_name}_{suffix}{file_extension}')
        # cv2.imwrite returns False on failure instead of raising; without this
        # check the manifest would reference a file that does not exist.
        if not cv2.imwrite(out_path, image):
            raise OSError(f'Could not write image: {out_path}')
        augmented_images.append({
            'path': out_path,
            'sickness': row['sickness'],
            'target': row['target'],
            'data': data_label,
        })

    _save(original_image, 'original')

    for idx, (aug_func, params, with_grayscale) in enumerate(augmentation_functions):
        augmented_image = aug_func(original_image, **params)
        _save(augmented_image, f'augmented_{idx}')
        if with_grayscale:
            _save(cv2.cvtColor(augmented_image, cv2.COLOR_BGR2GRAY), f'augmented_{idx}_bw')

    return pd.concat([augmented_df, pd.DataFrame(augmented_images)], ignore_index=True)


In [8]:
# Root of the generated dataset, with one sub-directory per split.
output_folder = './dataset/cropped_splitted_and_augmented_data'
for split_name in ('train', 'test'):
    os.makedirs(os.path.join(output_folder, split_name), exist_ok=True)

In [9]:
# Build the manifest from scratch: every training image is written to the
# 'train' folder together with its augmented variants.
save_folder = 'train'
augmented_df = pd.DataFrame(columns=['path', 'sickness', 'target', 'data'])

for index, row in train_df.iterrows():
    image_path = row['path']
    augmented_df = augment_and_save(
        image_path=image_path,
        output_folder=output_folder,
        augmented_df=augmented_df,
        row=row,
        save_folder=save_folder,
        data_label='train',
    )

In [10]:
# Display the manifest after the training images have been processed.
augmented_df

Unnamed: 0,path,sickness,target,data
0,./dataset/cropped_splitted_and_augmented_data\...,bacterial_diseases _aeromoniasis,disease,train
1,./dataset/cropped_splitted_and_augmented_data\...,bacterial_diseases _aeromoniasis,disease,train
2,./dataset/cropped_splitted_and_augmented_data\...,bacterial_diseases _aeromoniasis,disease,train
3,./dataset/cropped_splitted_and_augmented_data\...,bacterial_diseases _aeromoniasis,disease,train
4,./dataset/cropped_splitted_and_augmented_data\...,bacterial_diseases _aeromoniasis,disease,train
...,...,...,...,...
2211,./dataset/cropped_splitted_and_augmented_data\...,bacterial_red disease,disease,train
2212,./dataset/cropped_splitted_and_augmented_data\...,bacterial_red disease,disease,train
2213,./dataset/cropped_splitted_and_augmented_data\...,bacterial_red disease,disease,train
2214,./dataset/cropped_splitted_and_augmented_data\...,bacterial_red disease,disease,train


In [11]:
# Test images are copied byte-for-byte (no augmentation) into the 'test' folder
# and logged in the same manifest as the training images.
save_folder = 'test'

augmented_images = []
for index, row in test_df.iterrows():
    image_path = row['path']
    file_name, file_extension = os.path.splitext(os.path.basename(image_path))

    original_image_path = os.path.join(output_folder, save_folder, f'{file_name}_original{file_extension}')
    shutil.copyfile(image_path, original_image_path)
    augmented_images.append({
        'path': original_image_path,
        'sickness': row['sickness'],
        'target': row['target'],
        'data': 'test',
    })

# Drop any 'test' rows left by a previous run of this cell before appending, so
# re-running the cell does not duplicate the test split in the manifest.
augmented_df = pd.concat(
    [augmented_df[augmented_df['data'] != 'test'], pd.DataFrame(augmented_images)],
    ignore_index=True,
)

In [12]:
# Destination of the combined train/test manifest.
# NOTE(review): this points at 'splitted_and_augmented_data', while the images are written
# under output_folder ('cropped_splitted_and_augmented_data') — confirm the different
# directory is intentional.
output_csv_path = './dataset/splitted_and_augmented_data/augmented_data.csv'

In [13]:
# The CSV's parent directory is not created by any earlier cell (only the train/test
# folders under output_folder are), and to_csv does not create missing directories.
csv_dir = os.path.dirname(output_csv_path)
if csv_dir:
    os.makedirs(csv_dir, exist_ok=True)
augmented_df.to_csv(output_csv_path, index=False)