In [1]:
!pip show opencv-python
from google.colab import drive
drive.mount('/content/drive')

import cv2
import os
import pandas as pd

Name: opencv-python
Version: 4.8.0.76
Summary: Wrapper package for OpenCV python bindings.
Home-page: https://github.com/opencv/opencv-python
Author: 
Author-email: 
License: Apache 2.0
Location: /usr/local/lib/python3.10/dist-packages
Requires: numpy, numpy, numpy, numpy
Required-by: dopamine-rl, imgaug
Mounted at /content/drive


In [2]:
df_train = pd.read_csv("/content/drive/MyDrive/Breast Cancer Project/Mag100/train_df_100.csv")
df_train

Unnamed: 0,mag,path,filename,class,slide_id,tumor_type
0,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_PC-14-19440-100-033.png,malignant,19440,PC
1,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_B_F-14-21998EF-100-025.png,benign,21998EF,F
2,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_B_F-14-25197-100-020.png,benign,25197,F
3,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_DC-14-15792-100-008.png,malignant,15792,DC
4,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_B_PT-14-21998AB-100-033.png,benign,21998AB,PT
...,...,...,...,...,...,...
1081,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_DC-14-11951-100-015.png,malignant,11951,DC
1082,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_MC-14-13413-100-032.png,malignant,13413,MC
1083,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_DC-14-4372-100-013.png,malignant,4372,DC
1084,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_LC-14-15570-100-042.png,malignant,15570,LC


In [3]:
import cv2
import os
import pandas as pd
import numpy as np

# Function to apply rotation to the image
def apply_rotation(image, angle):
    rows, cols, _ = image.shape
    rotation_matrix = cv2.getRotationMatrix2D((cols/2, rows/2), angle, 1)
    rotated = cv2.warpAffine(image, rotation_matrix, (cols, rows))
    return rotated

# Function to apply random translation to the image
def apply_random_translation(image):
    rows, cols, _ = image.shape
    x_translation = np.random.randint(-50, 50)  # Random horizontal translation
    y_translation = np.random.randint(-50, 50)  # Random vertical translation
    translation_matrix = np.float32([[1, 0, x_translation], [0, 1, y_translation]])
    translated = cv2.warpAffine(image, translation_matrix, (cols, rows))
    return translated

# Function to apply random scaling to the image
def apply_random_scaling(image):
    scale_factor = np.random.uniform(0.5, 2.0)  # Random scale factor between 0.5 and 2.0
    scaled = cv2.resize(image, None, fx=scale_factor, fy=scale_factor, interpolation=cv2.INTER_LINEAR)
    return scaled

# Function to add random noise to the image
def add_random_noise(image):
    noise = np.random.normal(loc=0, scale=25, size=image.shape)  # Random Gaussian noise
    noisy_image = np.clip(image + noise, 0, 255).astype(np.uint8)
    return noisy_image

# Load the DataFrame
df_train = pd.read_csv("/content/drive/MyDrive/Breast Cancer Project/Mag100/train_df_100.csv")

# Define target folder for augmented images
target_folder = '/content/drive/MyDrive/Breast Cancer Project/Augmentation/Mag100'

# Create target folder if it doesn't exist
if not os.path.exists(target_folder):
    os.makedirs(target_folder)

# Create a list to store the paths of augmented images along with their corresponding columns
augmented_image_info = []

# Loop through each row in the filtered DataFrame
for index, row in df_train.iterrows():
    # Read image from path
    image_path = row['path']
    image = cv2.imread(image_path)

    # Apply vertical flip
    vertical_flipped = cv2.flip(image, 0)
    vertical_flip_path = os.path.join(target_folder, f"{index}_vertical_flip.png")
    cv2.imwrite(vertical_flip_path, vertical_flipped)
    augmented_image_info.append({'mag': row['mag'], 'path': row['path'], 'filename': row['filename'],
                                 'class': row['class'], 'slide_id': row['slide_id'], 'tumor_type': row['tumor_type'],
                                 'augmented_path': vertical_flip_path})

    # Apply horizontal flip
    horizontal_flipped = cv2.flip(image, 1)
    horizontal_flip_path = os.path.join(target_folder, f"{index}_horizontal_flip.png")
    cv2.imwrite(horizontal_flip_path, horizontal_flipped)
    augmented_image_info.append({'mag': row['mag'], 'path': row['path'], 'filename': row['filename'],
                                 'class': row['class'], 'slide_id': row['slide_id'], 'tumor_type': row['tumor_type'],
                                 'augmented_path': horizontal_flip_path})

    # Apply rotation (30 degrees)
    rotated = apply_rotation(image, 30)
    rotated_path = os.path.join(target_folder, f"{index}_rotated_30.png")
    cv2.imwrite(rotated_path, rotated)
    augmented_image_info.append({'mag': row['mag'], 'path': row['path'], 'filename': row['filename'],
                                 'class': row['class'], 'slide_id': row['slide_id'], 'tumor_type': row['tumor_type'],
                                 'augmented_path': rotated_path})

    # Apply rotation (45 degrees)
    rotated = apply_rotation(image, 45)
    rotated_path = os.path.join(target_folder, f"{index}_rotated_45.png")
    cv2.imwrite(rotated_path, rotated)
    augmented_image_info.append({'mag': row['mag'], 'path': row['path'], 'filename': row['filename'],
                                 'class': row['class'], 'slide_id': row['slide_id'], 'tumor_type': row['tumor_type'],
                                 'augmented_path': rotated_path})

    # Apply rotation (60 degrees)
    rotated = apply_rotation(image, 60)
    rotated_path = os.path.join(target_folder, f"{index}_rotated_60.png")
    cv2.imwrite(rotated_path, rotated)
    augmented_image_info.append({'mag': row['mag'], 'path': row['path'], 'filename': row['filename'],
                                 'class': row['class'], 'slide_id': row['slide_id'], 'tumor_type': row['tumor_type'],
                                 'augmented_path': rotated_path})

    # Apply rotation (75 degrees)
    rotated = apply_rotation(image, 75)
    rotated_path = os.path.join(target_folder, f"{index}_rotated_75.png")
    cv2.imwrite(rotated_path, rotated)
    augmented_image_info.append({'mag': row['mag'], 'path': row['path'], 'filename': row['filename'],
                                 'class': row['class'], 'slide_id': row['slide_id'], 'tumor_type': row['tumor_type'],
                                 'augmented_path': rotated_path})

    # Apply rotation (90 degrees)
    rotated = apply_rotation(image, 90)
    rotated_path = os.path.join(target_folder, f"{index}_rotated_90.png")
    cv2.imwrite(rotated_path, rotated)
    augmented_image_info.append({'mag': row['mag'], 'path': row['path'], 'filename': row['filename'],
                                 'class': row['class'], 'slide_id': row['slide_id'], 'tumor_type': row['tumor_type'],
                                 'augmented_path': rotated_path})

    # Apply random translation
    translated = apply_random_translation(image)
    translated_path = os.path.join(target_folder, f"{index}_translated.png")
    cv2.imwrite(translated_path, translated)
    augmented_image_info.append({'mag': row['mag'], 'path': row['path'], 'filename': row['filename'],
                                 'class': row['class'], 'slide_id': row['slide_id'], 'tumor_type': row['tumor_type'],
                                 'augmented_path': translated_path})

    # Apply random scaling
    scaled = apply_random_scaling(image)
    scaled_path = os.path.join(target_folder, f"{index}_scaled.png")
    cv2.imwrite(scaled_path, scaled)
    augmented_image_info.append({'mag': row['mag'], 'path': row['path'], 'filename': row['filename'],
                                 'class': row['class'], 'slide_id': row['slide_id'], 'tumor_type': row['tumor_type'],
                                 'augmented_path': scaled_path})

    # Apply random noise addition
    noisy_image = add_random_noise(image)
    noisy_path = os.path.join(target_folder, f"{index}_noisy.png")
    cv2.imwrite(noisy_path, noisy_image)
    augmented_image_info.append({'mag': row['mag'], 'path': row['path'], 'filename': row['filename'],
                                 'class': row['class'], 'slide_id': row['slide_id'], 'tumor_type': row['tumor_type'],
                                 'augmented_path': noisy_path})

# Convert the list of dictionaries to a DataFrame
augmented_df = pd.DataFrame(augmented_image_info)

# Save the DataFrame to a CSV file
augmented_df.to_csv('/content/drive/MyDrive/Breast Cancer Project/Mag100/df_mag100_augmented.csv', index=False)


In [4]:
augmented_df = pd.read_csv('/content/drive/MyDrive/Breast Cancer Project/Mag100/df_mag100_augmented.csv')

In [5]:
augmented_df

Unnamed: 0,mag,path,filename,class,slide_id,tumor_type,augmented_path
0,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_PC-14-19440-100-033.png,malignant,19440,PC,/content/drive/MyDrive/Breast Cancer Project/A...
1,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_PC-14-19440-100-033.png,malignant,19440,PC,/content/drive/MyDrive/Breast Cancer Project/A...
2,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_PC-14-19440-100-033.png,malignant,19440,PC,/content/drive/MyDrive/Breast Cancer Project/A...
3,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_PC-14-19440-100-033.png,malignant,19440,PC,/content/drive/MyDrive/Breast Cancer Project/A...
4,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_PC-14-19440-100-033.png,malignant,19440,PC,/content/drive/MyDrive/Breast Cancer Project/A...
...,...,...,...,...,...,...,...
10855,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_B_F-14-25197-100-029.png,benign,25197,F,/content/drive/MyDrive/Breast Cancer Project/A...
10856,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_B_F-14-25197-100-029.png,benign,25197,F,/content/drive/MyDrive/Breast Cancer Project/A...
10857,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_B_F-14-25197-100-029.png,benign,25197,F,/content/drive/MyDrive/Breast Cancer Project/A...
10858,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_B_F-14-25197-100-029.png,benign,25197,F,/content/drive/MyDrive/Breast Cancer Project/A...


In [6]:
import pandas as pd

# Create a DataFrame
df = augmented_df

# Create two separate dataframes with the required columns
df1 = df[['mag', 'path', 'filename', 'class', 'slide_id', 'tumor_type']]
df2 = df[['mag', 'augmented_path', 'filename', 'class', 'slide_id', 'tumor_type']]

# Rename the columns for the second dataframe
df2.columns = ['mag', 'path', 'filename', 'class', 'slide_id', 'tumor_type']

# Concatenate the two dataframes
result_df = pd.concat([df1, df2])

# Write the result to a new CSV file
result_df

Unnamed: 0,mag,path,filename,class,slide_id,tumor_type
0,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_PC-14-19440-100-033.png,malignant,19440,PC
1,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_PC-14-19440-100-033.png,malignant,19440,PC
2,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_PC-14-19440-100-033.png,malignant,19440,PC
3,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_PC-14-19440-100-033.png,malignant,19440,PC
4,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_PC-14-19440-100-033.png,malignant,19440,PC
...,...,...,...,...,...,...
10855,100,/content/drive/MyDrive/Breast Cancer Project/A...,SOB_B_F-14-25197-100-029.png,benign,25197,F
10856,100,/content/drive/MyDrive/Breast Cancer Project/A...,SOB_B_F-14-25197-100-029.png,benign,25197,F
10857,100,/content/drive/MyDrive/Breast Cancer Project/A...,SOB_B_F-14-25197-100-029.png,benign,25197,F
10858,100,/content/drive/MyDrive/Breast Cancer Project/A...,SOB_B_F-14-25197-100-029.png,benign,25197,F


In [7]:
# Drop duplicates based on specific columns
df_deduplicated = result_df.drop_duplicates(subset=['path'])

# Display the deduplicated DataFrame
df_deduplicated.to_csv('/content/drive/MyDrive/Breast Cancer Project/Mag100/df_mag100_augmented_final.csv', index=False)

In [8]:
df_deduplicated

Unnamed: 0,mag,path,filename,class,slide_id,tumor_type
0,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_PC-14-19440-100-033.png,malignant,19440,PC
10,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_B_F-14-21998EF-100-025.png,benign,21998EF,F
20,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_B_F-14-25197-100-020.png,benign,25197,F
30,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_DC-14-15792-100-008.png,malignant,15792,DC
40,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_B_PT-14-21998AB-100-033.png,benign,21998AB,PT
...,...,...,...,...,...,...
10855,100,/content/drive/MyDrive/Breast Cancer Project/A...,SOB_B_F-14-25197-100-029.png,benign,25197,F
10856,100,/content/drive/MyDrive/Breast Cancer Project/A...,SOB_B_F-14-25197-100-029.png,benign,25197,F
10857,100,/content/drive/MyDrive/Breast Cancer Project/A...,SOB_B_F-14-25197-100-029.png,benign,25197,F
10858,100,/content/drive/MyDrive/Breast Cancer Project/A...,SOB_B_F-14-25197-100-029.png,benign,25197,F
