In [1]:
!pip show opencv-python
from google.colab import drive
drive.mount('/content/drive')

import cv2
import os
import pandas as pd

Name: opencv-python
Version: 4.8.0.76
Summary: Wrapper package for OpenCV python bindings.
Home-page: https://github.com/opencv/opencv-python
Author: 
Author-email: 
License: Apache 2.0
Location: /usr/local/lib/python3.10/dist-packages
Requires: numpy, numpy, numpy, numpy
Required-by: dopamine-rl, imgaug
Mounted at /content/drive


In [17]:
df_train = pd.read_csv("/content/drive/MyDrive/Breast Cancer Project/Mag100/train_df_100.csv")
df_train

Unnamed: 0,mag,path,filename,class,slide_id,tumor_type
0,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_PC-14-19440-100-033.png,malignant,19440,PC
1,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_B_F-14-21998EF-100-025.png,benign,21998EF,F
2,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_B_F-14-25197-100-020.png,benign,25197,F
3,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_DC-14-15792-100-008.png,malignant,15792,DC
4,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_B_PT-14-21998AB-100-033.png,benign,21998AB,PT
...,...,...,...,...,...,...
1081,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_DC-14-11951-100-015.png,malignant,11951,DC
1082,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_MC-14-13413-100-032.png,malignant,13413,MC
1083,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_DC-14-4372-100-013.png,malignant,4372,DC
1084,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_LC-14-15570-100-042.png,malignant,15570,LC


In [18]:
df_train = pd.read_csv("/content/drive/MyDrive/Breast Cancer Project/Mag100/train_df_100.csv")

# Define target folder for augmented images
target_folder = '/content/drive/MyDrive/Breast Cancer Project/Augmentation/Mag100'

# Create target folder if it doesn't exist
if not os.path.exists(target_folder):
    os.makedirs(target_folder)

# Create a list to store the paths of augmented images along with their corresponding columns
augmented_image_info = []

# Loop through each row in the filtered DataFrame
for index, row in df_train.iterrows():
    # Read image from path
    image_path = row['path']
    image = cv2.imread(image_path)

    # Apply vertical flip
    vertical_flipped = cv2.flip(image, 0)
    vertical_flip_path = os.path.join(target_folder, f"{index}_vertical_flip.png")
    cv2.imwrite(vertical_flip_path, vertical_flipped)
    augmented_image_info.append({'mag': row['mag'], 'path': row['path'], 'filename': row['filename'],
                                 'class': row['class'], 'slide_id': row['slide_id'], 'tumor_type': row['tumor_type'],
                                 'augmented_path': vertical_flip_path})

    # Apply horizontal flip
    horizontal_flipped = cv2.flip(image, 1)
    horizontal_flip_path = os.path.join(target_folder, f"{index}_horizontal_flip.png")
    cv2.imwrite(horizontal_flip_path, horizontal_flipped)
    augmented_image_info.append({ 'mag': row['mag'], 'path': row['path'], 'filename': row['filename'],
                                 'class': row['class'], 'slide_id': row['slide_id'], 'tumor_type': row['tumor_type'],
                                 'augmented_path': horizontal_flip_path})

    # Apply rotation (90 degrees clockwise)
    rows, cols, _ = image.shape
    rotation_matrix = cv2.getRotationMatrix2D((cols/2, rows/2), 90, 1)
    rotated = cv2.warpAffine(image, rotation_matrix, (cols, rows))
    rotated_path = os.path.join(target_folder, f"{index}_rotated.png")
    cv2.imwrite(rotated_path, rotated)
    augmented_image_info.append({'mag': row['mag'], 'path': row['path'], 'filename': row['filename'],
                                 'class': row['class'], 'slide_id': row['slide_id'], 'tumor_type': row['tumor_type'],
                                 'augmented_path': rotated_path})

# Convert the list of dictionaries to a DataFrame
augmented_df = pd.DataFrame(augmented_image_info)

# Save the DataFrame to a CSV file
augmented_df.to_csv('/content/drive/MyDrive/Breast Cancer Project/Mag100/df_mag100_augmented.csv', index=False)

In [27]:
augmented_df = pd.read_csv('/content/drive/MyDrive/Breast Cancer Project/Mag100/df_mag100_augmented.csv')

In [28]:
augmented_df

Unnamed: 0,mag,path,filename,class,slide_id,tumor_type,augmented_path
0,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_PC-14-19440-100-033.png,malignant,19440,PC,/content/drive/MyDrive/Breast Cancer Project/A...
1,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_PC-14-19440-100-033.png,malignant,19440,PC,/content/drive/MyDrive/Breast Cancer Project/A...
2,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_PC-14-19440-100-033.png,malignant,19440,PC,/content/drive/MyDrive/Breast Cancer Project/A...
3,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_B_F-14-21998EF-100-025.png,benign,21998EF,F,/content/drive/MyDrive/Breast Cancer Project/A...
4,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_B_F-14-21998EF-100-025.png,benign,21998EF,F,/content/drive/MyDrive/Breast Cancer Project/A...
...,...,...,...,...,...,...,...
3253,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_LC-14-15570-100-042.png,malignant,15570,LC,/content/drive/MyDrive/Breast Cancer Project/A...
3254,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_LC-14-15570-100-042.png,malignant,15570,LC,/content/drive/MyDrive/Breast Cancer Project/A...
3255,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_B_F-14-25197-100-029.png,benign,25197,F,/content/drive/MyDrive/Breast Cancer Project/A...
3256,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_B_F-14-25197-100-029.png,benign,25197,F,/content/drive/MyDrive/Breast Cancer Project/A...


In [29]:
import pandas as pd

# Create a DataFrame
df = augmented_df

# Create two separate dataframes with the required columns
df1 = df[['mag', 'path', 'filename', 'class', 'slide_id', 'tumor_type']]
df2 = df[['mag', 'augmented_path', 'filename', 'class', 'slide_id', 'tumor_type']]

# Rename the columns for the second dataframe
df2.columns = ['mag', 'path', 'filename', 'class', 'slide_id', 'tumor_type']

# Concatenate the two dataframes
result_df = pd.concat([df1, df2])

# Write the result to a new CSV file
result_df

Unnamed: 0,mag,path,filename,class,slide_id,tumor_type
0,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_PC-14-19440-100-033.png,malignant,19440,PC
1,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_PC-14-19440-100-033.png,malignant,19440,PC
2,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_PC-14-19440-100-033.png,malignant,19440,PC
3,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_B_F-14-21998EF-100-025.png,benign,21998EF,F
4,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_B_F-14-21998EF-100-025.png,benign,21998EF,F
...,...,...,...,...,...,...
3253,100,/content/drive/MyDrive/Breast Cancer Project/A...,SOB_M_LC-14-15570-100-042.png,malignant,15570,LC
3254,100,/content/drive/MyDrive/Breast Cancer Project/A...,SOB_M_LC-14-15570-100-042.png,malignant,15570,LC
3255,100,/content/drive/MyDrive/Breast Cancer Project/A...,SOB_B_F-14-25197-100-029.png,benign,25197,F
3256,100,/content/drive/MyDrive/Breast Cancer Project/A...,SOB_B_F-14-25197-100-029.png,benign,25197,F


In [31]:
# Drop duplicates based on specific columns
df_deduplicated = result_df.drop_duplicates(subset=['path'])

# Display the deduplicated DataFrame
df_deduplicated.to_csv('/content/drive/MyDrive/Breast Cancer Project/Mag100/df_mag100_augmented_final.csv', index=False)

In [25]:
df_train = pd.read_csv("/content/drive/MyDrive/Breast Cancer Project/Mag200/train_df_200.csv")

# Define target folder for augmented images
target_folder = '/content/drive/MyDrive/Breast Cancer Project/Augmentation/Mag200'

# Create target folder if it doesn't exist
if not os.path.exists(target_folder):
    os.makedirs(target_folder)

# Create a list to store the paths of augmented images along with their corresponding columns
augmented_image_info = []

# Loop through each row in the filtered DataFrame
for index, row in df_train.iterrows():
    # Read image from path
    image_path = row['path']
    image = cv2.imread(image_path)

    # Apply vertical flip
    vertical_flipped = cv2.flip(image, 0)
    vertical_flip_path = os.path.join(target_folder, f"{index}_vertical_flip.png")
    cv2.imwrite(vertical_flip_path, vertical_flipped)
    augmented_image_info.append({'mag': row['mag'], 'path': row['path'], 'filename': row['filename'],
                                 'class': row['class'], 'slide_id': row['slide_id'], 'tumor_type': row['tumor_type'],
                                 'augmented_path': vertical_flip_path})

    # Apply horizontal flip
    horizontal_flipped = cv2.flip(image, 1)
    horizontal_flip_path = os.path.join(target_folder, f"{index}_horizontal_flip.png")
    cv2.imwrite(horizontal_flip_path, horizontal_flipped)
    augmented_image_info.append({ 'mag': row['mag'], 'path': row['path'], 'filename': row['filename'],
                                 'class': row['class'], 'slide_id': row['slide_id'], 'tumor_type': row['tumor_type'],
                                 'augmented_path': horizontal_flip_path})

    # Apply rotation (90 degrees clockwise)
    rows, cols, _ = image.shape
    rotation_matrix = cv2.getRotationMatrix2D((cols/2, rows/2), 90, 1)
    rotated = cv2.warpAffine(image, rotation_matrix, (cols, rows))
    rotated_path = os.path.join(target_folder, f"{index}_rotated.png")
    cv2.imwrite(rotated_path, rotated)
    augmented_image_info.append({'mag': row['mag'], 'path': row['path'], 'filename': row['filename'],
                                 'class': row['class'], 'slide_id': row['slide_id'], 'tumor_type': row['tumor_type'],
                                 'augmented_path': rotated_path})

# Convert the list of dictionaries to a DataFrame
augmented_df_2 = pd.DataFrame(augmented_image_info)

# Save the DataFrame to a CSV file
augmented_df_2.to_csv('/content/drive/MyDrive/Breast Cancer Project/Mag200/df_mag200_augmented.csv', index=False)

In [32]:
import pandas as pd

# Create a DataFrame
df = augmented_df_2

# Create two separate dataframes with the required columns
df1 = df[['mag', 'path', 'filename', 'class', 'slide_id', 'tumor_type']]
df2 = df[['mag', 'augmented_path', 'filename', 'class', 'slide_id', 'tumor_type']]

# Rename the columns for the second dataframe
df2.columns = ['mag', 'path', 'filename', 'class', 'slide_id', 'tumor_type']

# Concatenate the two dataframes
result_df_2 = pd.concat([df1, df2])

# Write the result to a new CSV file
result_df_2

# Drop duplicates based on specific columns
df_deduplicated_2 = result_df_2.drop_duplicates(subset=['path'])

# Display the deduplicated DataFrame
df_deduplicated_2

Unnamed: 0,mag,path,filename,class,slide_id,tumor_type
0,400,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_DC-14-2980-400-014.png,malignant,2980,DC
3,400,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_LC-14-15570-400-015.png,malignant,15570,LC
6,400,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_B_F-14-21998EF-400-019.png,benign,21998EF,F
9,400,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_B_TA-14-3411F-400-008.png,benign,3411F,TA
12,400,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_DC-14-2523-400-012.png,malignant,2523,DC
...,...,...,...,...,...,...
2764,400,/content/drive/MyDrive/Breast Cancer Project/A...,SOB_M_DC-14-16336-400-008.png,malignant,16336,DC
2765,400,/content/drive/MyDrive/Breast Cancer Project/A...,SOB_M_DC-14-16336-400-008.png,malignant,16336,DC
2766,400,/content/drive/MyDrive/Breast Cancer Project/A...,SOB_M_DC-14-4372-400-018.png,malignant,4372,DC
2767,400,/content/drive/MyDrive/Breast Cancer Project/A...,SOB_M_DC-14-4372-400-018.png,malignant,4372,DC


In [33]:
# Display the deduplicated DataFrame
df_deduplicated_2.to_csv('/content/drive/MyDrive/Breast Cancer Project/Mag200/df_mag200_augmented_final.csv', index=False)