In [2]:
import os
import cv2
import numpy as np

def pad_images(input_folder, output_folder, target_size=(600, 600)):
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    
    for filename in os.listdir(input_folder):
        img_path = os.path.join(input_folder, filename)
        img = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
        
        if img is None:
            print(f"Skipping {filename}, unable to read file.")
            continue
        
        # Check if image is larger than target size
        h, w = img.shape[:2]
        if h > target_size[1] or w > target_size[0]:
            scale = min(target_size[1] / h, target_size[0] / w)
            new_w, new_h = int(w * scale), int(h * scale)
            img = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_AREA)
            h, w = new_h, new_w  # Update dimensions

        # Calculate padding
        top = (target_size[1] - h) // 2
        bottom = target_size[1] - h - top
        left = (target_size[0] - w) // 2
        right = target_size[0] - w - left

        # Check if image has an alpha channel
        if img.shape[-1] == 4:  # PNG with transparency
            padded_img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=[255, 255, 255, 0])
        else:
            padded_img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=[255, 255, 255])

        # Save the padded image
        output_path = os.path.join(output_folder, filename)
        cv2.imwrite(output_path, padded_img)
        print(f"Processed {filename} -> {output_path}")

# Example usage
input_folder = "D:/rximage/image/images/split"
output_folder = "D:/rximage/image/images/split_padded"
pad_images(input_folder, output_folder)


Processed 00002-3228-30_RXNAVIMAGE10_391E1C80_bottom.jpg -> D:/rximage/image/images/split_padded\00002-3228-30_RXNAVIMAGE10_391E1C80_bottom.jpg
Processed 00002-3228-30_RXNAVIMAGE10_391E1C80_top.jpg -> D:/rximage/image/images/split_padded\00002-3228-30_RXNAVIMAGE10_391E1C80_top.jpg
Processed 00002-3229-30_RXNAVIMAGE10_3E1E1F50_bottom.jpg -> D:/rximage/image/images/split_padded\00002-3229-30_RXNAVIMAGE10_3E1E1F50_bottom.jpg
Processed 00002-3229-30_RXNAVIMAGE10_3E1E1F50_top.jpg -> D:/rximage/image/images/split_padded\00002-3229-30_RXNAVIMAGE10_3E1E1F50_top.jpg
Processed 00002-3235-60_RXNAVIMAGE10_1B158D9C_bottom.jpg -> D:/rximage/image/images/split_padded\00002-3235-60_RXNAVIMAGE10_1B158D9C_bottom.jpg
Processed 00002-3235-60_RXNAVIMAGE10_1B158D9C_top.jpg -> D:/rximage/image/images/split_padded\00002-3235-60_RXNAVIMAGE10_1B158D9C_top.jpg
Processed 00002-3238-30_RXNAVIMAGE10_361E1B30_bottom.jpg -> D:/rximage/image/images/split_padded\00002-3238-30_RXNAVIMAGE10_361E1B30_bottom.jpg
Processed 

In [None]:
import os
import cv2
import numpy as np

def pad_images(input_folder, output_folder, target_size=(600, 600)):
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    
    for filename in os.listdir(input_folder):
        img_path = os.path.join(input_folder, filename)
        img = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
        
        if img is None:
            print(f"Skipping {filename}, unable to read file.")
            continue
        
        # Check if image is larger than target size
        h, w = img.shape[:2]
        if h > target_size[1] or w > target_size[0]:
            scale = min(target_size[1] / h, target_size[0] / w)
            new_w, new_h = int(w * scale), int(h * scale)
            img = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_AREA)
            h, w = new_h, new_w  # Update dimensions

        # Calculate padding
        top = (target_size[1] - h) // 2
        bottom = target_size[1] - h - top
        left = (target_size[0] - w) // 2
        right = target_size[0] - w - left

        # Check if image has an alpha channel
        if img.shape[-1] == 4:  # PNG with transparency
            padded_img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=[255, 255, 255, 0])
        else:
            padded_img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=[255, 255, 255])

        # Save the padded image
        output_path = os.path.join(output_folder, filename)
        cv2.imwrite(output_path, padded_img)
        print(f"Processed {filename} -> {output_path}")

# Example usage
input_folder = "D:/rximage/image/images/split"
output_folder = "D:/rximage/image/images/split_padded"
pad_images(input_folder, output_folder)


Processed 00002-3228-30_RXNAVIMAGE10_391E1C80_bottom.jpg -> D:/rximage/image/images/split_padded\00002-3228-30_RXNAVIMAGE10_391E1C80_bottom.jpg
Processed 00002-3228-30_RXNAVIMAGE10_391E1C80_top.jpg -> D:/rximage/image/images/split_padded\00002-3228-30_RXNAVIMAGE10_391E1C80_top.jpg
Processed 00002-3229-30_RXNAVIMAGE10_3E1E1F50_bottom.jpg -> D:/rximage/image/images/split_padded\00002-3229-30_RXNAVIMAGE10_3E1E1F50_bottom.jpg
Processed 00002-3229-30_RXNAVIMAGE10_3E1E1F50_top.jpg -> D:/rximage/image/images/split_padded\00002-3229-30_RXNAVIMAGE10_3E1E1F50_top.jpg
Processed 00002-3235-60_RXNAVIMAGE10_1B158D9C_bottom.jpg -> D:/rximage/image/images/split_padded\00002-3235-60_RXNAVIMAGE10_1B158D9C_bottom.jpg
Processed 00002-3235-60_RXNAVIMAGE10_1B158D9C_top.jpg -> D:/rximage/image/images/split_padded\00002-3235-60_RXNAVIMAGE10_1B158D9C_top.jpg
Processed 00002-3238-30_RXNAVIMAGE10_361E1B30_bottom.jpg -> D:/rximage/image/images/split_padded\00002-3238-30_RXNAVIMAGE10_361E1B30_bottom.jpg
Processed 

In [None]:
import os
import cv2
import numpy as np
import random

def rotate_images(input_folder, output_folder, angle_range=(-90, 90), num_copies=30):
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    
    for filename in os.listdir(input_folder):
        img_path = os.path.join(input_folder, filename)
        img = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
        
        if img is None:
            print(f"Skipping {filename}, unable to read file.")
            continue
        
        name, ext = os.path.splitext(filename)
        
        for i in range(1, num_copies + 1):
            h, w = img.shape[:2]
            angle = random.uniform(angle_range[0], angle_range[1])  # Random rotation
            center = (w // 2, h // 2)
            rotation_matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
            rotated_img = cv2.warpAffine(img, rotation_matrix, (w, h), borderMode=cv2.BORDER_CONSTANT, borderValue=[255, 255, 255])
            
            output_filename = f"{name}_{i}{ext}"
            output_path = os.path.join(output_folder, output_filename)
            cv2.imwrite(output_path, rotated_img)
            print(f"Processed {output_filename} -> {output_path}")

input_folder = "D:/rximage/image/images/split_padded"
output_folder = "D:/rximage/image/images/split_padded_rotated"
rotate_images(input_folder, output_folder)

In [1]:
import pandas as pd

df = pd.read_csv("rximage_dataV3.csv")

In [2]:
df.columns

Index(['ndc11', 'rxcui', 'name', 'labeler', 'acqDate', 'attribution',
       'ingredientsAvailable', 'rxnavImageObjectId', 'rxnavImageFileName',
       'rxnavImageSize', 'nlmImageObjectId', 'nlmImageFileName',
       'nlmImageSize', 'shape', 'size', 'color', 'imprint', 'imprintType',
       'symbol', 'score', 'active_ingredients', 'inactive_ingredients',
       'base_filename', 'new_filename', 'split_type', 'split_side'],
      dtype='object')

In [3]:
df.shape[0]

8784

In [6]:
df_pre = df[['ndc11', 'rxcui','name', 'shape', 'size', 'color', 'imprint', 'imprintType','symbol', 'score', 'base_filename', 'new_filename', 'split_type', 'split_side']] 

In [7]:
df_pre.columns

Index(['ndc11', 'rxcui', 'name', 'shape', 'size', 'color', 'imprint',
       'imprintType', 'symbol', 'score', 'base_filename', 'new_filename',
       'split_type', 'split_side'],
      dtype='object')

In [None]:
import pandas as pd
import os

# Load the existing dataset
df = pd.read_csv("rximage_dataV3.csv")

# List to store new rows
augmented_data = []

for _, row in df.iterrows():
    base_filename = str(row["new_filename"])  # Ensure it's a string
    name, ext = os.path.splitext(base_filename)
    name = name.strip()  # Remove any leading or trailing spaces
    
    for i in range(1, 31):  # Generate 30 copies
        new_filename = f"{name}_{i}{ext}"  # Append number before extension

        # Create a new row by copying all existing column values
        new_row = row.copy()
        new_row["new_filename"] = new_filename  # Update with the new filename

        augmented_data.append(new_row)

# Convert to DataFrame
augmented_df = pd.DataFrame(augmented_data)

# Save only the augmented dataset (excluding original rows)
augmented_df.to_csv("augmented_dataset.csv", index=False)

print("Updated dataset saved with only 30 copies per original row (original rows removed).")


Updated dataset saved with only 30 copies per original row (original rows removed).


In [15]:
augmented_df.shape[0]

263520

In [17]:
augmented_df.head(50)

Unnamed: 0,ndc11,rxcui,name,labeler,acqDate,attribution,ingredientsAvailable,rxnavImageObjectId,rxnavImageFileName,rxnavImageSize,...,imprint,imprintType,symbol,score,active_ingredients,inactive_ingredients,base_filename,new_filename,split_type,split_side
0,63459-0502-30,261106,Fentanyl 0.2 MG Oral Lozenge [Actiq],"Cephalon, Inc.",09-26-2013,National Library of Medicine | Lister Hill Nat...,False,185688930,63459-0502-30_RXNAVIMAGE10_8641C37E.jpg,405034,...,ACTIQ;200,PRINTED,False,1,[],[],63459-0502-30_RXNAVIMAGE10_8641C37E,63459-0502-30_RXNAVIMAGE10_8641C37E_bottom_1.jpg,vertical,bottom
0,63459-0502-30,261106,Fentanyl 0.2 MG Oral Lozenge [Actiq],"Cephalon, Inc.",09-26-2013,National Library of Medicine | Lister Hill Nat...,False,185688930,63459-0502-30_RXNAVIMAGE10_8641C37E.jpg,405034,...,ACTIQ;200,PRINTED,False,1,[],[],63459-0502-30_RXNAVIMAGE10_8641C37E,63459-0502-30_RXNAVIMAGE10_8641C37E_bottom_2.jpg,vertical,bottom
0,63459-0502-30,261106,Fentanyl 0.2 MG Oral Lozenge [Actiq],"Cephalon, Inc.",09-26-2013,National Library of Medicine | Lister Hill Nat...,False,185688930,63459-0502-30_RXNAVIMAGE10_8641C37E.jpg,405034,...,ACTIQ;200,PRINTED,False,1,[],[],63459-0502-30_RXNAVIMAGE10_8641C37E,63459-0502-30_RXNAVIMAGE10_8641C37E_bottom_3.jpg,vertical,bottom
0,63459-0502-30,261106,Fentanyl 0.2 MG Oral Lozenge [Actiq],"Cephalon, Inc.",09-26-2013,National Library of Medicine | Lister Hill Nat...,False,185688930,63459-0502-30_RXNAVIMAGE10_8641C37E.jpg,405034,...,ACTIQ;200,PRINTED,False,1,[],[],63459-0502-30_RXNAVIMAGE10_8641C37E,63459-0502-30_RXNAVIMAGE10_8641C37E_bottom_4.jpg,vertical,bottom
0,63459-0502-30,261106,Fentanyl 0.2 MG Oral Lozenge [Actiq],"Cephalon, Inc.",09-26-2013,National Library of Medicine | Lister Hill Nat...,False,185688930,63459-0502-30_RXNAVIMAGE10_8641C37E.jpg,405034,...,ACTIQ;200,PRINTED,False,1,[],[],63459-0502-30_RXNAVIMAGE10_8641C37E,63459-0502-30_RXNAVIMAGE10_8641C37E_bottom_5.jpg,vertical,bottom
0,63459-0502-30,261106,Fentanyl 0.2 MG Oral Lozenge [Actiq],"Cephalon, Inc.",09-26-2013,National Library of Medicine | Lister Hill Nat...,False,185688930,63459-0502-30_RXNAVIMAGE10_8641C37E.jpg,405034,...,ACTIQ;200,PRINTED,False,1,[],[],63459-0502-30_RXNAVIMAGE10_8641C37E,63459-0502-30_RXNAVIMAGE10_8641C37E_bottom_6.jpg,vertical,bottom
0,63459-0502-30,261106,Fentanyl 0.2 MG Oral Lozenge [Actiq],"Cephalon, Inc.",09-26-2013,National Library of Medicine | Lister Hill Nat...,False,185688930,63459-0502-30_RXNAVIMAGE10_8641C37E.jpg,405034,...,ACTIQ;200,PRINTED,False,1,[],[],63459-0502-30_RXNAVIMAGE10_8641C37E,63459-0502-30_RXNAVIMAGE10_8641C37E_bottom_7.jpg,vertical,bottom
0,63459-0502-30,261106,Fentanyl 0.2 MG Oral Lozenge [Actiq],"Cephalon, Inc.",09-26-2013,National Library of Medicine | Lister Hill Nat...,False,185688930,63459-0502-30_RXNAVIMAGE10_8641C37E.jpg,405034,...,ACTIQ;200,PRINTED,False,1,[],[],63459-0502-30_RXNAVIMAGE10_8641C37E,63459-0502-30_RXNAVIMAGE10_8641C37E_bottom_8.jpg,vertical,bottom
0,63459-0502-30,261106,Fentanyl 0.2 MG Oral Lozenge [Actiq],"Cephalon, Inc.",09-26-2013,National Library of Medicine | Lister Hill Nat...,False,185688930,63459-0502-30_RXNAVIMAGE10_8641C37E.jpg,405034,...,ACTIQ;200,PRINTED,False,1,[],[],63459-0502-30_RXNAVIMAGE10_8641C37E,63459-0502-30_RXNAVIMAGE10_8641C37E_bottom_9.jpg,vertical,bottom
0,63459-0502-30,261106,Fentanyl 0.2 MG Oral Lozenge [Actiq],"Cephalon, Inc.",09-26-2013,National Library of Medicine | Lister Hill Nat...,False,185688930,63459-0502-30_RXNAVIMAGE10_8641C37E.jpg,405034,...,ACTIQ;200,PRINTED,False,1,[],[],63459-0502-30_RXNAVIMAGE10_8641C37E,63459-0502-30_RXNAVIMAGE10_8641C37E_bottom_10.jpg,vertical,bottom
