In [3]:
import os, cv2
import numpy as np
from PIL import Image, ImageStat, ImageOps

In [4]:
def _flatten_to_rgb( img, fill=(255, 255, 255) ):
    """Return an RGB version of ``img``, compositing any transparency onto ``fill``."""
    has_alpha = img.mode in ('RGBA', 'LA') or (img.mode == 'P' and 'transparency' in img.info)
    if not has_alpha:
        # If no alpha channel, just convert to RGB
        return img.convert('RGB')

    # Bring every transparent mode to RGBA first so the alpha band is always
    # the last one. Indexing band 3 directly fails on two-band 'LA' images.
    rgba = img.convert("RGBA")
    background = Image.new("RGB", rgba.size, fill)
    background.paste(rgba, mask=rgba.split()[-1])
    return background


def _center_square( img ):
    """Crop ``img`` to the largest square centred in the frame."""
    width, height = img.size
    side = min(width, height)
    # Integer offsets keep the box exactly side x side; fractional coordinates
    # get rounded by Pillow and can leave the crop one pixel off-square.
    left = (width - side) // 2
    top = (height - side) // 2
    return img.crop((left, top, left + side, top + side))


def resize ( data_dir, dest_dir, target_size=(224, 224) ) :
    """Convert every image in ``data_dir`` to a normalised square JPEG in ``dest_dir``.

    For each ``.jpg`` / ``.jpeg`` / ``.png`` file (extension matched
    case-insensitively) the image is flattened onto a white background if it
    carries transparency, centre-cropped to a square, resized to
    ``target_size``, min-max normalised to the 0-255 range with
    ``cv2.normalize`` and written as ``img_<n>.jpg``, with ``n`` counting up
    from 1 in sorted file-name order.

    Parameters
    ----------
    data_dir : str
        Directory containing the source images.
    dest_dir : str
        Directory receiving the processed images; created if it is missing.
    target_size : tuple of int, optional
        ``(width, height)`` of the output images. Defaults to ``(224, 224)``.
    """
    os.makedirs( dest_dir, exist_ok=True )

    num = 1
    # Sorted so that the img_<n> numbering is the same on every run / platform.
    for file in sorted( os.listdir( data_dir ) ):
        if not file.lower().endswith(('.jpg', '.jpeg', '.png')):
            continue

        # The context manager releases the file handle as soon as the pixel
        # data has been copied into the new RGB image.
        with Image.open( os.path.join(data_dir, file) ) as source:
            img = _flatten_to_rgb( source )

        img = _center_square( img ).resize( target_size )

        # Stretch pixel intensities to the full 0..255 range.
        norm_img = cv2.normalize( np.array( img ), None, 0, 255, cv2.NORM_MINMAX )

        Image.fromarray( norm_img ).save( os.path.join(dest_dir, f"img_{num}.jpg" ), "JPEG")
        num += 1

    # Per-image standardisation was sketched here but is left disabled:
    # stat = ImageStat.Stat(img)
    # img = ( img - stat.mean ) / stat.stddev

In [5]:
# Data Augmentation
def augment( path, seed=None ) :
    """Write flipped and rotated copies of every image in ``path`` back into ``path``.

    For each ``.jpg`` / ``.jpeg`` / ``.png`` file (extension matched
    case-insensitively) three JPEGs are added: a horizontal flip, a vertical
    flip and a rotation by 90, 180 or 270 degrees chosen at random. With ``N``
    entries in the directory before the call, the new files are numbered
    ``img_{N+1}..``, ``img_{2N+1}..`` and ``img_{3N+1}..`` respectively.

    Parameters
    ----------
    path : str
        Directory that is read and extended in place.
    seed : int, optional
        Seed for the rotation choice. When omitted the global ``np.random``
        state is used, as before.
    """
    # Snapshot (and sort) the listing once: files saved below must not be
    # picked up again, and sorted order keeps the numbering reproducible.
    entries = sorted( os.listdir( path ) )
    base = len( entries )
    hflipn = base + 1
    vflipn = 2 * base + 1
    rotn = 3 * base + 1

    chooser = np.random if seed is None else np.random.RandomState( seed )

    for file in entries:
        if not file.lower().endswith( ('.jpg', '.jpeg', '.png') ):
            continue

        # convert() returns a new in-memory image, so the file can be closed right away.
        with Image.open( os.path.join( path, file ) ) as source:
            image = source.convert( "RGB" )
        # image = ImageOps.grayscale( image )                    GrayScale Conversion

        hflip = image.transpose( Image.FLIP_LEFT_RIGHT )
        vflip = image.transpose( Image.FLIP_TOP_BOTTOM )
        # int() turns the NumPy integer into a plain Python int for Pillow.
        rot = image.rotate( int( chooser.choice( [90, 180, 270] ) ) )

        hflip.save( os.path.join( path, f"img_{hflipn}.jpg" ), "JPEG")
        vflip.save( os.path.join( path, f"img_{vflipn}.jpg" ), "JPEG")
        rot.save( os.path.join( path, f"img_{rotn}.jpg" ), "JPEG")
        hflipn += 1
        vflipn += 1
        rotn += 1
        

In [6]:
def prepare( data, dest ):
    """Preprocess every class folder found in ``data`` into ``dest``.

    Each sub-directory of ``data`` is handled as one class: a directory with
    the same name is created under ``dest`` (``os.makedirs`` raises
    ``FileExistsError`` if it is already there), filled by ``resize`` and then
    extended in place by ``augment``. Entries of ``data`` that are not
    directories are ignored. One progress line is printed per class.
    """
    print( 'Classes in Dataset for Preprocessing :')
    for class_name in os.listdir( data ):
        source = os.path.join( data, class_name )
        result = os.path.join( dest, class_name )

        # Only directories are classes; skip loose files.
        if not os.path.isdir( source ):
            continue

        os.makedirs( result )
        resize( source, result )
        augment( result )
        print( f'{class_name} : Done ✅' )

In [8]:
# Raw images live in one sub-directory per class under data_dir; the
# preprocessed and augmented copies are written under dest_dir.
# os.path.join picks the separator of the host OS, so the paths are the same
# as before on Windows and also resolve correctly elsewhere.
data_dir = os.path.join( 'datasets', 'unprocessed' )
dest_dir = os.path.join( 'datasets', 'IPL_DS' )

prepare( data_dir, dest_dir)

Classes in Dataset for Preprocessing :
CSK : Done ✅
DC : Done ✅
GT : Done ✅
KKR : Done ✅
LSG : Done ✅
MI : Done ✅
PK : Done ✅
RCB : Done ✅
RR : Done ✅
SRH : Done ✅


In [9]:
import os
import shutil
from sklearn.model_selection import train_test_split

# Preprocessed dataset (one sub-directory per class) and the train/test
# copies derived from it. os.path.join keeps the paths portable.
dataset_dir = os.path.join('datasets', 'IPL_DS')
train_dir = os.path.join('datasets', 'IPL_train')
test_dir = os.path.join('datasets', 'IPL_test')
test_size = 0.15  # fraction of each class held out for testing (15%)

# Create train and test directories
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

# NOTE(review): dataset_dir already contains the flipped/rotated copies that
# augment() wrote, and the split below is made per file. Variants of one
# source picture can therefore land in both train and test, which may inflate
# test scores. Consider splitting before augmenting.

# Split and copy images
for class_name in sorted(os.listdir(dataset_dir)):
    class_path = os.path.join(dataset_dir, class_name)
    if not os.path.isdir(class_path):
        continue

    os.makedirs(os.path.join(train_dir, class_name), exist_ok=True)
    os.makedirs(os.path.join(test_dir, class_name), exist_ok=True)

    # Sort so that random_state=42 yields the same split on every machine
    # (os.listdir order is arbitrary), and keep regular files only because
    # shutil.copyfile cannot copy directories.
    images = sorted(
        name for name in os.listdir(class_path)
        if os.path.isfile(os.path.join(class_path, name))
    )
    train_images, test_images = train_test_split(images, test_size=test_size, random_state=42)

    for subset_dir, subset in ((train_dir, train_images), (test_dir, test_images)):
        for image in subset:
            src = os.path.join(class_path, image)
            dst = os.path.join(subset_dir, class_name, image)
            shutil.copyfile(src, dst)

def count_images(directory):
    """Return the total number of entries inside the sub-directories of ``directory``.

    Only first-level sub-directories are inspected; files lying directly in
    ``directory`` are not counted.
    """
    candidates = (os.path.join(directory, entry) for entry in os.listdir(directory))
    return sum(len(os.listdir(folder)) for folder in candidates if os.path.isdir(folder))

# Report how many files ended up in each split.
train_total = count_images(train_dir)
print(f'Total images in train set: {train_total}')
test_total = count_images(test_dir)
print(f'Total images in test set: {test_total}')


Total images in train set: 4892
Total images in test set: 868
