### I noticed that a lot of the images in the dataset have very poor contrast, which probably impedes getting high accuracy from the model. One way to improve the contrast is to apply histogram equalization to the images (the code below uses CLAHE, contrast-limited adaptive histogram equalization). The code below shows a few examples of this. In each pair, the left image is the original and the right image is the histogram equalized version. Note the improvement in contrast.

In [None]:
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
import shutil
from tqdm import tqdm
def histo(img):
    """Return a contrast-enhanced, 3-channel copy of *img* using CLAHE.

    The image is reduced to grayscale, equalized with OpenCV's
    contrast-limited adaptive histogram equalization (clip limit 2.0,
    8x8 tile grid), and expanded back to three identical channels so the
    result can be stacked beside, or saved in place of, the original.

    Args:
        img: 3-channel image in OpenCV's BGR channel order, which is what
            ``cv2.imread`` returns by default.

    Returns:
        A 3-channel image with the same height and width as *img*; all
        three channels hold the equalized grayscale values.

    Raises:
        ValueError: if *img* is None (``cv2.imread`` returns None when a
            file cannot be read or decoded).
    """
    if img is None:
        raise ValueError('histo() received None - the image could not be read')
    # Callers pass cv2.imread output, which is BGR; using the RGB conversion
    # here would swap the red and blue luminance weights.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    cli = clahe.apply(gray)
    # All three output channels are identical, so the result is valid in
    # either BGR or RGB order.
    return cv2.cvtColor(cli, cv2.COLOR_GRAY2BGR)
# Preview: show a few random training images per class, each displayed as
# "original | equalized" side by side.
trainpath=r'/kaggle/input/brain-tumor-mri-images-huge/brain tumor MRI images/train'
classes=sorted(os.listdir(trainpath))
samples_per_class=5
img_samples=[]
for klass in classes:
    classpath=os.path.join(trainpath, klass)
    flist=sorted(os.listdir(classpath))
    # Sampling without replacement fails if we ask for more files than exist.
    n_pick=min(samples_per_class, len(flist))
    class_samples=np.random.choice(flist, n_pick, replace=False)
    for f in class_samples:
        fpath=os.path.join(classpath,f)
        img_samples.append(fpath)
# Size the grid from the number of samples instead of assuming exactly 15.
ncols=3
nrows=max(1, -(-len(img_samples) // ncols)) # ceiling division
plt.figure(figsize=(15, 3*nrows))
for i, fpath in enumerate(img_samples):
    img=cv2.imread(fpath)
    if img is None: # cv2.imread returns None for unreadable files
        print(f'skipping unreadable image: {fpath}')
        continue
    himg=histo(img)
    res = np.hstack((img,himg)) #stacking images side-by-side
    res= cv2.cvtColor(res, cv2.COLOR_BGR2RGB) # matplotlib expects RGB order
    # Create the subplot first so the title is attached to this panel
    # rather than to the previously current axes.
    plt.subplot(nrows, ncols, i+1)
    plt.title('   original               equalized')
    plt.imshow(res)
plt.show()

## If you want to try a model with histogram equalized images, the code below will take the original train and validation sets, perform the histogram equalization, and create a new dataset of histogram equalized images within the Kaggle working directory. Since a lot of files are being processed, this will take a considerable amount of time to complete.

In [None]:
# Build a mirror of the train/val directory tree under the working directory,
# with every image replaced by its histogram equalized version.
trainpath=r'/kaggle/input/brain-tumor-mri-images-huge/brain tumor MRI images/train'
validpath=r'/kaggle/input/brain-tumor-mri-images-huge/brain tumor MRI images/val'
working_dir=r'/kaggle/working/'
destpath=os.path.join(working_dir, 'Histogram Equalized Images')
if os.path.isdir(destpath): # if the directory exists delete it
    shutil.rmtree(destpath)
os.mkdir(destpath) # make an empty directory
datasets=[trainpath, validpath]
names=['train','val']
ziplist=zip(names, datasets)
skipped=[] # source files that could not be read or written
for name,d in ziplist:
    dataset_destpath=os.path.join(destpath, name)
    os.mkdir(dataset_destpath) # make the train or val directory
    # Only sub-directories are classes; a stray file here would otherwise
    # crash os.listdir below.
    classes=sorted(k for k in os.listdir(d) if os.path.isdir(os.path.join(d, k)))
    for klass in classes:
        classpath=os.path.join(d, klass)
        dest_classpath=os.path.join(dataset_destpath, klass)
        os.mkdir(dest_classpath) # make directories for the classes
        desc=f'{name} - {klass}'
        flist=sorted(os.listdir(classpath))
        for f in tqdm(flist,ncols=130, desc=desc, colour='blue', unit='files'):
            fpath=os.path.join(classpath,f)
            dest_fpath=os.path.join(dest_classpath,f)
            img=cv2.imread(fpath)
            if img is None:
                # cv2.imread returns None for unreadable/non-image files;
                # record it and keep going instead of aborting a long run.
                skipped.append(fpath)
                continue
            himg=histo(img)
            # cv2.imwrite reports failure through its return value.
            if not cv2.imwrite(dest_fpath, himg):
                skipped.append(fpath)
print ('histogram equalized images have been created and stored')
if skipped:
    print (f'{len(skipped)} file(s) could not be processed:')
    for fpath in skipped:
        print (f'    {fpath}')

In [None]:
# Sanity check: print a table with the number of files stored for every
# class of every dataset under destpath.
row_template='{0:^12s}{1:^20s}{2:^10s}'
datasets=sorted(os.listdir(destpath))
print (row_template.format('DATA SET', 'CLASS', 'FILES'))
for d in datasets:
    split_dir=os.path.join(destpath,d)
    classes=sorted(os.listdir(split_dir))
    for klass in classes:
        # Only the count is reported, so the listing needs no sorting.
        file_count=len(os.listdir(os.path.join(split_dir,klass)))
        print (row_template.format(d, klass, str(file_count)))
