In [1]:
import os
import random
import shutil
# from future.utils import iteritems


In [None]:

# Define paths to your dataset directory
dataset_dir = './shapes_dataset_HR'
train_dir = 'train'
test_dir = 'test'

# Define the ratio of data to be used for training (0.8 = 80% training, 20% testing)
train_ratio = 0.8

# Create train and test directories if they don't exist
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

# Loop through each class folder in the dataset directory
for class_folder in os.listdir(dataset_dir):
    class_path = os.path.join(dataset_dir, class_folder)
    
    # Check if it's a directory
    if os.path.isdir(class_path):
        # List all images in the class folder
        images = [f for f in os.listdir(class_path) if f.endswith('.jpg') or f.endswith('.png')]
        
        # Shuffle the images randomly
        random.shuffle(images)
        
        # Calculate the number of images for training
        num_train_images = int(len(images) * train_ratio)
        
        # Split the images into train and test sets
        train_images = images[:num_train_images]
        test_images = images[num_train_images:]
        
        # Move train images to the train directory
        for image in train_images:
            src = os.path.join(class_path, image)
            dest = os.path.join(train_dir, class_folder, image)
            os.makedirs(os.path.dirname(dest), exist_ok=True)
            shutil.copy(src, dest)
        
        # Move test images to the test directory
        for image in test_images:
            src = os.path.join(class_path, image)
            dest = os.path.join(test_dir, class_folder, image)
            os.makedirs(os.path.dirname(dest), exist_ok=True)
            shutil.copy(src, dest)

print("Dataset successfully split into train and test sets.")


In [2]:
import os
import random
import shutil
import cv2
import numpy as np


# Define paths to your dataset directory
dataset_dir = './shapes_dataset_HR'
train_dir = 'train'
test_dir = 'test'


# Define the ratio of data to be used for training (0.8 = 80% training, 20% testing)
train_ratio = 0.8

# Create train and test directories if they don't exist
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

def load_images_from_folder(folder):
    images = []
    for filename in os.listdir(folder):
        img = cv2.imread(os.path.join(folder, filename))
        if img is not None:
            images.append(img)
    return images

train_images = {}
test_images = {}

# Loop through each class folder in the dataset directory
def load_images_and_labels(folder):
    images = []
    labels = []
    for class_folder in os.listdir(folder):
        class_path = os.path.join(folder, class_folder)
        if os.path.isdir(class_path):
            for filename in os.listdir(class_path):
                img = cv2.imread(os.path.join(class_path, filename), cv2.IMREAD_GRAYSCALE)
                img = cv2.resize(img, (64,64))
                if img is not None:
                    images.append(img)
                    labels.append(int(class_folder))  # Assuming class folder names are the labels
    return np.array(images), np.array(labels)

# Load train images and labels into memory
train_images, train_labels = load_images_and_labels(train_dir)

# Load test images and labels into memory
test_images, test_labels = load_images_and_labels(test_dir)

print("Train images shape:", train_images.shape)
print("Train labels shape:", train_labels.shape)
print("Test images shape:", test_images.shape)
print("Test labels shape:", test_labels.shape)

Train images shape: (9625, 64, 64)
Train labels shape: (9625,)
Test images shape: (3627, 64, 64)
Test labels shape: (3627,)


In [3]:
(Xtrain , Ytrain) , (Xtest , Ytest) = (train_images,  train_labels) , (test_images,test_labels)
Xtrain = Xtrain.reshape(Xtrain.shape[0], -1)
Xtest = Xtest.reshape(Xtest.shape[0], -1)
Xtrain, Xtest = Xtrain/255.0 , Xtest/255.0

In [6]:
import pickle

class Bayes(object):
    def fit(self, x, y, smoothing = 1e-2):
        n, d = x.shape     # n = number of samples, d = number of features
        self.gaussians = dict()
        self.priors = dict()
        labels = set(y)     # Unique number of labels 
        for c in labels:
            current_x = x[y == c]
            self.gaussians[c] = {
                'mean': current_x.mean(axis = 0),
                'cov': np.cov(current_x.T) + np.eye(d) * smoothing     # Covariance matrix np.eye is the identity matrix # Smoothing so that if any singular matrix is there then it will not be singular
            }
            self.priors[c] = float(len(y[y == c])) / len(y)

    def score(self, x, y):
        p = self.predict(x)
        return np.mean(p == y)
    
    def save_model(self, filename):
        with open(filename, 'wb') as file:
            pickle.dump({'gaussians': self.gaussians, 'priors': self.priors}, file)

    def load_model(self, filename):
        with open(filename, 'rb') as file:
            model = pickle.load(file)
            self.gaussians = model['gaussians']
            self.priors = model['priors']
            
            
    def predict(self, x):
        n, d = x.shape
        k = len(self.gaussians)     # Number of classes
        log_p = np.zeros((n, k))
        # for c, g in iteritems(self.gaussians):
        for c, g in self.gaussians.items():
            mean, cov = g['mean'], g['cov']
            cov += np.eye(d) * 1e-6
            cov_inv = np.linalg.inv(cov)
            cov_det = np.linalg.det(cov)
            log_prior =  np.log(self.priors[c])
            for i in range(n):
                t = x[i]
                diff = t - mean
                log_exponent = -0.5 * diff.dot(cov_inv).dot(diff)
                log_likelihood = log_exponent  - 0.5 *d * np.log(2*np.pi) - 0.5 * np.log(np.abs(cov_det) + 1e-6) 
                log_p[i,c] = log_likelihood + log_prior
        return np.argmax(log_p, axis = 1)
        #     p[:, c] = mvn.logpdf(x, mean = mean, cov = cov) + np.log(self.priors[c])     # logpdf is the log of the probability density function
        # return np.argmax(p, axis = 1)

In [7]:
model = Bayes()
model.fit(Xtrain, Ytrain)

print("Training Accuracy: ", model.score(Xtrain, Ytrain))
print("Test Accuracy: ", model.score(Xtest, Ytest))

Training Accuracy:  0.9727792207792207
Test Accuracy:  0.8957816377171216


In [9]:
# model.save_model('bayes_model_HR.pkl')