# __Implementing a CNN (Convolutional Neural Network) from scratch__
### without a machine learning library, using only numpy and pandas


##### imports

In [2]:
# import library 
import os 
import numpy as np 
import struct 
import gzip



##### actual code


In [16]:

def read_idx(file_name):
    """Read an uncompressed IDX-format file into a NumPy array.

    The header is a 4-byte magic number (two zero bytes, a one-byte
    element-type code and a one-byte dimension count) followed by one
    big-endian unsigned 32-bit size per dimension. Everything after the
    header is the element data.

    Args:
        file_name: Path to the IDX file to read.

    Returns:
        A ``numpy.ndarray`` shaped as declared in the header. Its dtype
        follows the header's element-type code (``uint8`` for code 0x08).

    Raises:
        ValueError: If the magic number does not start with two zero bytes,
            the element-type code is unknown, or the payload size does not
            match the declared shape.
        struct.error: If the file is too short to contain the header.
    """
    # Element-type codes from the IDX header; multi-byte types are big-endian.
    idx_dtypes = {
        0x08: np.dtype("uint8"),
        0x09: np.dtype("int8"),
        0x0B: np.dtype(">i2"),
        0x0C: np.dtype(">i4"),
        0x0D: np.dtype(">f4"),
        0x0E: np.dtype(">f8"),
    }

    with open(file_name, 'rb') as f:
        zero, data_type, dims = struct.unpack('>HBB', f.read(4))
        if zero != 0:
            raise ValueError(
                f"{file_name}: not an IDX file (magic prefix is {zero:#06x}, expected 0)"
            )
        if data_type not in idx_dtypes:
            raise ValueError(
                f"{file_name}: unknown IDX element-type code {data_type:#04x}"
            )
        # All dimension sizes are read with a single unpack call.
        shape = struct.unpack(f'>{dims}I', f.read(4 * dims))
        data = np.frombuffer(f.read(), dtype=idx_dtypes[data_type]).reshape(shape)
    return data

def load_emnist_byclass(path="EMNIST", dataset_type = "byclass"):
    """Load the train/test images and labels of one EMNIST split from *path*.

    The four files are expected to be already-extracted (not gzipped) IDX
    files named ``emnist-<dataset_type>-{train,test}-{images-idx3,labels-idx1}-ubyte``.

    Args:
        path: Directory containing the extracted IDX files.
        dataset_type: EMNIST split name used to build the file names.
            Defaults to ``"byclass"``.

    Returns:
        A tuple ``(X_train, y_train, X_test, y_test)``. The image arrays are
        converted to ``float32`` and divided by 255.0; the label arrays are
        returned exactly as read from disk.
    """
    def _load(split, kind):
        # Build the file name for one (split, kind) pair and read it.
        file_name = f"emnist-{dataset_type}-{split}-{kind}-ubyte"
        return read_idx(os.path.join(path, file_name))

    # The files ship gzip-compressed, so they must be extracted first.
    X_train = _load("train", "images-idx3")
    y_train = _load("train", "labels-idx1")
    X_test = _load("test", "images-idx3")
    y_test = _load("test", "labels-idx1")

    # Scale pixel values by 1/255 (maps 8-bit intensities into [0, 1]).
    X_train = X_train.astype(np.float32) / 255.0
    X_test = X_test.astype(np.float32) / 255.0

    return X_train, y_train, X_test, y_test

# X_train, y_train, X_test, y_test = load_emnist_byclass("gzip/")


# Extract the .gz archives found in the data directory
def extract_gzip(input_dir = 'gzip/', data_set_type='byclass'):
    """Decompress every ``.gz`` file in *input_dir* into a sub-folder.

    The output folder is ``<input_dir>/unpacked_<data_set_type>``. If that
    folder already exists the function prints a notice and returns without
    extracting anything. Otherwise the folder is created and each ``.gz``
    file directly inside *input_dir* is decompressed into it, with the
    ``.gz`` suffix removed from the file name.

    NOTE(review): *data_set_type* only determines the output folder name;
    all ``.gz`` files are extracted regardless of which split they belong to.

    Args:
        input_dir: Directory that holds the ``.gz`` archives.
        data_set_type: Label used to name the output folder.

    Returns:
        None.
    """
    # Local import so this function does not depend on a file-level import.
    import shutil

    unpacked_dir = os.path.join(input_dir, f"unpacked_{data_set_type}")

    if os.path.exists(unpacked_dir):
        print("Checking:", os.path.abspath(unpacked_dir))
        print("directory already exist")
        return

    # The output directory does not exist yet.
    print("making directory")
    os.makedirs(unpacked_dir)

    # Example relative path: "gzip/emnist-digits-test-images-idx3-ubyte.gz"
    for filename in os.listdir(input_dir):
        print(filename)
        print(type(filename))
        if not filename.endswith('.gz'):
            continue

        compressed_filepath = os.path.join(input_dir, filename)

        # Dropping the '.gz' extension gives the uncompressed file name.
        uncompressed_filename = os.path.splitext(filename)[0]
        uncompressed_filepath = os.path.join(unpacked_dir, uncompressed_filename)

        # Stream in chunks so large archives are never held fully in memory.
        with gzip.open(compressed_filepath, 'rb') as f_in, \
                open(uncompressed_filepath, 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)

        print(f'\t- Extracted: {filename} -> {uncompressed_filename}')


# Run the extraction with the default arguments (input_dir='gzip/', data_set_type='byclass').
extract_gzip()
p = "gzip/"   # replace this with the path you passed earlier
# Print the absolute form of p to confirm which directory it resolves to.
print("Checking:", os.path.abspath(p))

making directory
emnist-balanced-mapping.txt
<class 'str'>
emnist-balanced-test-images-idx3-ubyte.gz
<class 'str'>
	- Extracted: emnist-balanced-test-images-idx3-ubyte.gz -> emnist-balanced-test-images-idx3-ubyte
emnist-balanced-test-labels-idx1-ubyte.gz
<class 'str'>
	- Extracted: emnist-balanced-test-labels-idx1-ubyte.gz -> emnist-balanced-test-labels-idx1-ubyte
emnist-balanced-train-images-idx3-ubyte.gz
<class 'str'>
	- Extracted: emnist-balanced-train-images-idx3-ubyte.gz -> emnist-balanced-train-images-idx3-ubyte
emnist-balanced-train-labels-idx1-ubyte.gz
<class 'str'>
	- Extracted: emnist-balanced-train-labels-idx1-ubyte.gz -> emnist-balanced-train-labels-idx1-ubyte
emnist-byclass-mapping.txt
<class 'str'>
emnist-byclass-test-images-idx3-ubyte.gz
<class 'str'>
	- Extracted: emnist-byclass-test-images-idx3-ubyte.gz -> emnist-byclass-test-images-idx3-ubyte
emnist-byclass-test-labels-idx1-ubyte.gz
<class 'str'>
	- Extracted: emnist-byclass-test-labels-idx1-ubyte.gz -> emnist-byclass