In [1]:
import numpy as np

# Convolution function
def convolve2d(image, kernel, stride, padding):
    # Add zero padding to the input image
    image_padded = np.pad(image, [(padding, padding), (padding, padding), (0, 0)], mode='constant', constant_values=0)
    
    kernel_height, kernel_width = kernel.shape[:2]
    padded_height, padded_width = image_padded.shape[:2]

    # Determine the output dimensions
    output_height = (padded_height - kernel_height) // stride + 1
    output_width = (padded_width - kernel_width) // stride + 1

    # Create an empty image to store the output
    new_image = np.zeros((output_height, output_width, image.shape[-1]))
    
    # Perform the convolution
    for x in range(0, padded_height - kernel_height + 1, stride):
        for y in range(0, padded_width - kernel_width + 1, stride):
            new_image[x // stride, y // stride] = np.sum(
                image_padded[x:x + kernel_height, y:y + kernel_width] * kernel, axis=(0, 1)
            )
    return new_image

# Pooling function
def pool2d(image, pool_size, stride, pooling_type='max'):
    # Determine the output dimensions
    output_height = (image.shape[0] - pool_size) // stride + 1
    output_width = (image.shape[1] - pool_size) // stride + 1

    # Create an empty image to store the output
    new_image = np.zeros((output_height, output_width, image.shape[-1]))

    # Perform the pooling
    for x in range(0, image.shape[0] - pool_size + 1, stride):
        for y in range(0, image.shape[1] - pool_size + 1, stride):
            if pooling_type == 'max':
                new_image[x // stride, y // stride] = np.max(
                    image[x:x + pool_size, y:y + pool_size], axis=(0, 1)
                )
            elif pooling_type == 'average':
                new_image[x // stride, y // stride] = np.mean(
                    image[x:x + pool_size, y:y + pool_size], axis=(0, 1)
                )
    return new_image

# ReLU Activation function
def relu(x):
    return np.maximum(0, x)

# Softmax function
def softmax(x):
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum(axis=0)

# MLP implementation
def mlp(input_layer, weights, biases):
    return softmax(np.dot(weights, input_layer) + biases)

# Example input (random image data and random filter for convolution)
input_image = np.random.rand(32, 32, 3)
kernel = np.random.rand(3, 3, 3)
stride = 1
padding = 0

# Convolution 3x3
convoluted_image = convolve2d(input_image, kernel, stride, padding)

# Pooling
pooled_image = pool2d(convoluted_image, 2, 2, 'max')

# Flatten the image
flattened = pooled_image.flatten()

# MLP (assuming some random weights and biases)
weights = np.random.rand(10, flattened.size)  # 10 classes for example
biases = np.random.rand(10)

# Output from MLP
output = mlp(flattened, weights, biases)

output


array([5.50590447e-24, 3.44861937e-27, 4.65997539e-24, 8.75116818e-26,
       2.54694442e-35, 1.07614287e-37, 1.76913590e-15, 6.62121363e-20,
       2.57847755e-10, 1.00000000e+00])

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import convolve2d
from sklearn.datasets import fetch_openml
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# Fetching the MNIST dataset
mnist = fetch_openml('mnist_784', version=1)
# Normalizing the data
scaler = MinMaxScaler()
X = scaler.fit_transform(mnist.data)
y = mnist.target.astype(np.uint8)

# Reshaping the data to have a channel dimension, here 1 channel because MNIST is grayscale
X_reshaped = X.reshape((-1, 28, 28, 1))

# Defining the convolution operation
def convolve2d_manual(input_data, kernel, stride=1, padding=0):
    """
    Perform a 2D convolution operation manually without using built-in convolve functions.
    
    Parameters:
    - input_data: Input data or image (2D array)
    - kernel: Convolution kernel (2D array)
    - stride: Stride of the convolution operation
    - padding: Zero-padding added to the input
    
    Returns:
    - output: The result of the convolution operation
    """
    # Adding zero padding to the input data
    if padding > 0:
        input_data = np.pad(input_data, [(padding, padding), (padding, padding)], mode='constant', constant_values=0)
    
    # Calculating the dimensions of the output
    output_height = ((input_data.shape[0] - kernel.shape[0]) // stride) + 1
    output_width = ((input_data.shape[1] - kernel.shape[1]) // stride) + 1
    output = np.zeros((output_height, output_width))
    
    # Performing the convolution operation
    for y in range(0, output_height):
        for x in range(0, output_width):
            output[y, x] = np.sum(input_data[y*stride:y*stride+kernel.shape[0], x*stride:x*stride+kernel.shape[1]] * kernel)
    return output

# Defining pooling operation
def pooling_manual(input_data, size=2, stride=2, mode='max'):
    """
    Perform a pooling operation manually without using built-in pool functions.
    
    Parameters:
    - input_data: Input data or feature map (2D array)
    - size: The size of the window to take a max or average over
    - stride: The stride of the pooling operation
    - mode: The pooling mode - 'max' for max pooling or 'avg' for average pooling
    
    Returns:
    - output: The result of the pooling operation
    """
    # Calculating the dimensions of the output
    output_height = ((input_data.shape[0] - size) // stride) + 1
    output_width = ((input_data.shape[1] - size) // stride) + 1
    output = np.zeros((output_height, output_width))
    
    # Performing the pooling operation
    for y in range(0, output_height):
        for x in range(0, output_width):
            window = input_data[y*stride:y*stride+size, x*stride:x*stride+size]
            if mode == 'max':
                output[y, x] = np.max(window)
            elif mode == 'avg':
                output[y, x] = np.mean(window)
    return output

# Creating a simple 3x3 convolution kernel for demonstration
kernel = np.array([[1, 0, -1],
                   [1, 0, -1],
                   [1, 0, -1]])

# Convolving the first image in the dataset
convolved_image = convolve2d_manual(X_reshaped[0, :, :, 0], kernel)

# Pooling the convolved image
pooled_image = pooling_manual(convolved_image, mode='max')

# Displaying the original, convolved, and pooled images
fig, ax = plt.subplots(1, 3, figsize=(12, 5))
ax[0].imshow(X_reshaped[0, :, :, 0], cmap='gray')
ax[0].set_title('Original Image')
ax[0].axis('off')

ax[1].imshow(convolved_image, cmap='gray')
ax[1].set_title('Convolved Image')
ax[1].axis('off')

ax[2].imshow(pooled_image, cmap='gray')
ax[2].set_title('Pooled Image')
ax[2].axis('off')

plt.show()

# Splitting the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Creating a Multi
# Fetching the MNIST dataset again
mnist = fetch_openml('mnist_784', version=1)

# Normalizing the data again
scaler = MinMaxScaler()
X = scaler.fit_transform(mnist.data)
y = mnist.target.astype(np.uint8)

# Splitting the dataset into training and test sets again
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Defining the MLP classifier again
mlp = MLPClassifier(hidden_layer_sizes=(64,), max_iter=10, alpha=1e-4,
                    solver='sgd', verbose=10, random_state=1,
                    learning_rate_init=.1)

# Training the MLP classifier on the training set
mlp.fit(X_train, y_train)

# Evaluating the classifier on the test set
test_score = mlp.score(X_test, y_test)

# Print the test score
print(f"The test score of the MLP classifier is: {test_score:.4f}")

  warn(


In [1]:
import numpy as np

# Assuming the image preprocessing involves normalization and maybe some distortion, 
# we will just simulate this with a function that 'processes' images by a simple normalization for now.
def preprocess_images(images):
    # Normalize image data to 0-1
    images_normalized = images / 255.0
    return images_normalized

# A simple DNN structure for simulation purposes
class SimpleDNN:
    def __init__(self, input_shape, num_classes):
        # Simulate a simple neural network with random weights
        self.weights = np.random.rand(input_shape, num_classes)
    
    def predict(self, image):
        # Perform a simple matrix multiplication and softmax to simulate prediction
        logits = np.dot(image, self.weights)
        return np.exp(logits) / np.sum(np.exp(logits), axis=1, keepdims=True)

# Function to create and train a Multi-Column DNN
def train_mcdnn(num_columns, input_shape, num_classes, images, labels):
    # Create a list of DNNs
    dnns = [SimpleDNN(input_shape, num_classes) for _ in range(num_columns)]
    
    # Simulate training and averaging predictions
    predictions = np.zeros((len(images), num_classes))
    for dnn in dnns:
        # There is no actual training going on; we simulate this by just predicting with untrained networks
        preds = dnn.predict(images)
        predictions += preds
        
    # Average the predictions from each column
    predictions /= num_columns
    return predictions

# Example usage
num_classes = 10  # Just as an example, say we have 10 classes
num_columns = 3   # Number of columns in the MCDNN
input_shape = 784 # Example input shape (28x28 images flattened)

# Simulate some data (e.g., MNIST)
np.random.seed(0) # For reproducibility
images = np.random.rand(100, input_shape) # 100 random images
labels = np.random.randint(0, num_classes, 100) # 100 random labels

# Preprocess the images
processed_images = preprocess_images(images)

# Train the MCDNN
predictions = train_mcdnn(num_columns, input_shape, num_classes, processed_images, labels)

# The predictions are now in `predictions` and we would typically calculate accuracy or other metrics here
# However, since we have simulated data and a non-trained model, the accuracy would not be meaningful
predictions[:5]  # Show the first 5 predictions for brevity


array([[0.09771437, 0.09976008, 0.09895422, 0.10102684, 0.10103402,
        0.10196753, 0.09813206, 0.1000662 , 0.10096947, 0.10037521],
       [0.09915239, 0.10057207, 0.10038062, 0.10033083, 0.1011093 ,
        0.10141746, 0.09710186, 0.0987585 , 0.10122078, 0.0999562 ],
       [0.09928585, 0.09839743, 0.10086476, 0.1014137 , 0.1012655 ,
        0.10100233, 0.09746659, 0.09891252, 0.10050226, 0.10088906],
       [0.09964296, 0.09998277, 0.09989744, 0.10047433, 0.10058792,
        0.10179031, 0.09823333, 0.09869494, 0.10066029, 0.10003572],
       [0.0986354 , 0.09988082, 0.10045291, 0.10099037, 0.10155249,
        0.10093839, 0.09781693, 0.09908382, 0.1006316 , 0.10001726]])

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms

# Load and preprocess MNIST
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)

class CNNColumn(nn.Module):
    def __init__(self):
        super(CNNColumn, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, padding=1)  # Adding padding to maintain the size
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)  # Define conv2 with appropriate in/out channels
        # Adjust the linear layer to match the output of the conv/pool layers
        self.fc = nn.Linear(7 * 7 * 64, 10)  # Assuming the output is 7x7 after pooling

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Adjust the view to match the fc layer input
        x = x.view(-1, 7 * 7 * 64)  # Match the dimension with the fc layer
        x = self.fc(x)
        return x

class MCDNN(nn.Module):
    def __init__(self, num_columns):
        super(MCDNN, self).__init__()
        self.columns = nn.ModuleList([CNNColumn() for _ in range(num_columns)])

    def forward(self, x):
        column_outputs = [column(x) for column in self.columns]
        averaged_output = torch.mean(torch.stack(column_outputs), dim=0)
        return averaged_output

# Instantiation, Training, Evaluation (Example Structure)
num_columns = 5
model = MCDNN(num_columns)
optimizer = torch.optim.Adam(model.parameters())
loss_fn = nn.CrossEntropyLoss()

# Training and Evaluation
num_epochs = 10  # Adjust as needed
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # Use GPU if possible

model.to(device)  # Move the model to the device

trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False)

for epoch in range(num_epochs):
    model.train()  # Set model to training mode
    for images, labels in trainloader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()  # Clear gradients
        outputs = model(images)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()

    # Evaluation
    model.eval()  # Set model to evaluation mode
    test_loss, test_acc = 0, 0
    with torch.no_grad():
        for images, labels in testloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            test_loss += loss_fn(outputs, labels).item()
            test_acc += (outputs.argmax(dim=1) == labels).float().sum().item()

    test_loss /= len(testloader)
    test_acc /= len(testloader.dataset)
    print(f"Epoch {epoch+1}, Test Loss: {test_loss:.3f}, Test Accuracy: {test_acc:.3f}")


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:37<00:00, 267686.10it/s] 


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 42758804.74it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:13<00:00, 124436.50it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 1217208.41it/s]


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



RuntimeError: shape '[-1, 4608]' is invalid for input of size 346112

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

# Define the CNN architecture (similar to AlexNet)
class AlexNet(nn.Module):
    def __init__(self, num_classes=10):  # CIFAR-10 has 10 classes
        super(AlexNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=5),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

# CIFAR-10 loading and normalization
transform = transforms.Compose([
    transforms.Resize(224),  # Resize images to size used by AlexNet
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False)

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# Initialize the network
net = AlexNet(num_classes=10)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

# Move the model to the GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net.to(device)

# Training loop
for epoch in range(2):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data[0].to(device), data[1].to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

# Save the trained model
PATH = './cifar_net.pth'
torch.save(net.state_dict(), PATH)


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

# Define a simple neural network with dropout
class SimpleNetWithDropout(nn.Module):
    def __init__(self):
        super(SimpleNetWithDropout, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Linear(32 * 32 * 3, 512),
            nn.ReLU(),
            nn.Dropout(p=0.5)  # 50% probability of dropping out each neuron
        )
        self.layer2 = nn.Sequential(
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(p=0.5)  # 50% probability of dropping out each neuron
        )
        self.layer3 = nn.Linear(256, 10)

    def forward(self, x):
        x = x.view(x.size(0), -1)  # Flatten the image
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        return x

# Transform and load the CIFAR-10 dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False)

# Initialize the network
net = SimpleNetWithDropout()

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

# Move the model to the GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net.to(device)

# Training loop
for epoch in range(10):  # Loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()  # Zero the gradient buffers

        outputs = net(inputs)  # Forward pass
        loss = criterion(outputs, labels)  # Compute loss
        loss.backward()  # Backward pass
        optimizer.step()  # Update weights

        running_loss += loss.item()
        if i % 200 == 199:  # Print every 200 mini-batches
            print(f'[{epoch + 1}, {i + 1}] loss: {running_loss / 200}')
            running_loss = 0.0

print('Finished Training')

# Save the trained model
PATH = './cifar_net_with_dropout.pth'
torch.save(net.state_dict(), PATH)
