<a href="https://colab.research.google.com/github/Scottymichaelmillerguy/Neural_Network_Architectures_for_Deep_Learning/blob/main/Neural_Networks.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Neural Networks

## From ML Algorithms to GenAI & LLMs

## Perceptron

Let's implement a simple Perceptron using Python

In [None]:
import numpy as np

class Perceptron:
  def __init__(self, input_dim):
    self.weights = np.random.rand(input_dim)
    self.bias = np.random.rand()

  def predict(self, inputs):
    weighted_sum = np.dot(inputs, self.weights) + self.bias
    return 1 if weighted_sum >= 0 else 0

# Example usage:
input_data = np.array([1, 0, 1])
perceptron = Perceptron(input_dim=len(input_data))
prediction = perceptron.predict(input_data)
print("Prediction:", prediction)

Prediction: 1


Multi-layer Perceptrons MLPs

Lets implement a simple MLP using Python and the popular Deep Learning library, Keras:

In [None]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense

# Create a sample dataset
np.random.seed(42)
X = np.random.rand(100, 10) # 100 samples, 10 features
y = np.random.randint(2, size=100) # Binary labels (0 or 1)

# Create an MLP model
model = Sequential()
model.add(Dense(32, activation='relu', input_dim=10)) # Hidden layer with 32 neurons and ReLU activation
model.add(Dense(1, activation='sigmoid')) # Output layer with 1 neuron and Sigmoid activation

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy',
              metrics=['accuracy'])

# Train the model
model.fit(X, y, epochs=10, batch_size=32)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 35ms/step - accuracy: 0.5122 - loss: 0.7242
Epoch 2/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - accuracy: 0.5507 - loss: 0.6907
Epoch 3/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - accuracy: 0.5535 - loss: 0.6945
Epoch 4/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - accuracy: 0.5273 - loss: 0.7145 
Epoch 5/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.5730 - loss: 0.6990 
Epoch 6/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.5573 - loss: 0.6929 
Epoch 7/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.5400 - loss: 0.6929
Epoch 8/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - accuracy: 0.5294 - loss: 0.6923
Epoch 9/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m

<keras.src.callbacks.history.History at 0x7bb1f0e1a410>

## Convolutional Neural Networks CNN

Let's implement a simple CNN for image classification using Python and Keras

In [None]:
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

# Create a sample dataset
np.random.seed(42)

# 100 images of size 28x28 with 3 color channels
X = np.random.rand(100, 28, 28, 3)
y = np.random.randint(10, size=100) # Labels for 10 classes (0 to 9)

# Create a CNN model
model = Sequential()

# Convolutional layer with 32 filters and ReLU activation
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu',
                 input_shape=(28, 28, 3)))
model.add(MaxPooling2D(pool_size=(2, 2))) # Max pooling layer

model.add(Flatten()) # Flatten the output for fully connected layers

# Fully connected output layer with 10 neaurons for 10 classes and Softmax activation
model.add(Dense(10, activation='softmax'))

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X, y, epochs=10, batch_size=32)

Epoch 1/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 61ms/step - accuracy: 0.1046 - loss: 2.4622
Epoch 2/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 41ms/step - accuracy: 0.1598 - loss: 2.4159
Epoch 3/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step - accuracy: 0.1944 - loss: 2.2048
Epoch 4/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - accuracy: 0.2644 - loss: 2.1420
Epoch 5/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 0.3160 - loss: 2.0608
Epoch 6/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - accuracy: 0.3766 - loss: 1.9785
Epoch 7/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - accuracy: 0.2225 - loss: 1.8820
Epoch 8/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - accuracy: 0.2782 - loss: 1.7805
Epoch 9/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [

<keras.src.callbacks.history.History at 0x7bb1f09a2dd0>

## Recurrent Neural Networks (RNN)

Let's implement a simple RNN using Python and Keras

In [None]:
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import SimpleRNN, Dense

# Create a sample sequential data
np.random.seed(42)

# 100 sequences of length 10 with 1 feature
X = np.random.rand(100, 10, 1)
y = np.random.randint(2, size=100) # Binary labels (0 or 1)

# Create an RNN model
model = Sequential()

# RNN layer with 32 neurons
model.add(SimpleRNN(32, input_shape=(10, 1)))

# Output layer with 1 neuron for binary classification and Sigmoid activation
model.add(Dense(1, activation='sigmoid'))
# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X, y, epochs=10, batch_size=32)



Epoch 1/10


  super().__init__(**kwargs)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - accuracy: 0.5830 - loss: 0.6993
Epoch 2/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.5749 - loss: 0.6821
Epoch 3/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.5462 - loss: 0.6834
Epoch 4/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.5875 - loss: 0.6653
Epoch 5/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.5438 - loss: 0.6762
Epoch 6/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.5882 - loss: 0.6681
Epoch 7/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.5641 - loss: 0.6756 
Epoch 8/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.5652 - loss: 0.6670 
Epoch 9/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 

<keras.src.callbacks.history.History at 0x7bb1d83a1250>

## Long Short-Term Memory Networks (LSTM)

Let's implement a simple LSTM using PYthon and Keras

In [None]:
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import LSTM, Dense

# Create a sample sequential data
np.random.seed(42)

# 100 sequences of length 10 with 1 feature
X = np.random.rand(100, 10, 1)
y = np.random.randint(2, size=100) # Binary labels (0 or 1)

# Create an LSTM model
model = Sequential()
model.add(LSTM(32, input_shape=(10, 1))) # LSTM layer with 32 neurons

# Output layer with 1 neuron for binary classificaiton and Sigmoid activation
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X, y, epochs=10, batch_size=32)

Epoch 1/10


  super().__init__(**kwargs)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 18ms/step - accuracy: 0.4977 - loss: 0.6933
Epoch 2/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - accuracy: 0.5195 - loss: 0.6926 
Epoch 3/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.5070 - loss: 0.6927
Epoch 4/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.5382 - loss: 0.6904
Epoch 5/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.4820 - loss: 0.6994
Epoch 6/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - accuracy: 0.5518 - loss: 0.6873
Epoch 7/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - accuracy: 0.4986 - loss: 0.6991 
Epoch 8/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - accuracy: 0.5247 - loss: 0.6922
Epoch 9/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 

<keras.src.callbacks.history.History at 0x7c2b965f57d0>

## Generative Adversial Networks (GANs)

Let's implement a simple Generative Adversarial Network for generating images using Python and Keras

In [None]:
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Dense, LeakyReLU
from keras.optimizers import Adam

# Create a sample dataset of random noise
np.random.seed(42)
# 100 samples of random noise with 100 dimensions
X_noise = np.random.rand(100, 100)

# Create a generator model
generator = Sequential()
generator.add(Dense(256, input_dim=100))
generator.add(LeakyReLU(alpha=0.01))
generator.add(Dense(512))
generator.add(LeakyReLU(alpha=0.01))
# Output layer with 784 neurons for 28x28 images
generator.add(Dense(784, activation='tanh'))

# Create a discriminator model
discriminator = Sequential()
discriminator.add(Dense(512, input_dim=784))
discriminator.add(LeakyReLU(alpha=0.01))
discriminator.add(Dense(256))
discriminator.add(LeakyReLU(alpha=0.01))
# OUtput layer with 1 neuron for binary classification
discriminator.add(Dense(1, activation='sigmoid'))

# Compile the discriminator
discriminator.compile(optimizer=Adam(learning_rate=0.0002, beta_1=0.5),
                      loss='binary_crossentropy')

# Create a GAN model by combining the generator and discriminator
discriminator.trainable = False # Freeze the discriminator during GAN training
gan = Sequential()
gan.add(generator)
gan.add(discriminator)

# Compile the GAN
gan.compile(optimizer=Adam(learning_rate=0.0002, beta_1=0.5),
            loss='binary_crossentropy')


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


here is how you can train the GAN model

In [4]:
# 1000 samples of real images with 784 dimension (28x28 images flattened)
X_real = np.random.rand(1000, 784)

# Fit the GAN model
epochs = 10000
batch_size = 64

for epoch in range(epochs):
  # Generate fake images using the generator
  noise = np.random.rand(batch_size, 100)
  fake_images = generator.predict(noise)

# Select a random batch of real images from the original dataset
  idx = np.random.randint(0, X_real.shape[0], batch_size)
  real_images = X_real[idx]
# Concatenate real and fake images to create a batch for training the discriminator
  X_combined = np.concatenate([real_images, fake_images])
# Labels for the discriminaotr: 1 for real images, 0 for fake images
  y_discriminator = np.zeros(2 * batch_size)
  y_discriminator[:batch_size] = 0.9 # One-sided label smoothing for stability
# Train the discriminator
  discriminator_loss = discriminator.train_on_batch(X_combined, y_discriminator)

# Generate new noise for the generator
  noise = np.random.rand(batch_size, 100)
# Lebls for the generator: 1 (real) because we want the discriminator to mistake fake images as real

  y_generator = np.ones(batch_size)
# Train the GAN (generator only)
  gan_loss = gan.train_on_batch(noise, y_generator)
# Print the progress
  if epoch % 100 == 0:
    print(f"Epoch {epoch} | Discriminator Loss: {discriminator_loss} | GAN Loss: {gan_loss}")

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step




[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step

KeyboardInterrupt: 

## Transformer Networks

Let's take a high-level look at how to use a Transformer model using Hugging Face's "transformers" library in Python

In [7]:

pip install transformers




In [9]:

import torch
from transformers import BertTokenizer, BertForMaskedLM

# Load pre-trained BERT moel and tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForMaskedLM.from_pretrained('bert-base-uncased')

# Input text for masked language modeling
text = "The quick brown [MASK] jumps over the lazy dog."

# Tokenize the input text
inputs = tokenizer(text, return_tensors="pt")

# Get the masked token index
masked_index = torch.where(inputs["input_ids"][0] == tokenizer.mask_token_id)

# Predict missing word using the pre-trained BERT model
with torch.no_grad():
  outputs = model(**inputs)
  predictions = outputs.logits[0, masked_index, :]

# Get the top predicted tokens and their probabilities
top_predictions = torch.topk(predictions, k=5, dim=1)
predicted_tokens = tokenizer.convert_ids_to_tokens(top_predictions.indices[0])
predicted_probabilities = top_predictions.values[0]

# Print the top predicted tokens and their probabilities
for token, probability in zip(predicted_tokens, predicted_probabilities):
  print(f"{token}: {probability:.4f}")

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


cat: 9.9688
dog: 9.2704
bear: 8.8879
man: 8.7482
##ie: 8.5207
