# Neutal networks with keras 

- Understainding and implemeting a sequential model in Keras
- add dense layers and activation functions
- analyze a model's architecture and summary 


In [2]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation



In [3]:
# Creating a Sequential model
model = Sequential()

### Adding Layers 

We can add **Dense layers** (fully connected layers) and specify **activation functions**.
The activation functions introduce **non-linearity**, helping the model learn complex patterns.

In [5]:
# Defining a neural network with two hidden layers (32 neurons each)
model = Sequential([
    Dense(32, input_shape=(784,)),  # Input layer with 784 features
    Activation('relu'),             # First hidden layer with !ReLU! activation
    Dense(32),
    Activation('relu'),             # Second hidden layer with ReLU activation
    Dense(3),
    Activation('softmax')           # Output layer with 3 categories # the output layer has 3 neurons 
])

# Display model summary
model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


### Building a model using .add() method

In [7]:

# Define a Sequential model using .add() method
model = Sequential()
model.add(Dense(64, input_shape=(20,)))  # First hidden layer with 64 neurons
model.add(Activation('relu'))
model.add(Dense(32))  # Second hidden layer with 32 neurons
model.add(Activation('relu'))
model.add(Dense(3))  # Output layer with 3 classes
model.add(Activation('softmax'))

# Display the model summary
model.summary()

## Generating a dataset for classification 
- To train our model, we need a dataset. Here, we generate a synthetic dataset with 1000 samples and 20 features, where the target variable has 3 classes.

In [9]:

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from sklearn.datasets import make_classification

# Generate a synthetic classification dataset
X, y = make_classification(n_samples=1000, n_features=20, n_classes=3, n_informative=15, random_state=42)

# Convert labels to categorical format
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
y_categorical = to_categorical(y_encoded)

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_categorical, test_size=0.2, random_state=42)


### compiling the model 

This step is crucial as it configures the learning process by specifying: 
- Optimizer → How the model updates weights to minimize errors.
- Loss Function → Measures how far the model's predictions are from the actual labels.
- Metrics → Used to evaluate performance (e.g., accuracy).

In [11]:

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

print("Model compiled successfully! ✅")

Model compiled successfully! ✅


### Training the model 

In [13]:

# Train the model
history = model.fit(X_train, y_train, epochs=10, batch_size=16, validation_data=(X_test, y_test), verbose=1)


Epoch 1/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.3880 - loss: 1.3357 - val_accuracy: 0.5500 - val_loss: 0.9542
Epoch 2/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6852 - loss: 0.7680 - val_accuracy: 0.6500 - val_loss: 0.8202
Epoch 3/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7603 - loss: 0.6458 - val_accuracy: 0.6900 - val_loss: 0.7485
Epoch 4/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8071 - loss: 0.5402 - val_accuracy: 0.6850 - val_loss: 0.7167
Epoch 5/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8229 - loss: 0.4951 - val_accuracy: 0.7100 - val_loss: 0.7009
Epoch 6/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8647 - loss: 0.4181 - val_accuracy: 0.7300 - val_loss: 0.6690
Epoch 7/10
[1m50/50[0m [32m━━━━━━━━━━

In [14]:

# Evaluate model accuracy
train_loss, train_acc = model.evaluate(X_train, y_train, verbose=0)
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)

print(f"✅ Training Accuracy: {train_acc:.4f}")
print(f"✅ Testing Accuracy: {test_acc:.4f}")

✅ Training Accuracy: 0.9362
✅ Testing Accuracy: 0.7300


### Making Predictions 

In [16]:
#predict 5 test samples and compare the predicted labels with actual labels 
# Predict some data points
predictions = model.predict(X_test[:5])
predicted_classes = np.argmax(predictions, axis=1)
actual_classes = np.argmax(y_test[:5], axis=1)

# Display results
print(f"Predicted Classes: {predicted_classes}")
print(f"Actual Classes: {actual_classes}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
Predicted Classes: [1 2 0 1 1]
Actual Classes: [1 2 0 0 2]


## Using a different optimizer - Stochastic Gradient Descent (SGD) 

- so far, we have used the Adam Optimizer, which adapts the learning rate dynamically. Now let's explicitly import SGD and loss functions and tweak some hyperparameters. 

In [28]:

# Importing SGD optimizer and loss function explicitly
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.losses import CategoricalCrossentropy

# Creating a new model to test SGD optimizer
sgd_model = Sequential()
sgd_model.add(Dense(64, input_shape=(20,)))  # First hidden layer with 64 neurons
sgd_model.add(Activation('relu'))
sgd_model.add(Dense(32))  # Second hidden layer with 32 neurons
sgd_model.add(Activation('relu'))
sgd_model.add(Dense(3))  # Output layer with 3 classes
sgd_model.add(Activation('softmax'))

# Display the model structure
sgd_model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


### compiling the model with SGD optimizer 



Instead of using **Adam**, we now use **SGD** with the following parameters:
- **Learning Rate (`lr=0.01`)**: Controls the step size of weight updates.
- **Momentum (`momentum=0.9`)**: Helps accelerate training by maintaining past gradients.
- **Loss Function**: `CategoricalCrossentropy()` (explicitly imported).

Why Change the Optimizer?
- **SGD** is useful when training with large datasets as it updates weights more frequently.
- **Momentum** helps avoid getting stuck in local minima.

In [34]:

# Compile the model using SGD optimizer with custom parameters
sgd_optimizer = SGD(learning_rate=0.01, momentum=0.9)  # Custom learning rate and momentum
loss_function = CategoricalCrossentropy()

sgd_model.compile(optimizer=sgd_optimizer, loss=loss_function, metrics=['accuracy'])




### training the model with SGD 



In [37]:
# Train the model with SGD optimizer
sgd_history = sgd_model.fit(X_train, y_train, epochs=10, batch_size=16, validation_data=(X_test, y_test), verbose=1)

Epoch 1/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.5203 - loss: 1.0533 - val_accuracy: 0.6800 - val_loss: 0.7820
Epoch 2/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8103 - loss: 0.4971 - val_accuracy: 0.7400 - val_loss: 0.6757
Epoch 3/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8623 - loss: 0.4276 - val_accuracy: 0.7450 - val_loss: 0.6193
Epoch 4/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8802 - loss: 0.3148 - val_accuracy: 0.7600 - val_loss: 0.5934
Epoch 5/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9211 - loss: 0.2363 - val_accuracy: 0.7500 - val_loss: 0.6363
Epoch 6/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9434 - loss: 0.1839 - val_accuracy: 0.8000 - val_loss: 0.5797
Epoch 7/10
[1m50/50[0m [32m━━━━━━━━━━

In [39]:
#evaluate model performance 

# Evaluate model trained with SGD
sgd_train_loss, sgd_train_acc = sgd_model.evaluate(X_train, y_train, verbose=0)
sgd_test_loss, sgd_test_acc = sgd_model.evaluate(X_test, y_test, verbose=0)

print(f"✅ Training Accuracy with SGD: {sgd_train_acc:.4f}")
print(f"✅ Testing Accuracy with SGD: {sgd_test_acc:.4f}")


✅ Training Accuracy with SGD: 0.9875
✅ Testing Accuracy with SGD: 0.7750


### Making predictions w SGD model 

In [42]:
# Predict some test data points using the SGD-trained model
sgd_predictions = sgd_model.predict(X_test[:5])
sgd_predicted_classes = np.argmax(sgd_predictions, axis=1)
sgd_actual_classes = np.argmax(y_test[:5], axis=1)

# Display results
print(f"Predicted Classes (SGD): {sgd_predicted_classes}")
print(f"Actual Classes: {sgd_actual_classes}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
Predicted Classes (SGD): [1 2 0 0 1]
Actual Classes: [1 2 0 0 2]
