# Welcome to this hands-on deep learning class!
# In this notebook, we'll apply essential deep learning concepts to a Heart Disease Prediction dataset.

# The following tasks will be covered:
1. Uploading dataset:

  
    *   By importing libraries 
    *   Manually using pandas


2. Use Normalization

3. Use Standardization

4. Use different activation functions

5. Use different optimizers

6. How to print model summary

7. How to find number of parameters

8. How the model is stored (in which file format), and how to share the model with another PC.

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import matplotlib.pyplot as plt
import seaborn as sns

2025-06-23 05:56:24.269433: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1750658184.489153      35 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1750658184.553139      35 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
# b. Uploading Dataset Manually Using Pandas
# On Kaggle, datasets you add to your notebook are typically available in the '../input/' directory.
# You can find the correct path by clicking on the dataset in the input section on the right,
# then hovering over the file name and clicking 'Copy file path'.

file_path = "/kaggle/input/heart-disease-dataset/heart.csv"
# Read through `file_path` so the location is defined in exactly one place; the next cell
# prints `file_path` as the source, so it must be the path that was actually read.
df = pd.read_csv(file_path)

In [3]:
# Sanity check of the loaded frame: source path, a preview, its shape, and the column summary.
print(f"Dataset loaded successfully from: {file_path}")
print(
    "First 5 rows of the dataset:",
    df.head(),
    f"Shape of the dataset: {df.shape}",
    "\nDataset Info:",
    sep="\n",
)
df.info()

Dataset loaded successfully from: /kaggle/input/heart-disease-dataset/heart.csv
First 5 rows of the dataset:
   age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  \
0   52    1   0       125   212    0        1      168      0      1.0      2   
1   53    1   0       140   203    1        0      155      1      3.1      0   
2   70    1   0       145   174    0        1      125      1      2.6      0   
3   61    1   0       148   203    0        1      161      0      0.0      2   
4   62    0   0       138   294    1        1      106      0      1.9      1   

   ca  thal  target  
0   2     3       0  
1   0     3       0  
2   0     3       0  
3   1     3       0  
4   3     2       0  
Shape of the dataset: (1025, 14)

Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1025 entries, 0 to 1024
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       1025 non-null   int64  


In [4]:
# Count the missing entries per column; this tells us whether any cleaning is
# needed before preprocessing.
missing_per_column = df.isna().sum()

print("\nMissing values in each column:")
print(missing_per_column)


Missing values in each column:
age         0
sex         0
cp          0
trestbps    0
chol        0
fbs         0
restecg     0
thalach     0
exang       0
oldpeak     0
slope       0
ca          0
thal        0
target      0
dtype: int64


In [5]:
# Remove every row that contains at least one missing value.
# The frame is modified in place because later cells keep reading `df`.
df.dropna(axis=0, how="any", inplace=True)

shape_after_drop = df.shape
print(f"\nShape after dropping rows with missing values: {shape_after_drop}")


Shape after dropping rows with missing values: (1025, 14)


In [6]:
# Separate the label from the predictors:
#   y -> the 'target' column
#   X -> every other column
y = df['target']
X = df.drop(columns='target')

In [7]:
# Hold out 20% of the rows for testing. The split is stratified on y and uses a
# fixed random_state so it is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [8]:
# Report the shape of each piece produced by the train/test split.
split_shapes = {
    "X_train": X_train.shape,
    "y_train": y_train.shape,
    "X_test": X_test.shape,
    "y_test": y_test.shape,
}

print()
for split_name, split_shape in split_shapes.items():
    print(f"Shape of {split_name}: {split_shape}")


Shape of X_train: (820, 13)
Shape of y_train: (820,)
Shape of X_test: (205, 13)
Shape of y_test: (205,)


# Normalization and Standardization

In [9]:
# Identify the feature columns to scale.
# NOTE(review): this selects *every* column of X, so integer-coded columns such as
# 'sex', 'cp' or 'thal' are scaled as if they were continuous — confirm that is intended.

numerical_cols = X.columns 
print(numerical_cols)

Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
       'exang', 'oldpeak', 'slope', 'ca', 'thal'],
      dtype='object')


In [10]:
# Normalization (Min-Max Scaling)
# Rescales every feature to a fixed range (0 to 1 with the default settings).
# Formula: $X_{normalized} = (X - X_{min}) / (X_{max} - X_{min})$

# Learn the per-feature minimum and maximum from the training split only, so the
# test split is scaled with statistics it did not contribute to.
scaler_minmax = MinMaxScaler()
scaler_minmax.fit(X_train[numerical_cols])

# Scale copies, leaving the original X_train / X_test frames untouched.
X_train_normalized = X_train.copy()
X_train_normalized[numerical_cols] = scaler_minmax.transform(X_train[numerical_cols])

X_test_normalized = X_test.copy()
X_test_normalized[numerical_cols] = scaler_minmax.transform(X_test[numerical_cols])

In [11]:
# Compare the range of one example column ('age') before and after min-max scaling.
age_raw = X_train['age']
age_normalized = X_train_normalized['age']

print("Original 'age' min/max (example):")
print(f"Train Min: {age_raw.min()}, Max: {age_raw.max()}")
print("Normalized 'age' min/max (example):")
print(f"Train Min: {age_normalized.min():.2f}, Max: {age_normalized.max():.2f}")

Original 'age' min/max (example):
Train Min: 29, Max: 77
Normalized 'age' min/max (example):
Train Min: 0.00, Max: 1.00


In [12]:
# Standardization (Z-score Normalization)
# Rescales every feature to mean 0 and standard deviation 1.
# Formula: $X_{standardized} = (X - mean(X)) / sigma$

# Learn the per-feature mean and standard deviation from the training split only.
scaler_standard = StandardScaler()
scaler_standard.fit(X_train[numerical_cols])

# Scale copies, leaving the original X_train / X_test frames untouched.
X_train_standardized = X_train.copy()
X_train_standardized[numerical_cols] = scaler_standard.transform(X_train[numerical_cols])

X_test_standardized = X_test.copy()
X_test_standardized[numerical_cols] = scaler_standard.transform(X_test[numerical_cols])


In [13]:
# Compare mean / standard deviation of one example column ('age') before and after standardization.
age_raw = X_train['age']
age_standardized = X_train_standardized['age']

print("Original 'age' mean/std (example):")
print(f"Train Mean: {age_raw.mean():.2f}, Std: {age_raw.std():.2f}")
print("Standardized 'age' mean/std (example):")
print(f"Train Mean: {age_standardized.mean():.2f}, Std: {age_standardized.std():.2f}")

Original 'age' mean/std (example):
Train Mean: 54.61, Std: 9.11
Standardized 'age' mean/std (example):
Train Mean: -0.00, Std: 1.00


In [14]:
# The min-max normalized features are the ones fed to every model below.
# Keras is given plain NumPy arrays, so the DataFrames are converted here.

X_train_final = X_train_normalized.to_numpy()
X_test_final = X_test_normalized.to_numpy()

In [15]:
# The column count of the training matrix is the size of the model's input layer.
_, input_dim = X_train_final.shape
print(f"\nNumber of input features for the model: {input_dim}")


Number of input features for the model: 13


# Using Different Activation Functions

In [16]:
# Activation functions introduce non-linearity into the network, allowing it to learn complex patterns.
# For binary classification (heart disease presence), the output layer typically uses 'sigmoid'.

def build_model_with_activation(activation_function, name_suffix="", input_features=None):
    """Build an (uncompiled) 64-32-1 dense binary classifier.

    Args:
        activation_function: Activation used by both hidden layers, either a
            Keras activation name (e.g. 'relu') or a callable.
        name_suffix: Text appended to the layer and model names so that models
            built with different activations are easy to tell apart.
        input_features: Number of input features. When omitted, the notebook-level
            `input_dim` is used, which is what the function originally relied on.

    Returns:
        A `keras.Sequential` model whose output layer is a single sigmoid unit.
    """
    if input_features is None:
        # Backward-compatible default: fall back to the notebook global.
        input_features = input_dim

    # Use a readable label in the model name even when a callable is passed;
    # for string activations this is the string itself, so names are unchanged.
    if isinstance(activation_function, str):
        activation_label = activation_function
    else:
        activation_label = getattr(
            activation_function, "__name__", type(activation_function).__name__
        )

    model = keras.Sequential([
        keras.layers.Input(shape=(input_features,)), # Input layer
        keras.layers.Dense(64, activation=activation_function, name=f"dense_layer_1_{name_suffix}"),
        keras.layers.Dense(32, activation=activation_function, name=f"dense_layer_2_{name_suffix}"),
        keras.layers.Dense(1, activation='sigmoid', name=f"output_layer_{name_suffix}") # Sigmoid for binary classification
    ], name=f"model_with_{activation_label}_{name_suffix}")
    return model


In [17]:
# Example 1: hidden layers with ReLU (Rectified Linear Unit) activation.
# ReLU is a common choice for hidden layers due to its computational efficiency.
model_relu = build_model_with_activation(activation_function='relu', name_suffix="relu")
model_relu.summary()

2025-06-23 05:57:28.823831: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


In [18]:
# Example 2: hidden layers with Tanh (hyperbolic tangent) activation.
# Tanh squashes values into the range -1 to 1 and can sometimes work well for hidden layers.
model_tanh = build_model_with_activation(activation_function='tanh', name_suffix="tanh")
model_tanh.summary()

In [19]:
# Example 3: hidden layers with Sigmoid activation.
# Sigmoid squashes values into the range 0 to 1. It is less common for hidden layers in deep
# networks because of vanishing gradients, but it can be used.
# (The output layer uses sigmoid for binary classification regardless of this choice.)
model_sigmoid_hidden = build_model_with_activation(
    activation_function='sigmoid',
    name_suffix="sigmoid_hidden",
)
model_sigmoid_hidden.summary()

# Using Different Optimizers

In [20]:
# Base architecture shared by all optimizer demonstrations.

def build_base_model_for_optimizer(input_shape):
    """Return an uncompiled 64-32-1 dense binary classifier.

    Args:
        input_shape: Number of input features (an integer; it is wrapped into
            the one-element shape tuple expected by the Input layer).

    Returns:
        A `keras.Sequential` model with two ReLU hidden layers (64 and 32 units)
        and a single sigmoid output unit.
    """
    hidden_units = (64, 32)

    layer_stack = [keras.layers.Input(shape=(input_shape,))]
    layer_stack += [keras.layers.Dense(units, activation='relu') for units in hidden_units]
    layer_stack.append(keras.layers.Dense(1, activation='sigmoid'))

    return keras.Sequential(layer_stack)

In [21]:
# Example 1: Adam Optimizer (Adaptive Moment Estimation)
# A popular choice, generally performs well with default parameters.

# Seed Python, NumPy and the backend before building, so the initial weights (and the
# accuracy printed after training) are reproducible and every optimizer demo that seeds
# the same way starts from the same initialization. 42 matches the train/test split seed.
keras.utils.set_random_seed(42)

model_adam = build_base_model_for_optimizer(input_dim)
model_adam.compile(optimizer='adam',
                   loss='binary_crossentropy', # For binary classification
                   metrics=['accuracy'])

In [22]:
# Short demonstration run: one epoch, with 10% of the training data held out for validation.
print("Training model with Adam (1 epoch for demo)...")
history_adam = model_adam.fit(
    X_train_final,
    y_train,
    epochs=1,
    validation_split=0.1,
    verbose=0,
)

# Training accuracy recorded for the last (here: only) epoch.
adam_train_accuracy = history_adam.history['accuracy'][-1]
print(f"Adam training accuracy: {adam_train_accuracy:.4f}")

Training model with Adam (1 epoch for demo)...
Adam training accuracy: 0.6748


In [23]:
# Example 2: SGD Optimizer (Stochastic Gradient Descent)
# The foundational optimizer. Can be slow but allows for careful tuning (e.g., with learning rate schedules).

# Seed before building so the initial weights and the printed accuracy are reproducible
# across runs. 42 matches the seed used for the train/test split.
keras.utils.set_random_seed(42)

model_sgd = build_base_model_for_optimizer(input_dim)
# You might want to specify a learning rate for SGD:
sgd_optimizer = keras.optimizers.SGD(learning_rate=0.01)
model_sgd.compile(optimizer=sgd_optimizer,
                  loss='binary_crossentropy',
                  metrics=['accuracy'])


In [24]:
# Short demonstration run: one epoch, with 10% of the training data held out for validation.
print("Training model with SGD (1 epoch for demo)...")
history_sgd = model_sgd.fit(
    X_train_final,
    y_train,
    epochs=1,
    validation_split=0.1,
    verbose=0,
)

# Training accuracy recorded for the last (here: only) epoch.
sgd_train_accuracy = history_sgd.history['accuracy'][-1]
print(f"SGD training accuracy: {sgd_train_accuracy:.4f}")

Training model with SGD (1 epoch for demo)...
SGD training accuracy: 0.5298


In [25]:
# Example 3: RMSprop Optimizer (Root Mean Square Propagation)
# Adapts the learning rate for each parameter. Often performs well.

# Seed before building so the initial weights and the printed accuracy are reproducible
# across runs. 42 matches the seed used for the train/test split.
keras.utils.set_random_seed(42)

model_rmsprop = build_base_model_for_optimizer(input_dim)
model_rmsprop.compile(optimizer='rmsprop',
                      loss='binary_crossentropy',
                      metrics=['accuracy'])


In [26]:
# Short demonstration run: one epoch, with 10% of the training data held out for validation.
print("Training model with RMSprop (1 epoch for demo)...")
history_rmsprop = model_rmsprop.fit(
    X_train_final,
    y_train,
    epochs=1,
    validation_split=0.1,
    verbose=0,
)

# Training accuracy recorded for the last (here: only) epoch.
rmsprop_train_accuracy = history_rmsprop.history['accuracy'][-1]
print(f"RMSprop training accuracy: {rmsprop_train_accuracy:.4f}")

Training model with RMSprop (1 epoch for demo)...
RMSprop training accuracy: 0.7046


In [27]:
model_relu.summary()

# Explanation of Model Summary:
# - Layer (type): Name and type of the layer (e.g., Dense).
# - Output Shape: The shape of the tensor output by the layer. `None` indicates batch size, which can vary.
# - Param #: The number of parameters (weights and biases) in that layer.
# - Total params: Total number of parameters in the entire model (trainable + non-trainable).
# - Trainable params: Parameters that will be updated during training.
# - Non-trainable params: Parameters that will not be updated (e.g., from pre-trained layers if frozen).

#  How to Find Number of Parameters
* The number of parameters indicates the complexity of your model.
* More parameters generally mean a higher capacity to learn, but also a higher risk of overfitting.

In [28]:
# The parameter count can be read off the model summary or obtained programmatically.

# Option 1: read it from the model.summary() output.
print()
print("From model_relu.summary() output, look for 'Total params'.")


From model_relu.summary() output, look for 'Total params'.


In [29]:
# Option 2: ask the model directly for its parameter count.
total_params = model_relu.count_params()

print()
print(f"Total number of parameters in model_relu (programmatically): {total_params}")


Total number of parameters in model_relu (programmatically): 3009


* Let's break down parameter calculation for a Dense layer:
* A Dense layer with 'n' input units and 'm' output units has:
*   (n * m) weights + m biases = total parameters
* Example: First Dense layer in model_relu has input_dim (number of features) and 64 units.
* If input_dim is 13 (a common number of features for heart disease datasets):
* Parameters for first Dense layer = (input_dim * 64) + 64 = (13 * 64) + 64 = 832 + 64 = 896
* For the second Dense layer (32 units, connected to 64 units from previous layer):
* Parameters = (64 * 32) + 32 = 2048 + 32 = 2080
* For the output Dense layer (1 unit, connected to 32 units from previous layer):
* Parameters = (32 * 1) + 1 = 32 + 1 = 33
* Total: 896 + 2080 + 33 = 3009 (This should match 'Total params' in the summary for model_relu)

# How Model is Stored and Shared

In [30]:
# Build, compile and briefly train the model that the following cells save and reload.

# Seed Python, NumPy and the backend first so the trained weights, and therefore the
# test accuracy reported below, are reproducible across runs.
# 42 matches the seed used for the train/test split.
keras.utils.set_random_seed(42)

model_to_save = build_base_model_for_optimizer(input_dim)
model_to_save.compile(optimizer='adam',
                      loss='binary_crossentropy',
                      metrics=['accuracy'])

# Train for a few epochs, holding out 10% of the training data for validation.
history = model_to_save.fit(X_train_final, y_train, epochs=2, validation_split=0.1, verbose=1)
print("Model trained.")

Epoch 1/2
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - accuracy: 0.5908 - loss: 0.6704 - val_accuracy: 0.7805 - val_loss: 0.6138
Epoch 2/2
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8298 - loss: 0.5812 - val_accuracy: 0.7927 - val_loss: 0.5387
Model trained.


In [31]:
# Score the freshly trained model on the held-out test split.
# evaluate() returns the loss followed by the compiled metrics (here: accuracy).
test_metrics = model_to_save.evaluate(X_test_final, y_test, verbose=0)
loss, accuracy = test_metrics
print(f"Model accuracy on test set after training: {accuracy:.4f}")

Model accuracy on test set after training: 0.8244


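* Keras models can be saved in several formats:
  1. TensorFlow SavedModel format (a directory)
  2. H5 format (a single `.h5` file)
  3. Native Keras format (a single `.keras` file) — this is what `model.save()` writes when the path ends in `.keras`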

# H5 format (HDF5)(.h5)
Hierarchical Data Format version 5 
1. Architecture: layers, their types, connections, activation functions in JSON strings.
2. Weights: the learned parameters are stored as arrays.
3. Optimizer State: if the model is only partially trained, the optimizer state (learning rate, momentum, etc.) is saved as well, so that training can be resumed.
   

#  TensorFlow SavedModel Format
1. It creates a directory instead of a single file.
2. saved_model.pb: Defines the actual TensorFlow graph and metadata about the model.
3. variables/ directory: This subdirectory contains the trained weights and biases of the model
4. assets/ directory (optional): auxiliary files
5. keras_metadata.pb (optional)

In [32]:
# 1. Saving in the native Keras format (.keras)
# NOTE: the path below ends in ".keras", so this writes a single .keras file rather than a
# TensorFlow SavedModel directory. The variable name still says "tf" / "savedmodel" and is
# kept as is because later cells reference it.
# Per the Keras documentation, this format stores the architecture, the weights and the
# compile/optimizer configuration — TODO confirm for the installed Keras version.

saved_model_path_tf = "/kaggle/working/my_heart_disease_model_tf_savedmodel.keras"
model_to_save.save(saved_model_path_tf)

In [33]:
# To load the model:
# `saved_model_path_tf` ends in ".keras", so the file is in the native Keras format
# (not a TensorFlow SavedModel directory); the messages below name the format accordingly.
loaded_model_tf = keras.models.load_model(saved_model_path_tf)
print("\nModel loaded from native Keras (.keras) format.")

# Re-evaluate on the test split; the accuracy should match the model that was saved.
loss_loaded, accuracy_loaded = loaded_model_tf.evaluate(X_test_final, y_test, verbose=0)
print(f"Loaded model accuracy on test set (from .keras file): {accuracy_loaded:.4f}")


Model loaded from TensorFlow SavedModel format.
Loaded model accuracy on test set (from SavedModel): 0.8244


In [34]:
# 2. Saving in H5 format
# The ".h5" extension selects HDF5 output: one file holding the model's architecture,
# weights, and optimizer state.

saved_model_path_h5 = "/kaggle/working/my_heart_disease_model.h5"
model_to_save.save(filepath=saved_model_path_h5)

In [35]:
# Reload the model from the H5 file and confirm it scores the same on the test split.
loaded_model_h5 = keras.models.load_model(saved_model_path_h5)
print("\nModel loaded from H5 format.")

h5_metrics = loaded_model_h5.evaluate(X_test_final, y_test, verbose=0)
loss_loaded, accuracy_loaded = h5_metrics
print(f"Loaded model accuracy on test set (from H5): {accuracy_loaded:.4f}")


Model loaded from H5 format.
Loaded model accuracy on test set (from H5): 0.8244


# How to share the model to some other PC:
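# For the `.keras` file saved above (`saved_model_path_tf`):
* Copy the single file `/kaggle/working/my_heart_disease_model_tf_savedmodel.keras` to the other PC. (A true TensorFlow SavedModel would instead be a whole directory that must be copied in full.)
* On the other PC, use `keras.models.load_model('path/to/my_heart_disease_model_tf_savedmodel.keras')`.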

# For H5 format:
* Copy the single H5 file `/kaggle/working/my_heart_disease_model.h5` (the value of `saved_model_path_h5`) to the other PC.
* On the other PC, use `keras.models.load_model('path/to/my_heart_disease_model.h5')`.

# Important Considerations for Sharing:
* Environment: The other PC should have compatible versions of Python, TensorFlow, and Keras.
* Data: Ensure the other PC has access to the data in the same format if you plan to continue training or evaluate.
* Custom Objects: If your model uses custom layers, activation functions, or loss functions, you'll need to provide them when loading.


# --- End of Notebook ---
# Feel free to experiment with different parameters, architectures, and explore more advanced preprocessing techniques for this dataset.