<a href="https://colab.research.google.com/github/2786sanjana/Data-Science-Assignment-eCommerce-Transactions-Dataset/blob/main/ECG_HeartDisease_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ================== ECG Deep Learning Project (ANN + CNN) ==================
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
import seaborn as sns
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, Dropout
from tensorflow.keras.utils import to_categorical
import os

# ----------------------------------------------------------
# Function to safely load CSV from multiple possible paths
# ----------------------------------------------------------
def safe_load_csv(filename):
    """Load a headerless CSV from the first known location that contains it.

    The candidate locations are probed in order: ``/mnt/data``, the current
    working directory, ``/content`` (Google Colab) and ``/kaggle/working``
    (Kaggle).

    Args:
        filename: Bare file name of the CSV, e.g. ``"mitbih_train.csv"``.

    Returns:
        A ``pandas.DataFrame`` read with ``header=None``, so the columns are
        numbered from 0.

    Raises:
        FileNotFoundError: If none of the candidate locations holds a regular
            file with that name.
    """
    possible_paths = [
        f"/mnt/data/{filename}",
        f"./{filename}",
        f"/content/{filename}",           # For Google Colab users
        f"/kaggle/working/{filename}"     # For Kaggle users
    ]

    for path in possible_paths:
        # isfile (not exists): a directory that happens to share the name must
        # not be handed to read_csv; it should fall through to the clear
        # FileNotFoundError below instead.
        if os.path.isfile(path):
            print(f"✔ File found at: {path}")
            return pd.read_csv(path, header=None)

    raise FileNotFoundError(
        f"❌ '{filename}' not found in /mnt/data, working directory, or Colab/Kaggle paths.\n"
        f"➡ Please upload '{filename}' to the environment."
    )

# --------------------- Load Datasets ---------------------
# Each CSV is read without a header row; the last column is used as the label.
train = safe_load_csv("mitbih_train.csv")
test = safe_load_csv("mitbih_test.csv")

print("Raw train shape:", train.shape)
print("Raw test shape:", test.shape)

# --------------------- Clean Labels ---------------------
# Keep only rows whose label is one of the five expected class ids. isin() is
# False for NaN, so rows with a missing label are dropped here as well.
valid_classes = [0, 1, 2, 3, 4]
train = train[train.iloc[:, -1].isin(valid_classes)]
test = test[test.iloc[:, -1].isin(valid_classes)]

# --------------------- Split Features / Labels ---------------------
# Every column except the last is a feature; the last one is the class id.
X_train = train.iloc[:, :-1].values.astype("float32")
y_train = train.iloc[:, -1].values.astype("int64")
X_test = test.iloc[:, :-1].values.astype("float32")
y_test = test.iloc[:, -1].values.astype("int64")

# --------------------- Normalization ---------------------
# Scale BOTH splits by the training maximum. Dividing the test split by its
# own maximum would put the two splits on different scales whenever their
# maxima differ, and would derive a preprocessing statistic from test data.
feature_scale = np.max(X_train)
if feature_scale > 0:  # skip the division rather than produce NaN/inf
    X_train /= feature_scale
    X_test /= feature_scale

# --------------------- One-hot Encoding ---------------------
# The one-hot width follows the list of valid class ids defined above.
y_train_cat = to_categorical(y_train, num_classes=len(valid_classes))
y_test_cat = to_categorical(y_test, num_classes=len(valid_classes))

# --------------------- ANN MODEL ---------------------
# Fully connected classifier over the flat feature vector, assembled one
# layer at a time; the final softmax emits one probability per class.
ann = Sequential()
ann.add(Dense(128, activation="relu", input_shape=(X_train.shape[1],)))
ann.add(Dense(64, activation="relu"))
ann.add(Dropout(0.3))
ann.add(Dense(32, activation="relu"))
ann.add(Dense(5, activation="softmax"))

# One-hot targets, hence the (non-sparse) categorical cross-entropy.
ann.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

# The test split is passed as validation data, so the "val_*" history entries
# are measured on it after every epoch.
ann_history = ann.fit(
    x=X_train,
    y=y_train_cat,
    batch_size=256,
    epochs=15,
    verbose=1,
    validation_data=(X_test, y_test_cat),
)

# --------------------- CNN MODEL ---------------------
# Append a trailing axis of length 1 so each row becomes (steps, 1), which is
# the input shape declared on the first Conv1D layer below.
X_train_cnn = X_train[:, :, np.newaxis]
X_test_cnn = X_test[:, :, np.newaxis]

# Two convolution + pooling stages followed by a small dense head.
cnn = Sequential()
cnn.add(Conv1D(filters=32, kernel_size=5, activation="relu",
               input_shape=(X_train_cnn.shape[1], 1)))
cnn.add(MaxPooling1D(pool_size=2))
cnn.add(Conv1D(filters=64, kernel_size=5, activation="relu"))
cnn.add(MaxPooling1D(pool_size=2))
cnn.add(Flatten())
cnn.add(Dense(units=64, activation="relu"))
cnn.add(Dropout(rate=0.3))
cnn.add(Dense(units=5, activation="softmax"))

cnn.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

# Same schedule as the ANN: 15 epochs, batches of 256, test split as validation.
cnn_history = cnn.fit(
    x=X_train_cnn,
    y=y_train_cat,
    batch_size=256,
    epochs=15,
    verbose=1,
    validation_data=(X_test_cnn, y_test_cat),
)

# --------------------- Evaluation & Graphics ---------------------

# Accuracy Plot: training and validation curves of both models on one axes.
# The validation curves are labelled "Test" because the test split was passed
# as validation data during fitting.
for _hist, _model in ((ann_history, "ANN"), (cnn_history, "CNN")):
    plt.plot(_hist.history["accuracy"], label=f"{_model} Train")
    plt.plot(_hist.history["val_accuracy"], label=f"{_model} Test")
plt.title("Accuracy Comparison")
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.legend()
plt.show()

# Loss Plot: same layout for the loss values.
for _hist, _model in ((ann_history, "ANN"), (cnn_history, "CNN")):
    plt.plot(_hist.history["loss"], label=f"{_model} Loss")
    plt.plot(_hist.history["val_loss"], label=f"{_model} Val Loss")
plt.title("Loss Comparison")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()
plt.show()

# Confusion Matrix (CNN Predictions)
# Pin the label set explicitly. Without `labels=` the matrix only contains the
# classes that occur in y_test/y_pred, so its size (and what each heatmap
# row/column means) would change if a class were missing.
class_ids = [0, 1, 2, 3, 4]

# argmax over the softmax outputs gives the predicted class id per sample.
y_pred = np.argmax(cnn.predict(X_test_cnn), axis=1)
cm = confusion_matrix(y_test, y_pred, labels=class_ids)

plt.figure(figsize=(6,4))
sns.heatmap(cm, annot=True, cmap="Blues", fmt="d",
            xticklabels=class_ids, yticklabels=class_ids)
plt.title("Confusion Matrix (CNN)")
plt.xlabel("Predicted")
plt.ylabel("True")
plt.show()

# Classification Report
# zero_division=0 reports 0.0 for a class that is never predicted, which is
# the default value as well, but without emitting a warning.
print("\n===== CLASSIFICATION REPORT (CNN) =====\n")
print(classification_report(y_test, y_pred, labels=class_ids, zero_division=0))


In [None]:
import os

# List the CSV files present in the /mnt/data directory.
data_dir = '/mnt/data'
print("CSV files in /mnt/data:")

# The directory may be absent (e.g. on Colab or Kaggle); os.listdir would then
# raise FileNotFoundError, so fall back to an empty listing instead.
if os.path.isdir(data_dir):
    csv_files = sorted(
        name for name in os.listdir(data_dir) if name.lower().endswith(".csv")
    )
else:
    csv_files = []
    print(" (directory not found)")

for f in csv_files:
    print(" →", f)


In [None]:
# ------------------------- Imports -------------------------
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Conv1D, MaxPooling1D, Flatten

# ------------------------- Load Data -------------------------
# Both CSVs are headerless; the last column is used as the class label.
train = pd.read_csv("mitbih_train.csv", header=None)
test = pd.read_csv("mitbih_test.csv", header=None)

# Drop rows whose label is missing or outside 0-4 (isin() is False for NaN).
# Without this, a NaN label turns into a garbage integer in the numpy cast
# below and ends up outside the range of the softmax output.
valid_labels = [0, 1, 2, 3, 4]
train = train[train.iloc[:, -1].isin(valid_labels)]
test = test[test.iloc[:, -1].isin(valid_labels)]

# Shuffle the training rows once, with a fixed seed for repeatability.
# Keras' `validation_split` holds out the LAST fraction of the arrays before
# any shuffling, so an ordered file would give an unrepresentative split.
# NOTE(review): the public mitbih_train.csv appears to be grouped by class,
# which would leave the training part almost single-class - confirm.
train = train.sample(frac=1.0, random_state=42).reset_index(drop=True)

X_train = train.iloc[:, :-1].values.astype(np.float32)
y_train = train.iloc[:, -1].values.astype(int)

X_test = test.iloc[:, :-1].values.astype(np.float32)
y_test = test.iloc[:, -1].values.astype(int)

# ------------------------- Standardization -------------------------
# Fit the scaler on the training features only, then reuse it for the test set.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# For CNN reshape: add a trailing axis of length 1 -> (samples, steps, 1)
X_train_cnn = X_train_scaled.reshape(X_train_scaled.shape[0], X_train_scaled.shape[1], 1)
X_test_cnn  = X_test_scaled.reshape(X_test_scaled.shape[0],  X_test_scaled.shape[1],  1)

# Size the output layer from the valid label set, not from the labels that
# happen to occur: a class missing from the training data would otherwise
# shrink the softmax and push the larger label ids out of range.
n_classes = len(valid_labels)

# ------------------------- ANN Model -------------------------
# Dense classifier over the standardized feature rows. Labels stay as integer
# class ids, which is what the sparse categorical cross-entropy loss expects.
ann = Sequential([
    Dense(256, activation='relu', input_shape=(X_train_scaled.shape[1],)),
    Dropout(0.3),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(n_classes, activation='softmax')
])

ann.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# validation_split=0.2 makes Keras hold out the trailing 20% of the arrays.
history_ann = ann.fit(X_train_scaled, y_train, epochs=10, batch_size=64, validation_split=0.2)

# Score on the test split; argmax turns the softmax rows into class ids.
ann_pred = np.argmax(ann.predict(X_test_scaled), axis=1)
# The banner emoji was mis-encoded (mojibake) in the original string literal.
print("\n🔹 ANN Accuracy:", accuracy_score(y_test, ann_pred))
print(classification_report(y_test, ann_pred))

# ------------------------- CNN Model -------------------------
# Two Conv1D + max-pooling stages feeding a dense head; the input is the
# standardized data with a trailing axis of length 1.
cnn = Sequential([
    Conv1D(32, kernel_size=5, activation='relu', input_shape=(X_train_cnn.shape[1], 1)),
    MaxPooling1D(2),
    Conv1D(64, kernel_size=5, activation='relu'),
    MaxPooling1D(2),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.4),
    Dense(n_classes, activation='softmax')
])

cnn.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# validation_split=0.2 makes Keras hold out the trailing 20% of the arrays.
history_cnn = cnn.fit(X_train_cnn, y_train, epochs=10, batch_size=64, validation_split=0.2)

# Score on the test split; argmax turns the softmax rows into class ids.
cnn_pred = np.argmax(cnn.predict(X_test_cnn), axis=1)
# The banner emoji was mis-encoded (mojibake) in the original string literal.
print("\n🔹 CNN Accuracy:", accuracy_score(y_test, cnn_pred))
print(classification_report(y_test, cnn_pred))

# ------------------------- Graphs -------------------------
# One figure per model (ANN first, then CNN). Each figure has two side-by-side
# panels: accuracy on the left, loss on the right, with the training and
# validation curves drawn in every panel.
_panels = (
    ("accuracy", "Acc", "Accuracy"),
    ("loss", "Loss", "Loss"),
)

for _hist, _model in ((history_ann, "ANN"), (history_cnn, "CNN")):
    plt.figure(figsize=(12, 5))
    for _slot, (_key, _short, _long) in enumerate(_panels, start=1):
        plt.subplot(1, 2, _slot)
        plt.plot(_hist.history[_key], label=f"Train {_short}")
        plt.plot(_hist.history[f"val_{_key}"], label=f"Val {_short}")
        plt.title(f"{_model} {_long}")
        plt.legend()
    plt.show()


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Conv1D, MaxPooling1D, Flatten

# ------------------ LOAD DATA ------------------
# Both CSVs are headerless; the last column is used as the class label.
train = pd.read_csv("mitbih_train.csv", header=None)
test  = pd.read_csv("mitbih_test.csv", header=None)

# ------------------ FIX LABEL COLUMN ------------------
# Keep only rows whose label is exactly one of the valid ids (0–4).
# isin() is False for NaN, so missing labels are removed by the same filter.
# The check runs on the raw values, BEFORE any integer cast: casting first
# would truncate a corrupt label such as 2.7 to 2 and let the row through.
# .copy() gives each filtered frame its own data, so later writes cannot
# trigger pandas' SettingWithCopyWarning.
valid_labels = [0, 1, 2, 3, 4]
train = train[train.iloc[:, -1].isin(valid_labels)].copy()
test  = test[test.iloc[:, -1].isin(valid_labels)].copy()

print("Labels cleaned ✓")

# Shuffle the training rows once, with a fixed seed for repeatability.
# Keras' `validation_split` holds out the LAST fraction of the arrays before
# any shuffling, so an ordered file would give an unrepresentative split.
# NOTE(review): the public mitbih_train.csv appears to be grouped by class,
# which would leave the training part almost single-class - confirm.
train = train.sample(frac=1.0, random_state=42).reset_index(drop=True)

# Labels are cast to int here, after validation, when the arrays are extracted.
X_train = train.iloc[:, :-1].values.astype(np.float32)
y_train = train.iloc[:, -1].values.astype(int)

X_test = test.iloc[:, :-1].values.astype(np.float32)
y_test = test.iloc[:, -1].values.astype(int)

# ------------------ NORMALIZATION ------------------
# Fit the scaler on the training features only, then reuse it for the test set.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test  = scaler.transform(X_test)

# For CNN reshape: add a trailing axis of length 1 -> (samples, steps, 1)
X_train_cnn = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test_cnn  = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# Size the output layer from the valid label set, not from the labels that
# happen to occur: a class missing from the training data would otherwise
# shrink the softmax and push the larger label ids out of range.
n_classes = len(valid_labels)

# ------------------ ANN ------------------
# Dense network on the standardized features, with dropout after the first
# hidden layer and a softmax over n_classes outputs.
ann = Sequential()
for _layer in (
    Dense(units=256, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(rate=0.3),
    Dense(units=128, activation='relu'),
    Dense(units=64, activation='relu'),
    Dense(units=n_classes, activation='softmax'),
):
    ann.add(_layer)

# Integer class ids are used directly, hence the sparse loss variant.
ann.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
history_ann = ann.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=10,
    validation_split=0.2,
)

# Predicted class = index of the largest softmax output per row.
pred_ann = ann.predict(X_test).argmax(axis=1)
print("\nANN Accuracy:", accuracy_score(y_test, pred_ann))
print(classification_report(y_test, pred_ann))

# ------------------ CNN ------------------
# Two convolution/pooling stages, then a dense head with dropout and a softmax
# over n_classes outputs.
cnn = Sequential()
cnn.add(Conv1D(filters=32, kernel_size=5, activation='relu',
               input_shape=(X_train_cnn.shape[1], 1)))
cnn.add(MaxPooling1D(pool_size=2))
cnn.add(Conv1D(filters=64, kernel_size=5, activation='relu'))
cnn.add(MaxPooling1D(pool_size=2))
cnn.add(Flatten())
cnn.add(Dense(units=128, activation='relu'))
cnn.add(Dropout(rate=0.4))
cnn.add(Dense(units=n_classes, activation='softmax'))

# Integer class ids are used directly, hence the sparse loss variant.
cnn.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
history_cnn = cnn.fit(
    x=X_train_cnn,
    y=y_train,
    batch_size=64,
    epochs=10,
    validation_split=0.2,
)

# Predicted class = index of the largest softmax output per row.
pred_cnn = cnn.predict(X_test_cnn).argmax(axis=1)
print("\nCNN Accuracy:", accuracy_score(y_test, pred_cnn))
print(classification_report(y_test, pred_cnn))

# ------------------ TRAINING GRAPHS ------------------
# One accuracy figure per model (ANN first, then CNN), each showing the
# training curve and the validation curve recorded during fitting.
for _hist, _model in ((history_ann, "ANN"), (history_cnn, "CNN")):
    plt.figure(figsize=(12, 5))
    plt.plot(_hist.history['accuracy'], label=f"{_model} Train Acc")
    plt.plot(_hist.history['val_accuracy'], label=f"{_model} Val Acc")
    plt.title(f"{_model} Accuracy")
    plt.legend()
    plt.show()
