<a href="https://colab.research.google.com/github/Sri-Deepthi-N/CAD_Phase1/blob/main/SHMS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

# Step 1: Load the dataset
file_path = '/content/heart_disease.csv'
data = pd.read_csv(file_path)

# Step 2: Preprocess the data
# Drop the target column if present (unsupervised anomaly detection)
if 'Heart Disease Status' in data.columns:
    X = data.drop(columns=['Heart Disease Status'])
else:
    X = data

# Handle missing values using mean imputation
imputer = SimpleImputer(strategy='mean')
X_imputed = imputer.fit_transform(X)

# Normalize the data using StandardScaler
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_imputed)

# Step 3: Train the Isolation Forest model
model = IsolationForest(n_estimators=100, contamination=0.1, random_state=42)
model.fit(X_scaled)

# Step 4: Predict anomalies
# '-1' indicates anomalies, '1' indicates normal data
anomaly_predictions = model.predict(X_scaled)

# Convert predictions to binary labels (0: normal, 1: anomaly)
data['Anomaly'] = (anomaly_predictions == -1).astype(int)

# Step 5: Print heart rate for detected anomalies
if 'CRP Level' in data.columns:
    print("CRP Levels of Detected Anomalies:")
    anomalies = data[data['Anomaly'] == 1]
    for index, row in anomalies.iterrows():
        print(f"Anomaly detected! CRP Level: {row['CRP Level']}")
else:
    print("The 'CRP Level' column was not found in the dataset.")


In [None]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# Load the data
data = pd.read_csv('/content/heart_disease.csv')

# Select a single feature for time-series prediction (e.g., Blood Pressure)
time_series_data = data['Blood Pressure'].dropna().values.reshape(-1, 1)

# Scale the data
scaler = MinMaxScaler()
time_series_data = scaler.fit_transform(time_series_data)

# Create time-series sequences for LSTM
lookback = 5
X, y = [], []
for i in range(len(time_series_data) - lookback):
    X.append(time_series_data[i:i + lookback])
    y.append(time_series_data[i + lookback])

X, y = np.array(X), np.array(y)

# Split the data into 80% train and 20% test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Define LSTM model
model = Sequential([
    LSTM(50, activation='relu', input_shape=(lookback, 1)),
    Dense(1)  # Predict the next value
])

# Compile the model
model.compile(optimizer='adam', loss='mse')

# Train the model
model.fit(X_train, y_train, epochs=5, batch_size=32, verbose=1)

# Evaluate the model on the test data
test_loss = model.evaluate(X_test, y_test, verbose=1)
print(f"Test Loss: {test_loss}")

# Make predictions
predictions = model.predict(X_test)

# Inverse transform predictions and test labels
predictions = scaler.inverse_transform(predictions)
y_test_inv = scaler.inverse_transform(y_test.reshape(-1, 1))

# Results comparison
results = pd.DataFrame({'Actual': y_test_inv.flatten(), 'Predicted': predictions.flatten()})
print(results.head())


Epoch 1/5


  super().__init__(**kwargs)


[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - loss: 0.1472
Epoch 2/5
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 0.0873
Epoch 3/5
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.0875
Epoch 4/5
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.0870
Epoch 5/5
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.0867
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0862
Test Loss: 0.08494746685028076
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
   Actual   Predicted
0   133.0  150.201752
1   178.0  149.754837
2   166.0  150.052643
3   162.0  150.620895
4   175.0  150.570694


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
import pandas as pd

# Load the data
data = pd.read_csv('/content/heart_disease.csv')

# Ensure column names are clean
data.columns = data.columns.str.strip()

# Identify the target column and features
target_column = "Heart Disease Status"  # Target column
X = data.drop(columns=[target_column]).fillna(0)  # Features
y = data[target_column]  # Target

# Encode the target column if it is categorical
if y.dtype == 'object':
    y = y.astype('category').cat.codes

# Check class distribution
print("Class Distribution:\n", y.value_counts())

# Split the data into training and testing sets (80% training, 20% testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features (scaling to zero mean and unit variance)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Initialize and train the SVM model with class balancing
svm_model = SVC(kernel='rbf', C=1.0, gamma='scale', class_weight='balanced', random_state=42)
svm_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = svm_model.predict(X_test)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred, zero_division=0))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))

# Save the predictions
results = pd.DataFrame({
    'Actual': y_test,
    'Predicted': y_pred
})

In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Load the dataset
data = pd.read_csv('/content/heart_disease.csv')

# Preprocessing: Handle missing values, if any
data = data.dropna()

# Select features for anomaly detection (exclude target column if present)
features = data.drop(columns=['target'], errors='ignore')  # Replace 'target' with actual target column name if applicable

# Normalize features
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)

# Split the data for training and testing (80% for training, 20% for testing)
X_train, X_test = train_test_split(scaled_features, test_size=0.2, random_state=42)

# Train Isolation Forest
model = IsolationForest(n_estimators=100, contamination=0.1, random_state=42)
model.fit(X_train)

# Predict anomalies for the test set (-1: anomaly, 1: normal)
predictions = model.predict(X_test)

# Add predictions back to the data for analysis
test_results = pd.DataFrame(X_test, columns=features.columns)
test_results['Anomaly'] = predictions


# If 'CRP Level' column is present, print CRP Levels of detected anomalies
if 'CRP Level' in data.columns:
    print("\nCRP Levels of Detected Anomalies:")
    anomalies = test_results[test_results['Anomaly'] == -1]  # Anomalies are labeled as -1
    for index, row in anomalies.iterrows():
        print(f"Anomaly detected! CRP Level: {row['CRP Level']}")
else:
    print("The 'CRP Level' column was not found in the dataset.")


In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Load your dataset from CSV
data = pd.read_csv('heart_disease.csv')

# Ensure there are no NaN or infinite values in the data
data = data.replace([np.inf, -np.inf], np.nan)
data = data.dropna()

# Step 1: Split the data into training and testing (80% for training, 20% for testing)
train_data, test_data = train_test_split(data.values, test_size=0.2, random_state=42)

# Normalize the data (use MinMaxScaler to scale the data to a range of [0,1])
scaler = MinMaxScaler()
train_data_scaled = scaler.fit_transform(train_data)
test_data_scaled = scaler.transform(test_data)

# Step 2: Define the Autoencoder model
input_dim = train_data_scaled.shape[1]

input_layer = Input(shape=(input_dim,))
encoded = Dense(16, activation='relu')(input_layer)
encoded = Dense(8, activation='relu')(encoded)
decoded = Dense(16, activation='relu')(encoded)
decoded = Dense(input_dim, activation='linear')(decoded)  # Changed from 'sigmoid' to 'linear'

autoencoder = Model(inputs=input_layer, outputs=decoded)
autoencoder.compile(optimizer='adam', loss='mse')

# Step 3: Train the model
autoencoder.fit(train_data_scaled, train_data_scaled,
                epochs=5,
                batch_size=32,
                shuffle=True,
                validation_data=(test_data_scaled, test_data_scaled))

# Step 4: Evaluate on test data
# Calculate reconstruction errors
reconstructed = autoencoder.predict(test_data_scaled)
reconstruction_errors = np.mean(np.square(test_data_scaled - reconstructed), axis=1)

# Step 5: Set a threshold for anomalies
threshold = np.percentile(reconstruction_errors, 95)  # Adjust based on requirements

# Flag anomalies (1 = anomaly, 0 = normal)
anomalies = reconstruction_errors > threshold

# Assume you have ground truth labels for anomalies in `ground_truth_labels` (1 for anomaly, 0 for normal)
# Replace this with your actual labels
ground_truth_labels = np.random.choice([0, 1], size=len(test_data))  # Example ground truth labels

# Example: Inverse transform the predictions and actual data to the original scale
predictions = autoencoder.predict(test_data_scaled)
predictions = scaler.inverse_transform(predictions)

actual_values = scaler.inverse_transform(test_data)

# Create a DataFrame for displaying Actual vs Predicted values
results = pd.DataFrame({
    'Actual': actual_values.flatten(),  # Flatten the actual data for easy comparison
    'Predicted': predictions.flatten()   # Flatten the predictions for easy comparison
})

print(results.head())


Epoch 1/5
[1m238/238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: 0.2487 - val_loss: 0.1351
Epoch 2/5
[1m238/238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.1314 - val_loss: 0.1214
Epoch 3/5
[1m238/238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.1191 - val_loss: 0.1102
Epoch 4/5
[1m238/238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.1090 - val_loss: 0.1056
Epoch 5/5
[1m238/238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.1045 - val_loss: 0.1033
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
    Actual   Predicted
0   2622.0   54.059902
1      2.0    1.306432
2  10080.0  150.822052
3  33450.0  227.518082
4      5.0    2.106972


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import IsolationForest
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

# Step 1: Load the dataset
file_path = "heart_disease.csv"  # Change path if needed
df = pd.read_csv(file_path)

# Step 2: Data Preprocessing
df_cleaned = df.dropna()  # Drop missing values

# Separate features and target variable
X = df_cleaned.drop(columns=["Heart Disease Status"])
y = df_cleaned["Heart Disease Status"]

# Normalize numerical features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split data into training (80%) and testing (20%)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Step 3: Apply Isolation Forest for Anomaly Detection
iso_forest = IsolationForest(n_estimators=100, contamination=0.05, random_state=42)
anomaly_scores = iso_forest.fit_predict(X_train)

# Identify anomalies (outliers are labeled as -1)
anomalies = np.where(anomaly_scores == -1)[0]

# Step 4: Train XGBoost Classifier
clf = XGBClassifier(eval_metric="logloss")
clf.fit(X_train, y_train)

# Step 5: Evaluate the model
y_pred = clf.predict(X_test)
overall_accuracy = accuracy_score(y_test, y_pred)  # No need to multiply by 100
print(f"Overall Model Accuracy: {overall_accuracy:.2f}")

# Step 6: Compute accuracy and loss for each anomaly
print("\nAccuracy and Loss for Each Anomaly:")
all_anomalies_X = X_train[anomalies]  # Get all anomalies
all_anomalies_y = y_train.iloc[anomalies]  # Get actual labels

# Predict all anomalies
all_anomalies_pred = clf.predict(all_anomalies_X)

# Compute overall anomaly accuracy
anomaly_accuracy = accuracy_score(all_anomalies_y, all_anomalies_pred)  # No need to multiply by 100
accuracy_loss = overall_accuracy - anomaly_accuracy  # Compute accuracy loss

# Print each anomaly's accuracy and loss
for i, idx in enumerate(anomalies):
    feature_values = X_train[idx]  # Get feature values of the anomaly
    predicted_label = all_anomalies_pred[i]  # Get predicted label
    actual_label = all_anomalies_y.iloc[i]  # Get actual label

    # Calculate accuracy for each anomaly
    anomaly_accuracy_for_this = 1.0 if predicted_label == actual_label else 0.0

    # Loss is the difference from overall accuracy
    anomaly_loss_for_this = overall_accuracy - anomaly_accuracy_for_this

    print(f"Anomaly {i+1}: Accuracy = {anomaly_accuracy_for_this:.2f}, Loss = {anomaly_loss_for_this:.2f}")

# Print final summary
print(f"\nOverall Anomaly Detection Accuracy: {anomaly_accuracy:.2f}")
print(f"Overall Accuracy Loss Due to Anomalies: {accuracy_loss:.2f}")


Overall Model Accuracy: 0.79

Accuracy and Loss for Each Anomaly:
Anomaly 1: Accuracy = 1.00, Loss = -0.21
Anomaly 2: Accuracy = 1.00, Loss = -0.21
Anomaly 3: Accuracy = 1.00, Loss = -0.21
Anomaly 4: Accuracy = 1.00, Loss = -0.21
Anomaly 5: Accuracy = 1.00, Loss = -0.21
Anomaly 6: Accuracy = 1.00, Loss = -0.21
Anomaly 7: Accuracy = 1.00, Loss = -0.21
Anomaly 8: Accuracy = 1.00, Loss = -0.21
Anomaly 9: Accuracy = 1.00, Loss = -0.21
Anomaly 10: Accuracy = 1.00, Loss = -0.21
Anomaly 11: Accuracy = 1.00, Loss = -0.21
Anomaly 12: Accuracy = 1.00, Loss = -0.21
Anomaly 13: Accuracy = 1.00, Loss = -0.21
Anomaly 14: Accuracy = 1.00, Loss = -0.21
Anomaly 15: Accuracy = 1.00, Loss = -0.21
Anomaly 16: Accuracy = 1.00, Loss = -0.21
Anomaly 17: Accuracy = 1.00, Loss = -0.21
Anomaly 18: Accuracy = 1.00, Loss = -0.21
Anomaly 19: Accuracy = 1.00, Loss = -0.21
Anomaly 20: Accuracy = 1.00, Loss = -0.21
Anomaly 21: Accuracy = 1.00, Loss = -0.21
Anomaly 22: Accuracy = 1.00, Loss = -0.21
Anomaly 23: Accurac

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import IsolationForest
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings("ignore", category=UserWarning, module="xgboost")

# Step 1: Load the dataset
file_path = "Dataset Heart Disease.csv"  # Change path if needed
df = pd.read_csv(file_path)

# Drop unnecessary columns
df_cleaned = df.drop(columns=["Unnamed: 0"])

# Define features and target
X = df_cleaned.drop(columns=["target"])
y = df_cleaned["target"].astype(int)  # Ensure y is an integer

# Identify numerical columns for scaling
num_cols = ["age", "resting bps", "cholesterol", "max heart rate", "oldpeak"]

# Scale numerical features only
scaler = StandardScaler()
X[num_cols] = scaler.fit_transform(X[num_cols])

# Step 2: Split dataset into training (80%) and testing (20%)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert data to NumPy arrays for XGBoost compatibility
X_train, X_test, y_train, y_test = X_train.values, X_test.values, y_train.values, y_test.values

# Step 3: Apply Isolation Forest for Anomaly Detection
iso_forest = IsolationForest(n_estimators=200, contamination=0.05, random_state=42)
anomaly_scores = iso_forest.fit_predict(X_train)

# Identify anomalies (outliers are labeled as -1)
anomalies = np.where(anomaly_scores == -1)[0]

# Remove detected anomalies from training data to improve accuracy
X_train_clean = np.delete(X_train, anomalies, axis=0)
y_train_clean = np.delete(y_train, anomalies, axis=0)

# Step 4: Train XGBoost Classifier with Optimized Hyperparameters
clf = XGBClassifier(n_estimators=200, learning_rate=0.05, max_depth=6, eval_metric="logloss", use_label_encoder=False)

clf.fit(X_train_clean, y_train_clean)

# Step 5: Evaluate the model
y_pred = clf.predict(X_test)
overall_accuracy = accuracy_score(y_test, y_pred)
print(f"Overall Model Accuracy (After Removing Anomalies): {overall_accuracy:.2f}")

# Step 6: Compute accuracy and loss for anomalies
if len(anomalies) > 0:
    print(f"\nTotal number of anomalies detected: {len(anomalies)}")

    # Get all anomaly data
    all_anomalies_X = X_train[anomalies]
    all_anomalies_y = y_train[anomalies]

    # Predict anomalies
    all_anomalies_pred = clf.predict(all_anomalies_X)

    # Compute overall anomaly accuracy
    anomaly_accuracy = accuracy_score(all_anomalies_y, all_anomalies_pred)
    accuracy_loss = overall_accuracy - anomaly_accuracy

    # Print each anomaly’s accuracy and loss
    print("\nAnomaly Details and Accuracy Loss:")
    for i, idx in enumerate(anomalies):
        predicted_label = all_anomalies_pred[i]
        actual_label = all_anomalies_y[i]

        # Accuracy per anomaly
        anomaly_accuracy_for_this = 1.0 if predicted_label == actual_label else 0.0
        anomaly_loss_for_this = overall_accuracy - anomaly_accuracy_for_this

        print(f"Anomaly {i+1}:  "
              f"Accuracy = {anomaly_accuracy_for_this:.2f}, Loss = {anomaly_loss_for_this:.2f}")

    # Display all detected anomalies
    anomalies_df = pd.DataFrame(X_train[anomalies], columns=X.columns)
    anomalies_df["Actual Target"] = all_anomalies_y
    anomalies_df["Predicted Target"] = all_anomalies_pred

    # Print final anomaly detection summary
    print(f"\nOverall Anomaly Detection Accuracy: {anomaly_accuracy:.2f}")
    print(f"Overall Accuracy Loss Due to Anomalies: {accuracy_loss:.2f}")
else:
    print("\nNo anomalies detected in the dataset.")


Overall Model Accuracy (After Removing Anomalies): 0.73

Total number of anomalies detected: 42

Anomaly Details and Accuracy Loss:
Anomaly 1:  Accuracy = 1.00, Loss = -0.27
Anomaly 2:  Accuracy = 0.00, Loss = 0.73
Anomaly 3:  Accuracy = 1.00, Loss = -0.27
Anomaly 4:  Accuracy = 1.00, Loss = -0.27
Anomaly 5:  Accuracy = 1.00, Loss = -0.27
Anomaly 6:  Accuracy = 0.00, Loss = 0.73
Anomaly 7:  Accuracy = 0.00, Loss = 0.73
Anomaly 8:  Accuracy = 1.00, Loss = -0.27
Anomaly 9:  Accuracy = 0.00, Loss = 0.73
Anomaly 10:  Accuracy = 1.00, Loss = -0.27
Anomaly 11:  Accuracy = 1.00, Loss = -0.27
Anomaly 12:  Accuracy = 1.00, Loss = -0.27
Anomaly 13:  Accuracy = 1.00, Loss = -0.27
Anomaly 14:  Accuracy = 1.00, Loss = -0.27
Anomaly 15:  Accuracy = 1.00, Loss = -0.27
Anomaly 16:  Accuracy = 1.00, Loss = -0.27
Anomaly 17:  Accuracy = 1.00, Loss = -0.27
Anomaly 18:  Accuracy = 1.00, Loss = -0.27
Anomaly 19:  Accuracy = 1.00, Loss = -0.27
Anomaly 20:  Accuracy = 1.00, Loss = -0.27
Anomaly 21:  Accuracy

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

# Step 1: Load the dataset
file_path = "Dataset Heart Disease.csv"  # Adjust path if needed
df = pd.read_csv(file_path)

# Drop unnecessary columns
df_cleaned = df.drop(columns=["Unnamed: 0"])

# Define features and target
X = df_cleaned.drop(columns=["target"])
y = df_cleaned["target"].astype(int)  # Ensure target is an integer

# Identify numerical columns for scaling
num_cols = ["age", "resting bps", "cholesterol", "max heart rate", "oldpeak"]

# Scale numerical features
scaler = StandardScaler()
X[num_cols] = scaler.fit_transform(X[num_cols])

# Step 2: Split dataset into training (80%) and testing (20%)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 3: Train an XGBoost classifier
xgb_clf = XGBClassifier(n_estimators=200, learning_rate=0.05, max_depth=6, eval_metric="logloss", use_label_encoder=False)
xgb_clf.fit(X_train, y_train)

# Step 4: Predict on the test set
y_pred = xgb_clf.predict(X_test)

# Step 5: Compute overall accuracy and loss
accuracy = accuracy_score(y_test, y_pred)
accuracy_loss = 1 - accuracy  # Loss is 1 - accuracy

# Print overall accuracy and loss with high precision
print(f"Model Accuracy: {accuracy:.5f}")
print(f"Model Loss: {accuracy_loss:.5f}")

# Step 6: Disease mapping (adjust based on your dataset)
# Mapping numeric target values to actual disease names
disease_mapping = {
    0: "Heart Attack",
    1: "Heart Failure",
    2: "Congenital Heart Disease",
    3: "Arrhythmia"  # Add any other disease types if necessary
}

# Map actual and predicted target values to disease names
y_test_mapped = [disease_mapping[val] for val in y_test]
y_pred_mapped = [disease_mapping[val] for val in y_pred]

# Step 7: Calculate accuracy and loss for each row
accuracy_per_row = [1 if actual == predicted else 0 for actual, predicted in zip(y_test, y_pred)]
loss_per_row = [1 - acc for acc in accuracy_per_row]

# Create a DataFrame for disease names with corresponding accuracy/loss
results_df = pd.DataFrame({
    'Disease Type': y_pred_mapped,  # Showing predicted disease name
    'Accuracy Rate': accuracy_per_row,
    'Loss Rate': loss_per_row
})

# Print all rows without rounding off accuracy and loss rates
pd.set_option('display.float_format', '{:.5f}'.format)  # Set precision for display

print("\nAll Test Cases with Disease Types, Accuracy, and Loss Rates:")
print(results_df)


Parameters: { "use_label_encoder" } are not used.



Model Accuracy: 0.74286
Model Loss: 0.25714

All Test Cases with Disease Types, Accuracy, and Loss Rates:
      Disease Type  Accuracy Rate  Loss Rate
0     Heart Attack              0          1
1     Heart Attack              0          1
2    Heart Failure              0          1
3    Heart Failure              0          1
4     Heart Attack              1          0
..             ...            ...        ...
205  Heart Failure              1          0
206  Heart Failure              1          0
207  Heart Failure              1          0
208  Heart Failure              1          0
209  Heart Failure              1          0

[210 rows x 3 columns]


In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error

# Load the dataset
df = pd.read_csv('/content/Dataset Heart Disease.csv')

# Print the column names to ensure correct referencing
print(df.columns)

# Normalize the features (replace these with your actual column names)
features = ["age", "resting bps", "cholesterol", "max heart rate", "oldpeak"]
target = 'target'  # Target column for prediction

# Normalize the features and the target separately
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_features = scaler.fit_transform(df[features])

# Normalize the target separately
scaler_target = MinMaxScaler(feature_range=(0, 1))
scaled_target = scaler_target.fit_transform(df[[target]])

# Prepare data for LSTM (assuming the time series data is already ordered)
look_back = 30  # Number of previous time steps to look at (e.g., last 30 time steps)

# Function to create datasets for LSTM
def create_dataset(data, target, look_back):
    X, y = [], []
    for i in range(len(data) - look_back):
        X.append(data[i:(i + look_back)])
        y.append(target[i + look_back])
    return np.array(X), np.array(y)

X, y = create_dataset(scaled_features, scaled_target, look_back)

# Reshape the data for LSTM input (samples, time steps, features)
X = X.reshape(X.shape[0], X.shape[1], X.shape[2])

# Step 2: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Step 3: Build the LSTM Model
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(LSTM(50, return_sequences=False))
model.add(Dense(25))
model.add(Dense(1))  # Predicting one value (e.g., target)

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Step 4: Train the Model
model.fit(X_train, y_train, epochs=20, batch_size=32)

# Step 5: Evaluate the Model
predictions = model.predict(X_test)

# Inverse transform the predictions (only for the target)
predictions = scaler_target.inverse_transform(predictions)

# Inverse transform the actual values (only for the target)
y_test_actual = scaler_target.inverse_transform(y_test)

# Example: Calculate Mean Absolute Error
mae = mean_absolute_error(y_test_actual, predictions)
print(f'Mean Absolute Error: {mae}')


Index(['Unnamed: 0', 'age', 'sex', 'chest pain type', 'resting bps',
       'cholesterol', 'fasting blood sugar', 'resting ecg', 'max heart rate',
       'exercise angina', 'oldpeak', 'ST slope', 'target'],
      dtype='object')


  super().__init__(**kwargs)


Epoch 1/20
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 27ms/step - loss: 0.3016
Epoch 2/20
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 29ms/step - loss: 0.2444
Epoch 3/20
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 0.2440
Epoch 4/20
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - loss: 0.2394
Epoch 5/20
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 0.2433
Epoch 6/20
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - loss: 0.2408
Epoch 7/20
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 0.2392
Epoch 8/20
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 29ms/step - loss: 0.2489
Epoch 9/20
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - loss: 0.2428
Epoch 10/20
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 30ms/step - loss: 0.2399

In [None]:
pip nstall tensorflow xgboost pandas numpy scikit-learn hummingbird-ml


In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models

# Define your model with Input layer
model = models.Sequential([
    layers.Input(shape=(10,)),  # Input layer
    layers.Dense(64, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Save the model in the native Keras format
model.save('/content/IsolationForest.keras')

# Convert to TensorFlow Lite
converter = tf.lite.TFLiteConverter.from_keras_model(model)
model_tflite = converter.convert()

# Save the TFLite model
with open("IF.tflite", "wb") as f:
    f.write(model_tflite)


Saved artifact at '/tmp/tmpivqoyolo'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 10), dtype=tf.float32, name='keras_tensor_3')
Output Type:
  TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)
Captures:
  140164308884880: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140164308887376: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140164308885648: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140164308880848: TensorSpec(shape=(), dtype=tf.resource, name=None)


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import IsolationForest
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.metrics import accuracy_score

# Step 1: Load the dataset
file_path = "Dataset Heart Disease.csv"  # Change path if needed
df = pd.read_csv(file_path)

# Drop unnecessary columns
df_cleaned = df.drop(columns=["Unnamed: 0"])

# Define features and target
X = df_cleaned.drop(columns=["target"])
y = df_cleaned["target"].astype(int)  # Ensure y is an integer

# Identify numerical columns for scaling
num_cols = ["age", "resting bps", "cholesterol", "max heart rate", "oldpeak"]

# Scale numerical features only
scaler = StandardScaler()
X[num_cols] = scaler.fit_transform(X[num_cols])

# Step 2: Split dataset into training (80%) and testing (20%)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert data to NumPy arrays for training compatibility
X_train, X_test, y_train, y_test = X_train.values, X_test.values, y_train.values, y_test.values

# Step 3: Apply Isolation Forest for Anomaly Detection
iso_forest = IsolationForest(n_estimators=200, contamination=0.05, random_state=42)
anomaly_scores = iso_forest.fit_predict(X_train)

# Identify anomalies (outliers are labeled as -1)
anomalies = np.where(anomaly_scores == -1)[0]

# Remove detected anomalies from training data to improve accuracy
X_train_clean = np.delete(X_train, anomalies, axis=0)
y_train_clean = np.delete(y_train, anomalies, axis=0)

# Step 4: Train Keras Classifier
keras_model = models.Sequential([
    layers.Input(shape=(X_train_clean.shape[1],)),  # Input layer with the same shape as the features
    layers.Dense(64, activation='relu'),
    layers.Dense(1, activation='sigmoid')  # Output layer for binary classification
])

keras_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
keras_model.fit(X_train_clean, y_train_clean, epochs=10, batch_size=32)

# Step 5: Evaluate the Keras model
y_pred = keras_model.predict(X_test)
y_pred_binary = (y_pred > 0.5).astype(int).flatten()

overall_accuracy = accuracy_score(y_test, y_pred_binary)
print(f"Overall Model Accuracy (After Removing Anomalies): {overall_accuracy:.2f}")

# Step 6: Convert the Keras model to TensorFlow Lite format
converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
tflite_model = converter.convert()

# Save the TFLite model to a file
with open('isolationforest.tflite', 'wb') as f:
    f.write(tflite_model)

print("TensorFlow Lite model saved as 'model.tflite'")


Epoch 1/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.5322 - loss: 0.7176
Epoch 2/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6086 - loss: 0.6490
Epoch 3/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6415 - loss: 0.6160
Epoch 4/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6687 - loss: 0.6038
Epoch 5/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7145 - loss: 0.5771
Epoch 6/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7295 - loss: 0.5643
Epoch 7/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7096 - loss: 0.5712 
Epoch 8/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7642 - loss: 0.5336 
Epoch 9/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras import models, layers
from sklearn.metrics import accuracy_score
import tensorflow as tf

# Step 1: Load the dataset
file_path = "Dataset Heart Disease.csv"  # Adjust path if needed
df = pd.read_csv(file_path)

# Drop unnecessary columns
df_cleaned = df.drop(columns=["Unnamed: 0"])

# Define features and target
X = df_cleaned.drop(columns=["target"])
y = df_cleaned["target"].astype(int)  # Ensure target is an integer

# Identify numerical columns for scaling
num_cols = ["age", "resting bps", "cholesterol", "max heart rate", "oldpeak"]

# Scale numerical features
scaler = StandardScaler()
X[num_cols] = scaler.fit_transform(X[num_cols])

# Step 2: Split dataset into training (80%) and testing (20%)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 3: Create a Keras model for classification
model = models.Sequential([
    layers.Input(shape=(X_train.shape[1],)),  # Input layer
    layers.Dense(64, activation='relu'),  # Hidden layer with 64 units and ReLU activation
    layers.Dense(1, activation='sigmoid')  # Output layer for binary classification
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Step 4: Train the Keras model
model.fit(X_train, y_train, epochs=10, batch_size=32)

# Step 5: Predict on the test set
y_pred = model.predict(X_test)
y_pred_binary = (y_pred > 0.5).astype(int).flatten()  # Convert probabilities to binary values

# Step 6: Compute overall accuracy and loss
accuracy = accuracy_score(y_test, y_pred_binary)
accuracy_loss = 1 - accuracy  # Loss is 1 - accuracy

# Print overall accuracy and loss with high precision
print(f"Model Accuracy: {accuracy:.5f}")
print(f"Model Loss: {accuracy_loss:.5f}")

# Step 7: Disease mapping (adjust based on your dataset)
# Mapping numeric target values to actual disease names
disease_mapping = {
    0: "Heart Attack",
    1: "Heart Failure",
    2: "Congenital Heart Disease",
    3: "Arrhythmia"  # Add any other disease types if necessary
}

# Map actual and predicted target values to disease names
y_test_mapped = [disease_mapping[val] for val in y_test]
y_pred_mapped = [disease_mapping[val] for val in y_pred_binary]

# Step 8: Calculate accuracy and loss for each row
accuracy_per_row = [1 if actual == predicted else 0 for actual, predicted in zip(y_test, y_pred_binary)]
loss_per_row = [1 - acc for acc in accuracy_per_row]

# Create a DataFrame for disease names with corresponding accuracy/loss
results_df = pd.DataFrame({
    'Disease Type': y_pred_mapped,  # Showing predicted disease name
    'Accuracy Rate': accuracy_per_row,
    'Loss Rate': loss_per_row
})

# Print all rows without rounding off accuracy and loss rates
pd.set_option('display.float_format', '{:.5f}'.format)  # Set precision for display

print("\nAll Test Cases with Disease Types, Accuracy, and Loss Rates:")
print(results_df)

# Step 9: Convert the Keras model to TensorFlow Lite
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Save the TFLite model to a file
with open('SVM.tflite', 'wb') as f:
    f.write(tflite_model)

print("TensorFlow Lite model saved as 'heart_disease_model.tflite'")


Epoch 1/10
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.5238 - loss: 0.6883
Epoch 2/10
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6407 - loss: 0.6325
Epoch 3/10
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6613 - loss: 0.6183
Epoch 4/10
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6726 - loss: 0.5986
Epoch 5/10
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7089 - loss: 0.5835
Epoch 6/10
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7307 - loss: 0.5591
Epoch 7/10
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7169 - loss: 0.5649
Epoch 8/10
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7387 - loss: 0.5480
Epoch 9/10
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error

# Load the dataset
df = pd.read_csv('/content/Dataset Heart Disease.csv')

# Print the column names to ensure correct referencing
print(df.columns)

# Normalize the features (replace these with your actual column names)
features = ["age", "resting bps", "cholesterol", "max heart rate", "oldpeak"]
target = 'target'  # Target column for prediction

# Normalize the features and the target separately
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_features = scaler.fit_transform(df[features])

# Normalize the target separately
scaler_target = MinMaxScaler(feature_range=(0, 1))
scaled_target = scaler_target.fit_transform(df[[target]])

# Prepare data for LSTM (assuming the time series data is already ordered)
look_back = 30  # Number of previous time steps to look at (e.g., last 30 time steps)

# Function to create datasets for LSTM
def create_dataset(data, target, look_back):
    X, y = [], []
    for i in range(len(data) - look_back):
        X.append(data[i:(i + look_back)])
        y.append(target[i + look_back])
    return np.array(X), np.array(y)

X, y = create_dataset(scaled_features, scaled_target, look_back)

# Reshape the data for LSTM input (samples, time steps, features)
X = X.reshape(X.shape[0], X.shape[1], X.shape[2])

# Step 2: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Step 3: Build the LSTM Model
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(LSTM(50, return_sequences=False))
model.add(Dense(25))
model.add(Dense(1))  # Predicting one value (e.g., target)

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Step 4: Train the Model
model.fit(X_train, y_train, epochs=5, batch_size=32)

# Step 5: Evaluate the Model
predictions = model.predict(X_test)

# Inverse transform the predictions (only for the target)
predictions = scaler_target.inverse_transform(predictions)

# Inverse transform the actual values (only for the target)
y_test_actual = scaler_target.inverse_transform(y_test)

# Example: Calculate Mean Absolute Error
mae = mean_absolute_error(y_test_actual, predictions)
print(f'Mean Absolute Error: {mae}')

# Step 6: Convert the trained model to TensorFlow Lite format
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.allow_custom_ops = True
# Enable resource variables and set supported operations
converter.experimental_enable_resource_variables = True
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS,
    tf.lite.OpsSet.SELECT_TF_OPS
]

# Perform the conversion
tflite_model = converter.convert()

# Step 7: Save the TensorFlow Lite model to a file
with open('lstm_model.tflite', 'wb') as f:
    f.write(tflite_model)

print("TensorFlow Lite model saved as 'lstm_model.tflite'")


Index(['Unnamed: 0', 'age', 'sex', 'chest pain type', 'resting bps',
       'cholesterol', 'fasting blood sugar', 'resting ecg', 'max heart rate',
       'exercise angina', 'oldpeak', 'ST slope', 'target'],
      dtype='object')


  super().__init__(**kwargs)


Epoch 1/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 28ms/step - loss: 0.3007
Epoch 2/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 0.2448
Epoch 3/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 29ms/step - loss: 0.2387
Epoch 4/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - loss: 0.2449
Epoch 5/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - loss: 0.2389




[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 69ms/step
Mean Absolute Error: 0.5002403171623454
Saved artifact at '/tmp/tmpfzrdqhne'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 30, 5), dtype=tf.float32, name='keras_tensor_22')
Output Type:
  TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)
Captures:
  132030081302352: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132030081300240: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132030081299664: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132030081300048: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132030081298896: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132030081298512: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132030081299472: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132030081301200: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132030081298704: TensorSpec(shape=(), dtype=t