<a href="https://colab.research.google.com/github/SanjanaReddy1407/EV43_ANNONYMOUS/blob/main/checking.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

import warnings

# Load the dataset (Assuming the file name matches your environment)
data = pd.read_csv('/content/harshal_git.csv')

# 1. Give the columns descriptive names. The assignment raises ValueError
#    unless the CSV has exactly these 14 columns, in this order.
data.columns = ['age', 'sex', 'chest_pain_type', 'resting_blood_pressure', 'cholesterol',
                'fasting_blood_sugar', 'rest_ecg', 'max_heart_rate_achieved',
                'exercise_induced_angina', 'st_depression', 'st_slope',
                'num_major_vessels', 'thalassemia', 'target']

# 2. Map categorical numbers to strings for better interpretability
category_labels = {
    'sex': {0: 'female', 1: 'male'},
    'chest_pain_type': {1: 'typical', 2: 'atypical', 3: 'non-anginal', 4: 'asymptomatic'},
    'thalassemia': {1: 'normal', 2: 'fixed', 3: 'reversible'},
}
for column, labels in category_labels.items():
    # Series.map turns every code that is missing from the dict into NaN, and
    # get_dummies below encodes NaN rows as all zeros -- indistinguishable from
    # the dropped baseline level. Report such codes instead of losing them silently.
    unmapped = data.loc[~data[column].isin(list(labels)), column]
    if not unmapped.empty:
        warnings.warn(
            f"{len(unmapped)} row(s) in '{column}' hold codes "
            f"{unmapped.unique().tolist()} that have no label and will become NaN"
        )
    data[column] = data[column].map(labels)

# 3. Create dummy variables (One-Hot Encoding); drop_first removes one level
#    per categorical column to avoid redundant indicator columns.
df_encoded = pd.get_dummies(data, drop_first=True)

# 4. Split Features (X) and Target (y)
X = df_encoded.drop('target', axis=1)
y = df_encoded['target']

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report

# Build the random forest and train it on the training split
# (fit returns the estimator itself, so the call can be chained).
model = RandomForestClassifier(n_estimators=50, max_depth=5, random_state=0).fit(X_train, y_train)

# Predict labels for the held-out split
y_pred = model.predict(X_test)

# Confusion matrix: rows are the true classes, columns the predicted classes.
# For a two-class problem it unpacks row by row into (TN, FP) and (FN, TP).
cm = confusion_matrix(y_test, y_pred)
(tn, fp), (fn, tp) = cm

# Sensitivity (recall)  = TP / (TP + FN): share of actual positives that were caught
# Specificity           = TN / (TN + FP): share of actual negatives that were cleared
specificity = tn / (tn + fp)
sensitivity = tp / (tp + fn)

test_accuracy = model.score(X_test, y_test)
print(f"Testing Accuracy: {test_accuracy:.4f}")
print(f"Sensitivity: {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")

Testing Accuracy: 0.8361
Sensitivity: 0.8235
Specificity: 0.8519


In [None]:
!pip install eli5 shap pdpbox



In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

import warnings

# NOTE(review): this cell repeats the load / encode / split steps of the first
# code cell in this notebook; keep the two in sync or drop one of them.

# Load data - ensure the file path is correct for your environment
data = pd.read_csv('/content/harshal_git.csv')

# Clean column names (the CSV must have exactly these 14 columns, in this order)
data.columns = ['age', 'sex', 'chest_pain_type', 'resting_blood_pressure', 'cholesterol',
                'fasting_blood_sugar', 'rest_ecg', 'max_heart_rate_achieved',
                'exercise_induced_angina', 'st_depression', 'st_slope',
                'num_major_vessels', 'thalassemia', 'target']

# Map categorical codes to readable labels for better interpretation
category_labels = {
    'sex': {0: 'female', 1: 'male'},
    'chest_pain_type': {1: 'typical', 2: 'atypical', 3: 'non-anginal', 4: 'asymptomatic'},
    'thalassemia': {1: 'normal', 2: 'fixed', 3: 'reversible'},
}
for column, labels in category_labels.items():
    # Codes absent from the dict come out of Series.map as NaN; get_dummies then
    # gives those rows all-zero indicators, the same as the dropped baseline
    # level. Warn so the loss is visible rather than silent.
    unmapped = data.loc[~data[column].isin(list(labels)), column]
    if not unmapped.empty:
        warnings.warn(
            f"{len(unmapped)} row(s) in '{column}' hold codes "
            f"{unmapped.unique().tolist()} that have no label and will become NaN"
        )
    data[column] = data[column].map(labels)

# One-hot encoding
df_encoded = pd.get_dummies(data, drop_first=True)

X = df_encoded.drop('target', axis=1)
y = df_encoded['target']

# 80/20 train/test split with a fixed seed for repeatability
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix

# NOTE(review): same model and metrics as the earlier training cell in this
# notebook; only the print labels and rounding differ.
model = RandomForestClassifier(n_estimators=50, max_depth=5, random_state=0)
model.fit(X_train, y_train)

# Predictions on the held-out split, then the 2x2 confusion matrix
# (rows = true class, columns = predicted class).
y_pred = model.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
(tn, fp), (fn, tp) = cm

# True positive rate and true negative rate
specificity = tn / (tn + fp)
sensitivity = tp / (tp + fn)

accuracy = model.score(X_test, y_test)
print(f"Accuracy: {accuracy:.2f}")
print(f"Sensitivity (True Positive Rate): {sensitivity:.2f}")
print(f"Specificity (True Negative Rate): {specificity:.2f}")

Accuracy: 0.84
Sensitivity (True Positive Rate): 0.82
Specificity (True Negative Rate): 0.85


In [None]:
import eli5
from eli5.sklearn import PermutationImportance
import shap

# Permutation importance, fitted on the held-out split with a fixed seed
perm = PermutationImportance(model, random_state=0)
perm.fit(X_test, y_test)
# To render the importance table in a notebook cell, run:
# eli5.show_weights(perm, feature_names = X_test.columns.tolist())

# SHAP: build a tree explainer for the fitted forest, then compute
# SHAP values for every row of the test split
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X_test)

def patient_report(index):
    """Build a SHAP force plot explaining the prediction for one test-set row.

    Relies on the notebook-level ``X_test``, ``explainer`` and ``shap`` names.

    Parameters
    ----------
    index : int
        Positional (``iloc``) row number within ``X_test``.

    Returns
    -------
    The object returned by ``shap.force_plot`` for class index 1
    (presumably the positive label of ``target`` -- confirm against
    ``model.classes_``).
    """
    # Double brackets keep the selection a one-row DataFrame rather than a Series.
    patient_data = X_test.iloc[[index]]
    sv = explainer.shap_values(patient_data)

    # Older SHAP releases return a list with one (n_samples, n_features) array
    # per class; newer ones return a single array whose last axis is the class.
    # Indexing the newer layout with sv[1] would select the second *sample*
    # and fail on a one-row input, so pick the class axis explicitly.
    if not isinstance(sv, list) and getattr(sv, "ndim", 0) == 3:
        class_sv = sv[:, :, 1]
    else:
        class_sv = sv[1]

    shap.initjs()  # loads the JS needed to render the interactive plot
    return shap.force_plot(explainer.expected_value[1], class_sv, patient_data)

