In [1]:
!pip install ipykernel
!python -m ipykernel install --user --name=venv
!pip install xgboost
!pip install scikit-learn
!pip install pandas
!pip install streamlit
!pip install joblib

Installed kernelspec venv in C:\Users\poorvinayak\AppData\Roaming\jupyter\kernels\venv


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib

In [3]:
# Read the CSV and separate the label column from the feature columns
df = pd.read_csv('heart.csv')
y = df['target']
X = df.drop(columns=['target'])


In [4]:
# Report the table size, the feature names, and the class balance of the label
print(
    f"Dataset shape: {df.shape}",
    f"Features: {X.columns.tolist()}",
    f"Target distribution:\n{y.value_counts()}",
    sep="\n",
)

Dataset shape: (1025, 14)
Features: ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal']
Target distribution:
target
1    526
0    499
Name: count, dtype: int64


In [5]:
# Hold out 20% of the rows for testing. The split is stratified on y and uses a
# fixed seed so it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
    test_size=0.2,
)

In [6]:
# Show how many rows ended up in each partition
print(
    f"Training set size: {X_train.shape[0]}",
    f"Test set size: {X_test.shape[0]}",
    sep="\n",
)


Training set size: 820
Test set size: 205


In [7]:
# Standardise the features: the scaler is fitted on the training rows only and
# the same fitted scaler is then applied to both partitions
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [8]:
# Configure the three estimators; each one is seeded with random_state=42

# Gradient-boosted trees
xgb = XGBClassifier(
    eval_metric='logloss',
    learning_rate=0.1,
    max_depth=5,
    n_estimators=100,
    random_state=42,
)

# Random forest
rf = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)

# Logistic regression with a raised iteration cap
lr = LogisticRegression(max_iter=1000, random_state=42)

In [9]:
# Stack XGBoost and the random forest under the logistic-regression final estimator.
# passthrough=True and cv=5 are forwarded to StackingClassifier unchanged.
stacked_model = StackingClassifier(
    estimators=[
        ('xgb', xgb),
        ('rf', rf),
    ],
    final_estimator=lr,
    cv=5,
    passthrough=True,
)

In [10]:
# Fit the stacked ensemble on the standardised training features
print("Training stacked model...")
stacked_model.fit(X_train_scaled, y_train)
print("✅ Training complete!")

Training stacked model...
✅ Training complete!


In [11]:
# Score the fitted ensemble on the held-out test rows
y_pred = stacked_model.predict(X_test_scaled)

accuracy = accuracy_score(y_test, y_pred)
# NOTE(review): the recorded run reports a perfect 1.0000 here, which is unusual
# for a held-out split. Check df.duplicated().sum(): if the CSV contains repeated
# rows, copies of test rows are also present in the training set and this score
# is inflated. TODO confirm before trusting the metric.
print(f"\n🎯 Stacked Model Accuracy: {accuracy:.4f}")
print("\n📊 Classification Report:\n")
print(classification_report(y_test, y_pred, target_names=['No Disease', 'Disease']))



🎯 Stacked Model Accuracy: 1.0000

📊 Classification Report:

              precision    recall  f1-score   support

  No Disease       1.00      1.00      1.00       100
     Disease       1.00      1.00      1.00       105

    accuracy                           1.00       205
   macro avg       1.00      1.00      1.00       205
weighted avg       1.00      1.00      1.00       205



In [12]:
# Persist the fitted ensemble and its scaler so they can be reloaded for inference
joblib.dump(stacked_model, 'child_heart_model.pkl')
joblib.dump(scaler, 'child_heart_scaler.pkl')
print("\n💾 Model and scaler saved successfully!")



💾 Model and scaler saved successfully!


In [13]:
# Prediction helper that reloads the saved model and scaler from disk
def predict_child_heart_disease(user_input_dict: dict) -> str:
    """
    Predict heart disease risk based on user input.

    The saved model and scaler are reloaded from disk on every call. The input
    is arranged in the column order the scaler was fitted with, scaled, and
    classified.

    Parameters:
    -----------
    user_input_dict : dict
        Dictionary containing all required features, keyed by feature name.
        Keys that are not required features are ignored.

    Returns:
    --------
    str : Prediction result with risk assessment. If anything fails, the
        failure is not raised; a message starting with
        "❌ Error during prediction:" is returned instead.
    """
    try:
        # joblib.load unpickles the files, so only load artifacts you created yourself.
        model = joblib.load('child_heart_model.pkl')
        scaler = joblib.load('child_heart_scaler.pkl')

        # Take the column order from the fitted scaler so the function does not
        # depend on the notebook-level training frame. Fall back to that frame
        # only when the scaler carries no feature names.
        expected_columns = getattr(scaler, 'feature_names_in_', None)
        if expected_columns is None:
            expected_columns = X.columns
        expected_columns = list(expected_columns)

        # Report absent features by name instead of surfacing a raw KeyError
        missing = [col for col in expected_columns if col not in user_input_dict]
        if missing:
            return (
                "❌ Error during prediction: missing required feature(s): "
                + ", ".join(map(str, missing))
            )

        # Single-row frame in the expected column order
        input_df = pd.DataFrame([user_input_dict])[expected_columns]

        # Scale and predict
        input_scaled = scaler.transform(input_df)
        prediction = model.predict(input_scaled)[0]
        probability = model.predict_proba(input_scaled)[0]

        # NOTE(review): assumes target == 1 encodes disease. TODO confirm against
        # the dataset's documentation.
        if prediction == 1:
            return f"❤️ High risk of heart disease ({probability[1]:.2%} probability). Please consult a DOCTOR!"
        else:
            return f"✅ Low risk of heart disease ({probability[0]:.2%} probability). You are HEALTHY!"

    except Exception as e:
        # Deliberate best-effort: callers receive a message, not an exception
        return f"❌ Error during prediction: {str(e)}"

# Smoke-test the prediction helper on two hand-written inputs
print("\n🧪 Testing prediction function...\n")

# First example input
user_input_dict = {
    'age': 70,
    'sex': 1,
    'cp': 0,
    'trestbps': 145,
    'chol': 174,
    'fbs': 0,
    'restecg': 1,
    'thalach': 125,
    'exang': 0,
    'oldpeak': 2.6,
    'slope': 0,
    'ca': 0,
    'thal': 3
}

result = predict_child_heart_disease(user_input_dict)
print(f"Prediction: {result}")

# Second example input.
# NOTE(review): this case was originally labelled "low risk", but the recorded
# run reports it as high risk and the first example as low risk. Verify the
# label encoding of the target column. TODO confirm.
user_input_dict_2 = {
    'age': 45,
    'sex': 0,
    'cp': 0,
    'trestbps': 120,
    'chol': 200,
    'fbs': 0,
    'restecg': 0,
    'thalach': 160,
    'exang': 0,
    'oldpeak': 0.0,
    'slope': 1,
    'ca': 0,
    'thal': 2
}

result_2 = predict_child_heart_disease(user_input_dict_2)
print(f"Prediction 2: {result_2}")


🧪 Testing prediction function...

Prediction: ✅ Low risk of heart disease (98.91% probability). You are HEALTHY!
Prediction 2: ❤️ High risk of heart disease (98.90% probability). Please consult a DOCTOR!


##