Importing the Dependencies

In [6]:
import sklearn
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

ModuleNotFoundError: No module named 'sklearn'

Data Collection and Processing

In [None]:
# read the heart-disease records from the CSV file into a pandas DataFrame
heart_data = pd.read_csv(filepath_or_buffer='heart_disease_data.csv')

In [None]:
# preview the first five records (rendered by the notebook as the cell output)
heart_data.head(n=5)

In [None]:
# preview the final five records (rendered by the notebook as the cell output)
heart_data.tail(n=5)

In [None]:
# size of the dataset as a (rows, columns) tuple
heart_data.shape

In [None]:
# per-column summary of the DataFrame (column names, non-null counts, dtypes)
heart_data.info()

In [None]:
# count the missing entries in every column
heart_data.isna().sum()

In [None]:
# descriptive statistics for the columns of the dataset
heart_data.describe()

In [None]:
# how many records fall into each class of the 'target' label
heart_data.loc[:, 'target'].value_counts()

1 --> Defective Heart

0 --> Healthy Heart

Splitting the Features and Target

In [None]:
# separate the label column from the predictor columns
Y = heart_data['target']
X = heart_data.drop(columns=['target'])

In [None]:
# preview the feature matrix; a bare last expression uses the notebook's rich
# DataFrame rendering instead of the plain-text dump produced by print()
X.head()

In [None]:
# per-column summary of the feature matrix (label column already removed)
X.info()

In [None]:
# preview the label Series; a bare last expression replaces print() of the whole Series
Y.head()

### **Data Standardization**

In [None]:
# scaler used to standardize the feature columns; it is fitted and applied in the next cells
scaler = StandardScaler()

In [None]:
# learn the scaling statistics from the raw feature DataFrame
# NOTE(review): the scaler is fitted on the full dataset before the train/test split
# performed further down, so the held-out rows influence the scaling statistics
# (data leakage) — consider fitting on the training split only.
# NOTE(review): X is later rebound to the standardized array, so re-running this cell
# after that point fits the scaler on already-scaled data; restart from the cell that
# builds X from heart_data before re-running.
scaler.fit(X)

In [None]:
# apply the fitted scaler to the features; the result is a NumPy array without column labels
standardized_data = scaler.transform(X)

In [None]:
# preview only the first five standardized rows instead of printing the whole array
standardized_data[:5]

In [None]:
# from this cell on, X refers to the standardized NumPy array rather than the
# original feature DataFrame
# NOTE(review): rebinding X makes the earlier scaler.fit(X) / scaler.transform(X)
# cells non-idempotent — re-running them now would scale already-scaled data.
X = standardized_data
# same label Series that was assigned when features and target were first separated
Y = heart_data['target']

In [None]:
# show a bounded preview of the standardized features and the labels
# rather than printing both objects in full
print(X[:5])
print(Y.head())

### **Splitting the Data into Training data & Test Data**

In [None]:
# hold out 20% of the rows for testing; stratifying on Y keeps the class ratio
# the same in both parts, and the fixed seed makes the split repeatable
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    stratify=Y,
    random_state=2,
)

In [None]:
# shapes of the full, training and test feature arrays, in that order
print(X.shape, X_train.shape, X_test.shape)

Model Training

Logistic Regression

In [None]:
# logistic-regression classifier created with its default hyperparameters
model = LogisticRegression()

In [None]:
# fit the LogisticRegression model on the (standardized) training split
model.fit(X_train, Y_train)

Model Evaluation

Accuracy Score

In [None]:
# accuracy on training data
X_train_prediction = model.predict(X_train)
# accuracy_score is documented as accuracy_score(y_true, y_pred): pass the true labels
# first. Accuracy is symmetric, so the value is unchanged, but the call now matches
# the API and the Random Forest evaluation later in the notebook.
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)

In [None]:
# report the training-set accuracy computed in the previous cell
print('Accuracy on Training data : ', training_data_accuracy)

In [None]:
# accuracy on test data
X_test_prediction = model.predict(X_test)
# accuracy_score is documented as accuracy_score(y_true, y_pred): pass the true labels
# first. Accuracy is symmetric, so the value is unchanged, but the call now matches
# the API and the Random Forest evaluation later in the notebook.
test_data_accuracy = accuracy_score(Y_test, X_test_prediction)

In [None]:
# report the held-out (test-set) accuracy computed in the previous cell
print('Accuracy on Test data : ', test_data_accuracy)

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

print("--- Training and Evaluating Random Forest ---")

# Build a forest of 100 decision trees (seeded with 42 so repeated runs agree) and
# fit it on the same standardized training split; fit() returns the estimator itself.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, Y_train)

# Score the forest on the standardized held-out split
Y_pred_rf = rf_model.predict(X_test)
rf_accuracy = accuracy_score(Y_test, Y_pred_rf)

# Report the test accuracy of the Random Forest model
print(f"Random Forest Test Accuracy: {rf_accuracy}")

### **Building a Predictive System**

In [None]:
# One patient's raw feature values, in the same column order as the training data
input_data = (57, 0, 0, 120, 354, 0, 1, 163, 1, 0.6, 2, 0, 2)

# change the input data to a numpy array and reshape it to a single row,
# because we are predicting for only one instance
input_data_as_numpy_array = np.asarray(input_data)
input_data_reshaped = input_data_as_numpy_array.reshape(1, -1)

# Label the row with the training feature names (every column except the target) so
# the scaler, which was fitted on a DataFrame, receives the columns it expects.
feature_columns = heart_data.drop(columns='target').columns
input_frame = pd.DataFrame(input_data_reshaped, columns=feature_columns)

# The model was trained on standardized features, so the raw values must go through
# the SAME fitted scaler before prediction; feeding raw values (e.g. chol=354) to the
# model would give a meaningless result.
input_data_standardized = scaler.transform(input_frame)

prediction = model.predict(input_data_standardized)
print(prediction)

if prediction[0] == 0:
  print('The Person does not have a Heart Disease')
else:
  print('The Person has Heart Disease')

### **Saving the trained model**

In [None]:
import pickle

In [None]:
filename = 'heart_disease_model.sav'
# Use a context manager so the file handle is flushed and closed even if pickling fails
# (the bare open(...) passed to pickle.dump was never closed).
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)
# NOTE(review): only the classifier is saved here. It was trained on standardized
# features, so whoever loads it also needs the fitted `scaler` — consider persisting
# the scaler alongside the model.

In [None]:
# loading the saved model
# The context manager closes the file handle once the model has been read.
# NOTE: pickle.load can execute arbitrary code — only load files you created yourself.
with open('heart_disease_model.sav', 'rb') as model_file:
    loaded_model = pickle.load(model_file)

In [None]:
# After standardization X is a plain NumPy array and carries no column labels, so the
# feature names are read from the original DataFrame with the label column removed.
# The printed index is the position of each feature in the standardized array.
for i, column in enumerate(heart_data.drop(columns=['target']).columns):
  print(f"Column {i}: {column}")

In [None]:
# record the installed scikit-learn version alongside the results
print(sklearn.__version__)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Feature names are taken from the original DataFrame (label column excluded) because
# the standardized arrays used for training no longer carry column labels.
original_feature_names = heart_data.drop(columns='target').columns
encoded_feature_names = list(original_feature_names)

# Coefficients of the trained logistic regression model (first row of coef_)
coefficients = model.coef_[0]

# Pair each feature with its coefficient and order from most positive to most negative
feature_importance = pd.DataFrame(
    {'Feature': encoded_feature_names, 'Importance': coefficients}
).sort_values(by='Importance', ascending=False)

# Horizontal bar chart of the signed coefficients
fig, ax = plt.subplots(figsize=(10, 8))
sns.barplot(x='Importance', y='Feature', data=feature_importance, ax=ax)
ax.set_title('Feature Importance from Logistic Regression')
plt.show()

print(feature_importance)

In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
# Note: In your actual notebook, the 'scaler' and 'model' objects
# would already be trained and ready. Here, we're just showing the logic.

# --- Step 1: The Recommendation Function ---
# This is the function we developed earlier.
def generate_recommendations(prediction, data_input):
    """
    Print a short health assessment for one model prediction.

    A prediction of 0 prints the low-risk message and general advice. Any other
    prediction prints the high-risk message, followed by one line for each risk
    check that the supplied values trigger, and a closing disclaimer.

    Args:
        prediction (int): The model's prediction (0 for No Disease, 1 for Disease).
        data_input (dict): Feature names mapped to the user's values. Only the keys
            'cp', 'exang', 'chol' and 'trestbps' are read; a missing key counts as 0.

    Returns:
        None. All output is written with print().
    """
    print("--- Your Health Assessment ---")

    if prediction == 0:
        print("✅ Prediction: Low risk of heart disease.")
        print("\nRecommendations: Great job! Continue to maintain your healthy lifestyle.")
        print("- Continue regular check-ups with your doctor.")
        print("- Maintain a balanced diet and regular exercise.")
    else:
        print("❗️ Prediction: High risk of heart disease.")
        print("\nBased on our model, here are some key areas to discuss with your doctor:")

        # Each entry pairs a risk check with the advice printed when it fires.
        # The checks are evaluated lazily, in this order, one at a time.
        risk_checks = (
            # any reported chest pain type above 0
            (
                lambda values: values.get('cp', 0) > 0,
                "- Your reported chest pain type is a significant risk factor. It is crucial to discuss this with a cardiologist.",
            ),
            # exercise-induced angina flag set
            (
                lambda values: values.get('exang', 0) == 1,
                "- Exercise-induced angina ('exang') was noted. This should be investigated further with a stress test.",
            ),
            # cholesterol above 200 or resting blood pressure above 130
            (
                lambda values: values.get('chol', 0) > 200 or values.get('trestbps', 0) > 130,
                "- Focus on managing your cholesterol and blood pressure through diet, exercise, and possibly medication as advised by your doctor.",
            ),
        )
        for is_triggered, advice in risk_checks:
            if is_triggered(data_input):
                print(advice)

        print("\nDisclaimer: This is an AI-generated assessment and not a substitute for professional medical advice.")


# --- Step 2: Simulate the End-to-End Workflow ---
# Imagine this is your main script or dashboard backend.

# 'model' and 'scaler' are normally the trained objects from the cells above (in real
# code you would load your saved model and scaler). The fallback below only runs when
# one of them is missing from the notebook's globals.
if 'model' not in globals() or 'scaler' not in globals():
    print("Creating dummy model and scaler for demonstration purposes.")
    # Fit on the RAW feature columns. X_train in this notebook is already standardized,
    # so a scaler fitted on it could not scale raw user input correctly.
    raw_features = heart_data.drop(columns='target')
    raw_train, raw_test, target_train, target_test = train_test_split(
        raw_features,
        heart_data['target'],
        test_size=0.2,
        stratify=heart_data['target'],
        random_state=2,
    )
    scaler = StandardScaler().fit(raw_train)
    model = LogisticRegression().fit(scaler.transform(raw_train), target_train)


# 1. Get new user input (imagine this came from a web form)
new_user_data = {
    'age': 52, 'sex': 1, 'cp': 0, 'trestbps': 125, 'chol': 212,
    'fbs': 0, 'restecg': 1, 'thalach': 168, 'exang': 0, 'oldpeak': 1.0,
    'slope': 2, 'ca': 2, 'thal': 3
}
print(f"Analyzing data for a {new_user_data['age']}-year-old...")


# 2. Format the input for the model
# Use the column order the scaler was fitted with, when it recorded one; otherwise
# fall back to the order of the keys in the input dict.
feature_order = getattr(scaler, 'feature_names_in_', None)
if feature_order is None:
    feature_order = list(new_user_data)
input_df = pd.DataFrame([new_user_data], columns=list(feature_order))

# A feature absent from the input shows up as NaN; fail loudly instead of predicting on it.
missing_features = input_df.columns[input_df.isna().any()].tolist()
if missing_features:
    raise ValueError(f"Missing input features: {missing_features}")

input_scaled = scaler.transform(input_df)


# 3. Make a prediction
user_prediction = model.predict(input_scaled)[0] # Get the single prediction (0 or 1)


# 4. Generate personalized recommendations
generate_recommendations(user_prediction, new_user_data)