<a href="https://colab.research.google.com/github/Lakshmi-36/codealpha_tasks/blob/main/Disease_Prediction_From_Medical_Data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

# ---- Load ------------------------------------------------------------
# Read the heart-disease table from the working directory and print its
# leading rows as a quick look at the available columns.
data = pd.read_csv('heart.csv')
print("Dataset Preview:", data.head(), sep="\n")

# ---- Features and label ----------------------------------------------
# 'Target' is the column to predict; every other column is a feature.
target_column = 'Target'
y = data[target_column]
X = data.drop(columns=[target_column])

# One-hot encode the categorical features. drop_first=True leaves out the
# first level of each encoded column, so that level is represented by all
# of the column's indicators being 0.
X = pd.get_dummies(
    X,
    columns=['ChestPain', 'Slope', 'Ca', 'Thal'],
    drop_first=True,
)

# ---- Hold-out split --------------------------------------------------
# 20% of the rows are held out for evaluation; the fixed seed keeps the
# split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# ---- Fit -------------------------------------------------------------
# Random forest with 100 trees and a fixed seed; fit() returns the
# estimator itself, so construction and training are chained.
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# ---- Evaluate on the held-out rows -----------------------------------
y_pred = model.predict(X_test)

print("\nConfusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")

# Example: predict the target for one new case (replace the values with
# actual patient data).
# The dict is written directly in the one-hot layout of the training
# features: each categorical input appears as <column>_<level> indicator
# keys. Because the training features were encoded with drop_first=True,
# a group whose indicators are all 0 stands for the level that was left out.
new_case_data = {
    'Age': 63, 'Sex': 1, 'RestBP': 145, 'Chol': 233, 'Fbs': 1, 'RestECG': 2, 'MaxHR': 150, 'ExAng': 0,
    'Oldpeak': 2.3,
    'ChestPain_nonanginal': 0, 'ChestPain_nontypical': 0, 'ChestPain_typical': 1,
    'Slope_2': 0, 'Slope_3': 1,
    'Ca_1': 0, 'Ca_2': 0, 'Ca_3': 0,
    'Thal_normal': 0, 'Thal_reversable': 0
}

new_case = pd.DataFrame([new_case_data])

# Training columns that the dict above does not provide; they are filled
# with 0 by the alignment step below.
missing_cols = set(X_train.columns) - set(new_case.columns)

# Keys the model was never trained on are discarded by the alignment step.
# Report them rather than dropping them silently: a misspelt indicator name
# would otherwise quietly turn into "all zeros" for that feature.
unexpected_cols = set(new_case.columns) - set(X_train.columns)
if unexpected_cols:
    print("Warning: ignoring columns not seen during training:", sorted(unexpected_cols))

# Align with the training layout in a single step: same columns, same
# order, and 0 for every training column absent from the dict.
new_case = new_case.reindex(columns=X_train.columns, fill_value=0)

prediction = model.predict(new_case)
print("\nPredicted Target for New Case:", prediction)


Dataset Preview:
   Age  Sex     ChestPain  RestBP  Chol  Fbs  RestECG  MaxHR  ExAng  Oldpeak  \
0   63    1       typical     145   233    1        2    150      0      2.3   
1   67    1  asymptomatic     160   286    0        2    108      1      1.5   
2   67    1  asymptomatic     120   229    0        2    129      1      2.6   
3   37    1    nonanginal     130   250    0        0    187      0      3.5   
4   41    0    nontypical     130   204    0        2    172      0      1.4   

   Slope  Ca        Thal  Target  
0      3   0       fixed       0  
1      2   3      normal       1  
2      2   2  reversable       1  
3      3   0      normal       0  
4      1   0      normal       0  

Confusion Matrix:
[[25  4]
 [ 5 27]]

Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.86      0.85        29
           1       0.87      0.84      0.86        32

    accuracy                           0.85        61
   macro avg