#IMPORTING THE REQUIRED LIBRARIES


In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt


#LOADING THE DATA AND ENCODING THE CATEGORICAL VALUES

In [None]:
# Load the raw heart-disease records.
dataset = pd.read_csv('/content/sample_data/heart_disease_dataset.csv')

# Fill missing 'Alcohol Intake' entries with the column's most frequent value.
# The result is assigned back to the frame: calling fillna(inplace=True) on a
# selected column is chained assignment, which pandas deprecates and which has
# no effect on the frame once copy-on-write is enabled.
dataset['Alcohol Intake'] = dataset['Alcohol Intake'].fillna(dataset['Alcohol Intake'].mode()[0])

def convert_alcohol_intake(h):
    """Encode an alcohol-intake label: 0 for 'Heavy', 1 for any other value."""
    if h == 'Heavy':
        return 0
    return 1
# Replace the text labels in 'Alcohol Intake' with their integer codes.
alcohol_labels = dataset['Alcohol Intake']
dataset['Alcohol Intake'] = alcohol_labels.map(convert_alcohol_intake)

def convert_num(h):
    """Encode a yes/no label: 0 for 'Yes', 1 for any other value."""
    answered_yes = h == 'Yes'
    return 0 if answered_yes else 1

# Encode every yes/no column with convert_num (0 = 'Yes', 1 = anything else).
for binary_column in ('Family History', 'Diabetes', 'Obesity', 'Exercise Induced Angina'):
    dataset[binary_column] = dataset[binary_column].map(convert_num)

def chest_pain(j):
    """Encode a chest-pain label as an integer code.

    'Atypical Angina' -> 0, 'Typical Angina' -> 1, 'Non-anginal Pain' -> 2;
    every other value -> 3.
    """
    known_codes = (
        ('Atypical Angina', 0),
        ('Typical Angina', 1),
        ('Non-anginal Pain', 2),
    )
    for label, code in known_codes:
        if j == label:
            return code
    return 3
# Replace the text labels in 'Chest Pain Type' with their integer codes.
chest_pain_labels = dataset['Chest Pain Type']
dataset['Chest Pain Type'] = chest_pain_labels.map(chest_pain)

def gender(k):
    """Encode a gender label: 1 for 'Male', 0 for any other value."""
    if k == 'Male':
        return 1
    return 0
# Replace the text labels in 'Gender' with their integer codes.
gender_labels = dataset['Gender']
dataset['Gender'] = gender_labels.map(gender)

def smoke(h):
    """Encode a smoking-status label: 'Current' -> 0, 'Never' -> 1, anything else -> 2."""
    if h == 'Current':
        return 0
    if h == 'Never':
        return 1
    return 2
# Replace the text labels in 'Smoking' with their integer codes.
smoking_labels = dataset['Smoking']
dataset['Smoking'] = smoking_labels.map(smoke)

# Discard rows whose 'Heart Disease' label is missing. The rows are filtered
# on the frame itself: assigning a dropna()'d column back into the frame
# realigns on the index and simply re-inserts NaN for the dropped positions,
# so no row would actually be removed.
dataset = dataset.dropna(subset=['Heart Disease'])


#HANDLING THE MISSING VALUES

In [None]:
# Remove any rows that lack a 'Heart Disease' label, then separate the
# feature columns (df) from the label column (target).
if dataset['Heart Disease'].isnull().any():
    print("There are missing values in the target column.")
    dataset.dropna(subset=['Heart Disease'], inplace=True)

target = dataset['Heart Disease']
df = dataset.drop(columns='Heart Disease')

#STANDARDISING THE DATA

In [6]:
# Fit a StandardScaler on the feature frame and keep it, so later inputs can be
# transformed with the same statistics; scaled_data is the standardised copy.
scaler = StandardScaler().fit(df)
scaled_data = scaler.transform(df)

#SPLITTING THE DATA

In [7]:
# Split the unscaled features first (80/20, stratified on the label), then fit
# the scaler on the training rows only. Fitting the scaler on every row before
# splitting lets the test rows influence the mean/variance used to transform
# the training data, which leaks test information into model selection.
x_train, x_test, y_train, y_test = train_test_split(
    df, target, test_size=0.2, stratify=target, random_state=3
)
scaler = StandardScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)


#USING SUPPORT VECTOR CLASSIFICATION

In [None]:
from sklearn import svm
from sklearn.svm import SVC

# Linear-kernel support vector classifier, scored by 5-fold cross-validated
# accuracy on the training split; the mean fold accuracy is printed.
classifier1 = SVC(kernel='linear')
cross_val_scores1 = cross_val_score(
    classifier1,
    x_train,
    y_train,
    scoring='accuracy',
    cv=5,
)
print(cross_val_scores1.mean())

#USING DECISION TREE CLASSIFIER

In [None]:
# Decision tree with default settings, scored by 5-fold cross-validated
# accuracy on the training split; the mean fold accuracy is printed.
classifier3 = DecisionTreeClassifier()
cross_val_scores3 = cross_val_score(
    classifier3,
    x_train,
    y_train,
    scoring='accuracy',
    cv=5,
)
print(cross_val_scores3.mean())

In [None]:
# 5-fold cross-validated accuracy of the decision tree on the test split.
# NOTE(review): this re-fits the tree on folds of x_test rather than scoring a
# model trained on x_train -- confirm this is the intended evaluation.
cross_val_scores3t = cross_val_score(
    classifier3,
    x_test,
    y_test,
    scoring='accuracy',
    cv=5,
)
print(cross_val_scores3t.mean())

#USING RANDOM FOREST CLASSIFIER

In [11]:
# Random forest used for the final predictions; hyperparameters are fixed here.
classifier2 = RandomForestClassifier(
    n_estimators=400,
    criterion="gini",
    max_depth=10,
    max_features="sqrt",
    min_samples_split=7,
    min_samples_leaf=1,
    class_weight='balanced',
)

In [None]:
# 5-fold cross-validated accuracy of the random forest on the training split.
cross_val_scores2 = cross_val_score(
    classifier2,
    x_train,
    y_train,
    scoring='accuracy',
    cv=5,
)
print(f"Cross-validation accuracy (training data): {cross_val_scores2.mean()}")

In [None]:
# 5-fold cross-validated accuracy of the random forest on the test split.
# Print the scores computed here (cross_val_scores2t), not the training-split
# scores held in cross_val_scores2.
cross_val_scores2t = cross_val_score(classifier2, x_test, y_test, cv=5, scoring='accuracy')
print(cross_val_scores2t.mean())

In [None]:
# Train the random forest on the training split, predict the test split, and
# report plain accuracy against the true test labels.
y_pred = classifier2.fit(x_train, y_train).predict(x_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy on test data: {accuracy}")


#PREDICTIVE SYSTEM

In [None]:
# Draw 100 rows from the full dataset (fixed seed), predict each with the
# trained forest, and write the rows with predicted and actual labels
# side by side to a CSV file.
random_sample = dataset.sample(n=100, random_state=42)
sample_features = random_sample.drop(columns='Heart Disease')

scaled_sample = scaler.transform(sample_features)
predictions = classifier2.predict(scaled_sample)

random_sample = random_sample.assign(**{
    'Predicted Heart Disease': predictions,
    'Actual Heart Disease': random_sample['Heart Disease'],
})

random_sample.to_csv('/content/predicted_heart_disease.csv', index=False)
print("Predictions for 100 entries have been stored in 'predicted_heart_disease.csv'.")

# Predict a single hand-written record; values are listed in the same order as
# the feature columns the scaler was fitted on.
new_data = [50, 1, 200, 80, 1, 0, 1, 0, 0, 2, 1, 0, 1, 0, 1]
prediction = classifier2.predict(scaler.transform([new_data]))
prediction_label = 'Heart Disease' if prediction[0] == 1 else 'No Heart Disease'
print(f"Prediction for new data: {prediction_label}")


#TAKING DATA FROM THE USER

In [None]:
import datetime
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier

def _prompt_int(prompt, allowed=None):
    """Ask for an integer on stdin, re-prompting until the reply is valid.

    Args:
        prompt: Text passed to ``input()``.
        allowed: Optional container of acceptable values; when given, any
            other integer is rejected and the question is asked again.

    Returns:
        The integer entered by the user.
    """
    while True:
        reply = input(prompt)
        try:
            value = int(reply)
        except ValueError:
            print("Invalid input: please enter a whole number.")
            continue
        if allowed is not None and value not in allowed:
            print(f"Invalid input: please enter one of {', '.join(map(str, allowed))}.")
            continue
        return value


def take_user_input():
    """Interactively collect one patient's data and predict heart disease.

    Each feature is read from stdin as an integer. A non-numeric reply, or a
    category code outside the values listed in its prompt, is rejected and the
    question is asked again instead of raising or silently feeding the model
    an unknown code. The answers are scaled with the module-level ``scaler``
    and classified with the module-level ``classifier2``, both of which must
    already be fitted.

    Returns:
        dict: the entered feature values keyed by column name, plus
        'Heart Disease Prediction', 'Date' (YYYY-MM-DD) and 'Time' (HH:MM:SS).
    """
    yes_no = range(2)

    # The name is asked for but is not part of the returned record.
    input("Patient's Name: ")
    age = _prompt_int("Age: ")
    gender = _prompt_int("Gender (Enter 1 for Male, 0 for Female): ", yes_no)
    cholesterol = _prompt_int("Cholesterol Level: ")
    blood_pressure = _prompt_int("Blood Pressure Level: ")
    heart_rate = _prompt_int("Heart Rate: ")
    smoking = _prompt_int("Smoking Status (Enter 0 for Current, 1 for Never, 2 for Ex-smoker): ", range(3))
    alcohol_intake = _prompt_int("Alcohol Intake (Enter 0 for Heavy, 1 for Light/None): ", yes_no)
    exercise_hours = _prompt_int("Exercise Hours per Week: ")
    family_history = _prompt_int("Family History of Heart Disease (Enter 0 for Yes, 1 for No): ", yes_no)
    diabetes = _prompt_int("Diabetes (Enter 0 for Yes, 1 for No): ", yes_no)
    obesity = _prompt_int("Obesity (Enter 0 for Yes, 1 for No): ", yes_no)
    stress_level = _prompt_int("Stress Level (Enter a value between 0-10): ", range(11))
    # NOTE(review): no range check here -- the visible preprocessing never
    # recodes 'Blood Sugar', so the training column may not be 0/1 as this
    # prompt suggests; confirm the expected scale.
    blood_sugar = _prompt_int("Blood Sugar Level (Enter 0 for High, 1 for Normal): ")
    exercise_induced_angina = _prompt_int("Exercise Induced Angina (Enter 0 for Yes, 1 for No): ", yes_no)
    chest_pain_type = _prompt_int(
        "Chest Pain Type (Enter 0 for Atypical Angina, 1 for Typical Angina, 2 for Non-anginal Pain, 3 for Asymptomatic): ",
        range(4),
    )
    timestamp = datetime.datetime.now()

    # Key order defines the column order of the frame handed to the scaler.
    data = {
        'Age': age,
        'Gender': gender,
        'Cholesterol': cholesterol,
        'Blood Pressure': blood_pressure,
        'Heart Rate': heart_rate,
        'Smoking': smoking,
        'Alcohol Intake': alcohol_intake,
        'Exercise Hours': exercise_hours,
        'Family History': family_history,
        'Diabetes': diabetes,
        'Obesity': obesity,
        'Stress Level': stress_level,
        'Blood Sugar': blood_sugar,
        'Exercise Induced Angina': exercise_induced_angina,
        'Chest Pain Type': chest_pain_type,
    }

    # Scale only the feature columns; the prediction and timestamp are added afterwards.
    input_data = pd.DataFrame([data])
    input_scaled = scaler.transform(input_data)

    heart_disease_prediction = classifier2.predict(input_scaled)[0]

    data['Heart Disease Prediction'] = heart_disease_prediction
    data['Date'] = timestamp.strftime("%Y-%m-%d")
    data['Time'] = timestamp.strftime("%H:%M:%S")

    return data

import os

# Collect one patient record interactively and append it to the CSV log.
user_input = take_user_input()
user_df = pd.DataFrame([user_input])

patient_csv_path = '/content/patient_data.csv'
# Write the header row only when the file is missing or still empty, so
# appended rows never repeat it. os.path is used for the check instead of
# pandas' internal, undocumented pd.io.common.file_exists helper.
needs_header = not os.path.exists(patient_csv_path) or os.path.getsize(patient_csv_path) == 0
user_df.to_csv(patient_csv_path, mode='a', header=needs_header, index=False)

print("Patient data has been saved in 'patient_data.csv'.")
