In [7]:
# Importing libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Load the dataset (the path is resolved relative to the current working directory)
heart_data = pd.read_csv('heart_disease_data.csv')

# Basic exploration
print(heart_data.head())
print(heart_data.tail())
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
heart_data.info()
print("Shape:", heart_data.shape)
# Number of missing values in each column
print(heart_data.isnull().sum())
print(heart_data.describe())
# Number of rows for each distinct value of the 'target' column
print(heart_data['target'].value_counts())

# Separate the label column ('target') from the predictor columns
y = heart_data['target']
x = heart_data.drop(columns=['target'])

# Each header is followed by its object on the next line
print("Features (x):", x, sep="\n")
print("Target (y):", y, sep="\n")

# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# proportions aligned between the two partitions, and the fixed random_state
# makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    stratify=y,
    random_state=2,
)

# Model training: max_iter is raised to 1000 to avoid convergence issues.
# fit() returns the estimator itself, so construction and fitting are chained.
model = LogisticRegression(max_iter=1000).fit(x_train, y_train)

# Predict on both partitions with the fitted model
x_train_prediction = model.predict(x_train)
x_test_prediction = model.predict(x_test)

# Compare the predictions with the true labels of each partition
training_data_accuracy = accuracy_score(y_train, x_train_prediction)
testing_data_accuracy = accuracy_score(y_test, x_test_prediction)

# Report training accuracy first, then testing accuracy
print('Training accuracy score:', training_data_accuracy)
print('Testing accuracy score:', testing_data_accuracy)

# Prediction system for a new input

# Example input data (values must follow the order of the feature columns in x)
input_data = (56, 0, 1, 140, 294, 0, 0, 153, 0, 1.3, 1, 0, 2)

# Convert to a numpy array and reshape it to a single row (1 sample, n features)
input_data_as_numpy_array = np.asarray(input_data)
input_data_reshaped = input_data_as_numpy_array.reshape(1, -1)

# The model was fitted on a DataFrame, so it remembers the feature names.
# Passing a bare array makes scikit-learn warn that X has no valid feature
# names; wrapping the row in a DataFrame with the training columns avoids the
# warning and raises immediately if the number of values does not match.
input_frame = pd.DataFrame(input_data_reshaped, columns=x.columns)

# Make prediction
prediction = model.predict(input_frame)

# Print the result clearly (predict returns one label per input row)
if prediction[0] == 0:
    print('The person does NOT have a heart disease.')
else:
    print('The person HAS heart disease.')


   age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  \
0   63    1   3       145   233    1        0      150      0      2.3      0   
1   37    1   2       130   250    0        1      187      0      3.5      0   
2   41    0   1       130   204    0        0      172      0      1.4      2   
3   56    1   1       120   236    0        1      178      0      0.8      2   
4   57    0   0       120   354    0        1      163      1      0.6      2   

   ca  thal  target  
0   0     1       1  
1   0     2       1  
2   0     2       1  
3   0     2       1  
4   0     2       1  
     age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  \
298   57    0   0       140   241    0        1      123      1      0.2   
299   45    1   3       110   264    0        1      132      0      1.2   
300   68    1   0       144   193    1        1      141      0      3.4   
301   57    1   0       130   131    0        1      115      1      1.2   
3

