In [1]:
# Importing the Dependencies
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Data Collection and Processing

# Load the CSV data into a pandas DataFrame.
# NOTE(review): the path is absolute ('/content/...'); presumably a hosted
# notebook upload location -- adjust it when running elsewhere.
heart_data = pd.read_csv('/content/dataset.csv')

# Display the first 5 rows of the dataset
print("First 5 rows of the dataset:")
print(heart_data.head())

# Display the last 5 rows of the dataset
print("\nLast 5 rows of the dataset:")
print(heart_data.tail())

# Number of rows and columns in the dataset
print("\nShape of the dataset:", heart_data.shape)

# Column dtypes and non-null counts.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly: wrapping it in print() would emit a stray "None" line.
print("\nDataset Info:")
heart_data.info()

# Count of missing values per column
print("\nMissing values in each column:")
print(heart_data.isnull().sum())

# Statistical measures about the data
print("\nStatistical summary of the dataset:")
print(heart_data.describe())

# Distribution of the target variable (number of rows per class label)
print("\nTarget variable distribution:")
print(heart_data['target'].value_counts())
print("\n1 --> Defective Heart\n0 --> Healthy Heart")

# Splitting the Features and Target

# X holds every column except 'target'; Y is the 'target' column alone.
# `columns=` already names the axis to drop from, so no `axis` argument is
# passed (pandas ignores `axis` when `columns` is given).
X = heart_data.drop(columns='target')
Y = heart_data['target']

print("\nFeature values (X):")
print(X.head())

print("\nTarget values (Y):")
print(Y.head())

# Splitting the Data into Training data & Test Data
# 20% of the rows are held out for testing; stratify=Y keeps the class
# proportions of Y in both splits, and random_state=2 makes the split
# reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, stratify=Y, random_state=2
)

print("\nShapes of datasets:")
print("X:", X.shape)
print("X_train:", X_train.shape)
print("X_test:", X_test.shape)

# Model Training - Logistic Regression

# Create the classifier (max_iter increased to 1000 to ensure convergence) and
# train it on the training split; fit() returns the fitted estimator itself.
model = LogisticRegression(max_iter=1000).fit(X_train, Y_train)

# Model Evaluation - Accuracy Score

# Predicted labels for the training split and for the held-out test split
X_train_prediction = model.predict(X_train)
X_test_prediction = model.predict(X_test)

# Fraction of correctly predicted labels on each split
training_data_accuracy = accuracy_score(y_true=Y_train, y_pred=X_train_prediction)
test_data_accuracy = accuracy_score(y_true=Y_test, y_pred=X_test_prediction)

print('\nAccuracy on Training data:', training_data_accuracy)
print('Accuracy on Test data:', test_data_accuracy)

# Building a Predictive System

# Sample input data: one value per feature column of X.
# NOTE(review): the values must follow the column order of X -- confirm
# against the dataset before changing them.
input_data = (62, 0, 0, 140, 268, 0, 0, 160, 0, 3.6, 0, 2, 2)

# Change the input data to a numpy array
input_data_as_numpy_array = np.asarray(input_data)

# Reshape to a single row (1, n_values) as we are predicting for only one instance
input_data_reshaped = input_data_as_numpy_array.reshape(1, -1)

# The model was fitted on a DataFrame, so give the sample the same column
# labels. Predicting on a bare ndarray makes scikit-learn warn that X has no
# valid feature names; the DataFrame constructor also raises ValueError if the
# number of values does not match the number of feature columns.
input_data_frame = pd.DataFrame(input_data_reshaped, columns=X.columns)

# Predicting the result (array holding one class label)
prediction = model.predict(input_data_frame)

print("\nPrediction result:", prediction)
if prediction[0] == 0:
    print("The Person does NOT have Heart Disease")
else:
    print("The Person HAS Heart Disease")


First 5 rows of the dataset:
   age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  \
0   63    1   3       145   233    1        0      150      0      2.3      0   
1   37    1   2       130   250    0        1      187      0      3.5      0   
2   41    0   1       130   204    0        0      172      0      1.4      2   
3   56    1   1       120   236    0        1      178      0      0.8      2   
4   57    0   0       120   354    0        1      163      1      0.6      2   

   ca  thal  target  
0   0     1       1  
1   0     2       1  
2   0     2       1  
3   0     2       1  
4   0     2       1  

Last 5 rows of the dataset:
     age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  \
298   57    0   0       140   241    0        1      123      1      0.2   
299   45    1   3       110   264    0        1      132      0      1.2   
300   68    1   0       144   193    1        1      141      0      3.4   
301   57    1   0  

