In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [None]:
# Load the dataset from 'heart.csv' (path is relative to the working directory).
heart_data = pd.read_csv('heart.csv')

# End the cell with the frame itself so Jupyter renders it as a truncated,
# rich table instead of dumping plain text through print().
heart_data

**BASIC INFORMATION ABOUT DATASET**

In [None]:
# Preview the first five rows of the dataset.
heart_data.head(n=5)

In [None]:
# Preview the last five rows of the dataset.
heart_data.tail(n=5)

In [None]:
# Dimensions of the dataset as a (number of rows, number of columns) tuple.
heart_data.shape

In [None]:
# Concise summary of the frame: column names, non-null counts, dtypes and memory usage.
heart_data.info()

In [None]:
# Count the missing entries in every column (isna is the same check as isnull).
heart_data.isna().sum()

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max) for the numeric columns.
heart_data.describe()

***DATA VISUALIZATION***










1.   **PIE CHART**




In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Re-read the dataset so the chart is built straight from the file contents.
heart_data = pd.read_csv('heart.csv')

# Two slices: rows whose 'Diabetic' value equals 1, and every other row.
is_diabetic = heart_data['Diabetic'] == 1
labels = ('1', 'other than 1')
sizes = [is_diabetic.sum(), (~is_diabetic).sum()]
colors = ['red', 'lightgreen']

# Draw on an explicit 5x5 inch Axes and title that same Axes.
fig, ax = plt.subplots(figsize=(5, 5))
ax.pie(sizes, labels=labels, colors=colors)
ax.set_title('Diabetes')
plt.show()





2.  **BAR GRAPH**



In [None]:
import matplotlib.pyplot as plt
import pandas as pd

heart_data = pd.read_csv('heart.csv')

# Compare the rows whose 'PhysicalHealth' equals 30 with all remaining rows.
# The second bar counts every value != 30, so it is labelled "Other than 30";
# "More than 30" would misdescribe what is being counted.
labels = ['30', 'Other than 30']
sizes = [(heart_data['PhysicalHealth'] == 30).sum(), (heart_data['PhysicalHealth'] != 30).sum()]
colors = ['red', 'lightgreen']

# One bar per group, positioned at x = 0 and x = 1.
x = [0, 1]
fig, ax = plt.subplots(figsize=(4, 4))
ax.bar(x, sizes, color=colors, align='center')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.set_ylabel('Number of rows')
ax.set_title('Physical Health')
plt.show()


**3. HEATMAP**



In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Re-read the dataset so the heatmap is built straight from the file contents.
heart_data = pd.read_csv('heart.csv')

# Apply seaborn's theme with a 10x7 inch default figure size.
sns.set(rc={"figure.figsize": (10, 7)})

# Pairwise correlation between the columns, annotated with the coefficient values.
# NOTE(review): assumes every column is numeric; on pandas >= 2.0 corr() raises
# for non-numeric columns unless numeric_only=True is passed — confirm.
correlations = heart_data.corr()
sns.heatmap(data=correlations, annot=True, cmap="coolwarm")
plt.show()

4. **HISTOGRAM**



In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

heart_data = pd.read_csv('heart.csv')

# Histogram the raw 'MentalHealth' values. Feeding a crosstab of value counts
# (with margins=True) into hist() would instead plot the distribution of the
# counts themselves, with the 'All' grand total mixed in as an extra data point.
mental_health = heart_data['MentalHealth'].dropna()

fig, ax = plt.subplots()
ax.hist(mental_health, bins=5, color='red', alpha=0.5)
ax.set_title('Mental Health of Person')
ax.set_xlabel("Data")
ax.set_ylabel("Frequency")
plt.show()

5. **CONTINGENCY TABLE**



In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Re-read the dataset so the table is built straight from the file contents.
heart_data = pd.read_csv('heart.csv')

# Contingency table: one row per distinct 'PhysicalHealth' value, one column per
# distinct 'MentalHealth' value, each cell holding the number of matching rows.
physical_health = heart_data['PhysicalHealth']
mental_health = heart_data['MentalHealth']
pd.crosstab(index=physical_health, columns=mental_health)

In [None]:
# Class balance of the target: how many rows carry each distinct 'HeartDisease' value.
heart_data['HeartDisease'].value_counts()

**SPLITTING FEATURES AND TARGET**

In [None]:
# Separate the target column from the predictors.
# Y is the 'HeartDisease' column; X is every other column of the dataset.
Y = heart_data['HeartDisease']
X = heart_data.drop(columns='HeartDisease')

In [None]:
# Show the feature matrix; a bare last expression gets Jupyter's rich table
# rendering, which print() does not.
X

In [None]:
# Show the target column as the cell's output value rather than through print().
Y

In [None]:
# Hold out 20% of the rows for testing. Stratifying on Y keeps the class
# proportions the same in both splits; the fixed seed makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    stratify=Y,
    random_state=2,
)

In [None]:
# Shapes of the full feature matrix, the training split and the test split, in that order.
print(*(features.shape for features in (X, X_train, X_test)))

**MODEL TRAINING**

**LOGISTIC REGRESSION**

In [None]:
# Fit a logistic-regression classifier with default hyper-parameters on the training split.
model = LogisticRegression()
model.fit(X=X_train, y=Y_train)

In [None]:
# Accuracy on the training data. This is an optimistic figure because the model
# has already seen these rows; accuracy_score expects (y_true, y_pred).
X_train_prediction = model.predict(X_train)
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)

# Accuracy on the held-out test data, so logistic regression is scored on the
# same split as the random forest, decision tree and KNN classifiers.
X_test_prediction_lr = model.predict(X_test)
accuracy_lr = accuracy_score(Y_test, X_test_prediction_lr)
print('Accuracy on Test data : ', accuracy_lr)

In [None]:
# Report the training-set accuracy of the logistic-regression model computed in the previous cell.
print('Accuracy on Training data : ', training_data_accuracy)

In [None]:
# Hand-entered feature values for a single person.
# NOTE(review): the values are assumed to follow the column order of X_train — confirm.
input_data = (62,0,0,140,268,0,0,160,0,3.6,0,2,2,1,4,5,9)

# change the input data to a numpy array
input_data_as_numpy_array = np.asarray(input_data)

# Reshape to a single row and attach the training column names. The model was
# fitted on a DataFrame, so predicting on a bare array makes scikit-learn warn
# that the input does not have valid feature names; building the frame also
# fails fast if the number of values does not match the number of features.
input_data_reshaped = pd.DataFrame(
    input_data_as_numpy_array.reshape(1, -1),
    columns=X_train.columns,
)

prediction = model.predict(input_data_reshaped)
print(prediction)

# Label 0 is reported as "no heart disease"; any other predicted label as "has heart disease".
if prediction[0] == 0:
    print('The Person does not have a Heart Disease')
else:
    print('The Person has Heart Disease')

**NAIVE BAYES** 
model prediction and accuracy
---



In [None]:
from sklearn.naive_bayes import GaussianNB

# Fit a Gaussian naive Bayes classifier on the training split.
nb = GaussianNB()
nb.fit(X=X_train, y=Y_train)

In [None]:
# Accuracy on the training data. This is an optimistic figure because the model
# has already seen these rows; accuracy_score expects (y_true, y_pred).
X_train_prediction = nb.predict(X_train)
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)

# Accuracy on the held-out test data, so naive Bayes is scored on the same
# split as the random forest, decision tree and KNN classifiers.
X_test_prediction_nb = nb.predict(X_test)
accuracy_nb = accuracy_score(Y_test, X_test_prediction_nb)
print('Accuracy on Test data : ', accuracy_nb)

In [None]:
# Report the training-set accuracy of the naive Bayes model computed in the previous cell.
print('Accuracy on Training data : ', training_data_accuracy)

In [None]:
# Hand-entered feature values for a single person.
# NOTE(review): the values are assumed to follow the column order of X_train — confirm.
input_data = (99,0,0,140,268,0,0,160,0,3,8,9,12,9,67,9,9)

# change the input data to a numpy array
input_data_as_numpy_array = np.asarray(input_data)

# Reshape to a single row and attach the training column names. The model was
# fitted on a DataFrame, so predicting on a bare array makes scikit-learn warn
# that the input does not have valid feature names; building the frame also
# fails fast if the number of values does not match the number of features.
input_data_reshaped = pd.DataFrame(
    input_data_as_numpy_array.reshape(1, -1),
    columns=X_train.columns,
)

prediction = nb.predict(input_data_reshaped)
print(prediction)

# Label 0 is reported as "no heart disease"; any other predicted label as "has heart disease".
if prediction[0] == 0:
    print('The Person does not have a Heart Disease')
else:
    print('The Person has Heart Disease')

**RandomForestClassifier**


MODEL ACCURACY AND PREDICTION
---



In [None]:
from sklearn.ensemble import RandomForestClassifier

# Train a 100-tree random forest with a fixed seed so the result is repeatable.
# Note: despite its name, `regressor` holds a classifier.
regressor = RandomForestClassifier(n_estimators=100, random_state=0)
regressor.fit(X=X_train, y=Y_train)

In [None]:
from sklearn.metrics import accuracy_score

# Score the random forest on the held-out test split and report it as a percentage.
X_test_prediction_rfr = regressor.predict(X_test)
accuracy_rfr = accuracy_score(y_true=Y_test, y_pred=X_test_prediction_rfr)
print(f" Random Forest Classifier: {accuracy_rfr * 100}")

In [None]:
# Hand-entered feature values for a single person.
# NOTE(review): the values are assumed to follow the column order of X_train — confirm.
input_data = (2,12,2,1,2,52,2,2,2,2,2,2,2,2,2,2,7)

input_data_as_numpy_array = np.asarray(input_data)

# Reshape to a single row and attach the training column names. The model was
# fitted on a DataFrame, so predicting on a bare array makes scikit-learn warn
# that the input does not have valid feature names; building the frame also
# fails fast if the number of values does not match the number of features.
input_data_reshaped = pd.DataFrame(
    input_data_as_numpy_array.reshape(1, -1),
    columns=X_train.columns,
)

prediction = regressor.predict(input_data_reshaped)
print(prediction)

# Label 0 is reported as "no heart disease"; any other predicted label as "has heart disease".
if prediction[0] == 0:
    print('The Person does not have a Heart Disease')
else:
    print('The Person has Heart Disease')

**DECISION TREE CLASSIFIER**

MODEL ACCURACY AND PREDICTION
---




In [None]:
from sklearn.tree import DecisionTreeClassifier

# Train a decision tree that chooses splits by entropy, with a fixed seed for repeatability.
dectree = DecisionTreeClassifier(criterion='entropy', random_state=42)
dectree.fit(X=X_train, y=Y_train)

In [None]:
from sklearn.metrics import accuracy_score

# Score the decision tree on the held-out test split and report it as a percentage.
X_test_prediction_dec = dectree.predict(X_test)
accuracy_dec = accuracy_score(y_true=Y_test, y_pred=X_test_prediction_dec)
print(f" Decision Tree Classifier: {accuracy_dec * 100}")

In [None]:
# Hand-entered feature values for a single person.
# NOTE(review): the values are assumed to follow the column order of X_train — confirm.
input_data = (2,12,2,1,2,52,8,9,12,8,7,45,89,0,7,4,6)

input_data_as_numpy_array = np.asarray(input_data)

# Reshape to a single row and attach the training column names. The model was
# fitted on a DataFrame, so predicting on a bare array makes scikit-learn warn
# that the input does not have valid feature names; building the frame also
# fails fast if the number of values does not match the number of features.
input_data_reshaped = pd.DataFrame(
    input_data_as_numpy_array.reshape(1, -1),
    columns=X_train.columns,
)

prediction = dectree.predict(input_data_reshaped)
print(prediction)

# Label 0 is reported as "no heart disease"; any other predicted label as "has heart disease".
if prediction[0] == 0:
    print('The Person does not have a Heart Disease')
else:
    print('The Person has Heart Disease')

**K NEIGHBORS CLASSIFIER**


MODEL ACCURACY AND PREDICTION
---



In [None]:
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours with k = 24; the Minkowski metric with p = 2 is the Euclidean distance.
knn = KNeighborsClassifier(n_neighbors=24, metric='minkowski', p=2)
knn.fit(X=X_train, y=Y_train)

In [None]:
from sklearn.metrics import accuracy_score

# Score the KNN classifier on the held-out test split and report it as a percentage.
X_test_prediction_knn = knn.predict(X_test)
accuracy_knn = accuracy_score(y_true=Y_test, y_pred=X_test_prediction_knn)
print(f" KNN: {accuracy_knn * 100}")

In [None]:
# Hand-entered feature values for a single person.
# NOTE(review): the values are assumed to follow the column order of X_train — confirm.
input_data = (2,5,6,8,4,7,9,12,67,8,53,1,5,78,1,4,7)

input_data_as_numpy_array = np.asarray(input_data)

# Reshape to a single row and attach the training column names. The model was
# fitted on a DataFrame, so predicting on a bare array makes scikit-learn warn
# that the input does not have valid feature names; building the frame also
# fails fast if the number of values does not match the number of features.
input_data_reshaped = pd.DataFrame(
    input_data_as_numpy_array.reshape(1, -1),
    columns=X_train.columns,
)

prediction = knn.predict(input_data_reshaped)
print(prediction)

# Label 0 is reported as "no heart disease"; any other predicted label as "has heart disease".
if prediction[0] == 0:
    print('The Person does not have a Heart Disease')
else:
    print('The Person has Heart Disease')

In [None]:
import pickle

# Persist the fitted logistic-regression estimator (`model`) to 'heart.sav'.
# Only this one model is written; the other classifiers are not saved.
with open('heart.sav', mode='wb') as model_file:
    pickle.dump(model, model_file)