In [None]:
#importing the required libraries
import pandas as pd
import pylab as pl
import numpy as np
import scipy.optimize as opt
import statsmodels.api as sm
from sklearn import preprocessing
# Inline figure rendering is the default in current Jupyter kernels; run
# `%matplotlib inline` in a notebook cell if an older front end needs it.
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
import seaborn as sns


In [None]:
# read the CSV file into a DataFrame and preview its first five rows
DATASET_PATH = 'framingham.csv'
data = pd.read_csv(DATASET_PATH)
data.head(n=5)

In [None]:
# count the missing (NaN) entries in every column
data.isna().sum()

In [None]:
# replace each NaN in the listed columns with that column's mean
# NOTE(review): 'education' and 'BPMeds' look like coded categories - confirm
# that a mean is a sensible fill value for them.
# NOTE(review): the means are taken over all rows, i.e. before the train/test
# split further down - confirm this is acceptable for the evaluation.
cols = [
    'education',
    'cigsPerDay',
    'BPMeds',
    'totChol',
    'BMI',
    'heartRate',
    'glucose',
]
column_means = data[cols].mean()
data[cols] = data[cols].fillna(column_means)

In [None]:
# re-count the missing entries per column after the imputation step
data.isna().sum()

In [None]:
# dropping the unnecessary 'education' column
# Rebinding (instead of inplace=True) together with errors='ignore' makes this
# cell safe to re-run: a second execution no longer raises KeyError because
# the column is already gone.
data = data.drop(columns=['education'], errors='ignore')

In [None]:
# feature matrix X (six predictor columns) and target vector y as numpy arrays
feature_columns = ['age', 'male', 'cigsPerDay', 'totChol', 'sysBP', 'glucose']
target_column = 'TenYearCHD'

X = np.asarray(data.loc[:, feature_columns])
y = np.asarray(data.loc[:, target_column])


In [None]:
# rescale every column of X with StandardScaler (default settings)
# NOTE(review): the scaler is fitted on all rows of X, i.e. before the
# train/test split further down, so test rows contribute to the scaling
# statistics. Consider fitting on the training rows only.
scaler = preprocessing.StandardScaler()
X = scaler.fit_transform(X)

In [None]:
# preview the first rows of the standardized feature matrix rather than
# echoing the whole array
X[:5]

In [None]:
# pairwise correlation between all columns of the data frame
correlation_matrix = data.corr()

# draw the matrix as an annotated heatmap on a fixed [-1, 1] colour scale
heatmap_options = dict(annot=True, cmap='coolwarm', vmin=-1, vmax=1)
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(correlation_matrix, ax=ax, **heatmap_options)

# title the axes and render the figure
ax.set_title('Feature Collinearity Heatmap')
plt.show()

In [None]:
# bar chart of the number of rows per TenYearCHD value
# NOTE(review): recent seaborn releases may warn when `palette` is passed
# without `hue` - confirm against the installed version.
fig, ax = plt.subplots(figsize=(10, 8))
sns.countplot(data=data, x='TenYearCHD', palette='BuGn_r', ax=ax)
plt.show()

In [None]:
# line plot of the TenYearCHD column against the row index
laste = data['TenYearCHD'].plot()
# plt.show() displays the open figures and does not take an Axes argument,
# so it is called without one.
plt.show()

In [None]:
# hold out 20% of the rows as the test set; random_state pins the shuffle
from sklearn.model_selection import train_test_split

split_parts = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=4,
)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# report the feature-matrix and target shapes of both partitions
partitions = (
    ('Train set:', X_train, y_train),
    ('Test set:', X_test, y_test),
)
for label, features, target in partitions:
    print(label, features.shape, target.shape)

In [None]:
# fit a logistic regression with default settings on the training rows,
# then predict labels for the test rows
from sklearn.linear_model import LogisticRegression

logreg = LogisticRegression().fit(X_train, y_train)
y_pred = logreg.predict(X_test)

In [None]:
# share of test rows whose label in y_pred matches y_test
from sklearn.metrics import accuracy_score

accuracy = accuracy_score(y_test, y_pred)
print('Accuracy of the model is =', accuracy)

In [None]:
# confusion matrix heatmap and classification report for the labels in y_pred
from sklearn.metrics import confusion_matrix, classification_report

# rows are the actual classes, columns the predicted ones
row_labels = ['Actual:0', 'Actual:1']
column_labels = ['Predicted:0', 'Predicted:1']
cm = confusion_matrix(y_test, y_pred)
conf_matrix = pd.DataFrame(cm, index=row_labels, columns=column_labels)

# integer-annotated heatmap of the counts
fig, ax = plt.subplots(figsize=(8, 5))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap="Greens", ax=ax)
plt.show()

report = classification_report(y_test, y_pred)
print('The details for confusion matrix is =')
print(report)

In [None]:
# fit a support vector classifier with default settings and overwrite y_pred
# with its predictions for the test rows
from sklearn.svm import SVC

svc = SVC().fit(X_train, y_train)
y_pred = svc.predict(X_test)

In [None]:
# accuracy of the labels currently held in y_pred against y_test
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy of the model is with support vector =', accuracy)

In [None]:
# confusion matrix heatmap and classification report for the labels in y_pred
from sklearn.metrics import confusion_matrix, classification_report

# rows are the actual classes, columns the predicted ones
row_labels = ['Actual:0', 'Actual:1']
column_labels = ['Predicted:0', 'Predicted:1']
cm = confusion_matrix(y_test, y_pred)
conf_matrix = pd.DataFrame(cm, index=row_labels, columns=column_labels)

# integer-annotated heatmap of the counts
fig, ax = plt.subplots(figsize=(8, 5))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap="Greens", ax=ax)
plt.show()

report = classification_report(y_test, y_pred)
print('The details for confusion matrix after applying SVM is =')
print(report)

In [None]:
# fit a decision tree on the training rows and overwrite y_pred with its
# predictions for the test rows
from sklearn.tree import DecisionTreeClassifier

# random_state pins the tree's internal feature shuffling, so a
# Restart & Run All reproduces the same tree and the same scores; the value
# matches the seed used for the train/test split.
dt = DecisionTreeClassifier(random_state=4)
dt.fit(X_train, y_train)
y_pred = dt.predict(X_test)


In [None]:
# accuracy of the labels currently held in y_pred against y_test
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy of the model after applying decision tree classifier is  =', accuracy)

In [None]:
# Confusion matrix and classification report for the decision tree predictions
from sklearn.metrics import confusion_matrix, classification_report

# rows are the actual classes, columns the predicted ones
cm = confusion_matrix(y_test, y_pred)
conf_matrix = pd.DataFrame(data = cm,
                           columns = ['Predicted:0', 'Predicted:1'],
                           index =['Actual:0', 'Actual:1'])

plt.figure(figsize = (8, 5))
sns.heatmap(conf_matrix, annot = True, fmt = 'd', cmap = "Greens")

plt.show()
# The heading names the decision tree: y_pred was produced by the tree at
# this point, not by the SVM.
print('The details for confusion matrix after applying decision tree classifier is =')
print (classification_report(y_test, y_pred))