In [1]:
import pandas as pd
import matplotlib.pyplot as plt 
import numpy as np 
from sklearn import linear_model, metrics 
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

# load the boston dataset 
dataframe =  pd.read_csv('heart.csv')

print(dataframe.head())

# defining feature matrix(X) and response vector(y) 
X = dataframe.iloc[:,:-1] 
y = dataframe.iloc[:,-1:]

#convert categorical to numeric
labelencoder_X=LabelEncoder()
z = X.iloc[:,-1:]
X.iloc[:,-1:] = labelencoder_X.fit_transform(z.values.ravel()) #array.ravel is equivalent to reshape(-1, order=order) 

#z score normalization
scaller = StandardScaler()
X = scaller.fit_transform(X)

# splitting X and y into training and testing sets  
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=1) 

# create linear regression object 
reg = linear_model.LinearRegression() 

# train the model using the training sets 
reg.fit(X_train, y_train) 

# regression coefficients 
print('Coefficients: \n', reg.coef_) 

# variance score: 1 means perfect prediction 
print('Variance score: {}'.format(reg.score(X_test, y_test))) 

# plot for residual error 

## setting plot style 
plt.style.use('fivethirtyeight') 

## plotting residual errors in training data 
plt.scatter(reg.predict(X_train), reg.predict(X_train) - y_train, color = "green", s = 10, label = 'Train data') 

## plotting residual errors in test data 
plt.scatter(reg.predict(X_test), reg.predict(X_test) - y_test, color = "blue", s = 10, label = 'Test data') 

## plotting line for zero residual error 
plt.hlines(y = 0, xmin = 0, xmax = 50, linewidth = 2) 

## plotting legend 
plt.legend(loc = 'upper right') 

## plot title 
plt.title("Residual errors") 

## function to show plot 
plt.show() 


   age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  \
0   63    1   1       145   233    1        2      150      0      2.3      3   
1   67    1   4       160   286    0        2      108      1      1.5      2   
2   67    1   4       120   229    0        2      129      1      2.6      2   
3   37    1   3       130   250    0        0      187      0      3.5      3   
4   41    0   2       130   204    0        2      172      0      1.4      1   

   ca        thal  target  
0   0       fixed       0  
1   3      normal       1  
2   2  reversible       0  
3   0      normal       0  
4   0      normal       0  
Coefficients: 
 [[-0.01988385  0.03751326  0.06049251  0.0455686   0.01129942  0.00964204
  -0.00590382 -0.06753739  0.04679766  0.08674615  0.04398229  0.13220616
   0.02220608]]
Variance score: 0.454453412284089


<Figure size 640x480 with 1 Axes>