In [None]:
# Libraries
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns

import os
# Print the full path of every file found under the Kaggle input directory.
input_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# **Heart Disease UCI**
**Data Set Information:**
* sex = (1 = male; 0 = female)
* cp = Chest Pain Type
* trestbps = Resting Blood Pressure (in mm Hg on admission to the hospital)
* chol = serum cholesterol in mg/dl
* fbs = (fasting blood sugar > 120 mg/dl) (1 = true; 0 = false)
* restecg = resting electrocardiographic results
* thalach = maximum heart rate achieved
* exang = exercise induced angina (1 = yes; 0 = no)
* oldpeak = ST depression induced by exercise relative to rest 

In [None]:
# Importing data: read the heart disease CSV into a DataFrame and preview it.
heart_csv_path = "../input/heart-disease-uci/heart.csv"
heart_disease = pd.read_csv(heart_csv_path)
heart_disease.head()

In [None]:
# Correlation map: heatmap of the pairwise correlations between the
# dataset's columns, each cell annotated with one decimal place.
fig, ax = plt.subplots(figsize=(12, 9))
correlations = heart_disease.corr()
sns.heatmap(correlations, annot=True, linewidths=.5, fmt='.1f', ax=ax)

In [None]:
# Linear Regression
# Fit serum cholesterol (chol) as a straight-line function of age.
x=heart_disease.age.values.reshape(-1,1)   # feature matrix with a single column
y=heart_disease.chol.values.reshape(-1,1)  # observed target as a column vector

from sklearn.linear_model import LinearRegression
linear_regression=LinearRegression()
linear_regression.fit(x,y)

# prediction for an age of 25
print("predict :",linear_regression.predict([[25]]))

# visualization: observed points (green) and the fitted line (red)
y_head=linear_regression.predict(x)
plt.scatter(x,y,color="green")
plt.xlabel("Age")
plt.ylabel("Serum Cholestoral")
plt.plot(x,y_head,color="red")
plt.show()

# Evaluation Regression Models
# r2_score takes (y_true, y_pred): observed targets first, predictions second.
# The result is the coefficient of determination (R^2), not a classification
# accuracy, and it is computed on the same rows the model was fitted on.
from sklearn.metrics import r2_score
print("R^2 Score :",r2_score(y,y_head))

In [None]:
# Multiple Linear Regression
# Predict age from three features: trestbps, chol and oldpeak.
# The three columns are selected once, by name, and converted to a NumPy array.
x=heart_disease[["trestbps","chol","oldpeak"]].values
y=heart_disease.age.values.reshape(-1,1)

from sklearn.linear_model import LinearRegression
multi_linear=LinearRegression()
multi_linear.fit(x,y)

# prediction for trestbps=120, chol=250, oldpeak=1.4 (same column order as x)
print("predict:",multi_linear.predict([[120,250,1.4]]))

# Evaluation Regression Models
# r2_score takes (y_true, y_pred): observed targets first, predictions second.
# The result is R^2 measured on the rows used for fitting, not an accuracy.
from sklearn.metrics import r2_score
y_pred=multi_linear.predict(x)
print("R^2 Score :",r2_score(y,y_pred))



In [None]:
# Show the first rows of the dataset again for reference.
heart_disease.head()

In [None]:
# Polynomial Linear Regression
# Compare a straight-line fit and a degree-10 polynomial fit of oldpeak on age.

x=heart_disease.age.values.reshape(-1,1)
y=heart_disease.oldpeak.values.reshape(-1,1)

# Row order that sorts the samples by age. plt.plot joins points in the order
# they are given, so fitted curves must be drawn in ascending x; otherwise the
# line zig-zags back and forth across the figure.
age_order=np.argsort(x[:,0],kind="stable")

# Linear Regression
from sklearn.linear_model import LinearRegression
lin_reg=LinearRegression()
lin_reg.fit(x,y)

# prediction for an age of 25
print("predict :",lin_reg.predict([[25]]))

# linear visualization
plt.figure(figsize=(10,5))
y_head=lin_reg.predict(x)
plt.scatter(x,y,color="red")
plt.plot(x[age_order],y_head[age_order],color="green")
plt.xlabel("age")
plt.ylabel("oldpeak")
plt.title("Linear Regression")
plt.show()

# Polynomial Features Regression
from sklearn.preprocessing import PolynomialFeatures
polynomial_features=PolynomialFeatures(degree=10)
# degree=n expands the single age column into polynomial terms up to x^n,
# and an ordinary linear regression is then fitted on those terms.
# NOTE(review): raw powers of age up to x^10 span many orders of magnitude,
# so this fit may be numerically ill-conditioned; consider scaling age or
# using a lower degree.
x_=polynomial_features.fit_transform(x)
lin_reg2=LinearRegression()
lin_reg2.fit(x_,y)

# polynomial features visualization (curve drawn in ascending age order)
plt.figure(figsize=(10,5))
y_head2=lin_reg2.predict(x_)
plt.scatter(x,y,color="red")
plt.plot(x[age_order],y_head2[age_order],color="black")
plt.xlabel("age")
plt.ylabel("oldpeak")
plt.title("Polynomial Features Regression")
plt.show()

# Evaluation Regression Models
# r2_score takes (y_true, y_pred); the value is R^2, not a classification accuracy.
from sklearn.metrics import r2_score
print("(Linear) R^2 Score : {}".format(r2_score(y,y_head)))
print("(Polynomial Features) R^2 Score : {}".format(r2_score(y,y_head2)))

# The R^2 values may be low in absolute terms; the point of this cell is the
# improvement of the polynomial fit over the linear one. Both scores are
# measured on the rows used for fitting, so they do not show how either model
# behaves on unseen data.

In [None]:
# Show the first rows of the dataset again for reference.
heart_disease.head()

In [None]:
# Decision Tree Regressor
# Fit oldpeak as a piecewise-constant function of trestbps.
x=heart_disease.trestbps.values.reshape(-1,1)
y=heart_disease.oldpeak.values.reshape(-1,1)

#Decision Tree
from sklearn.tree import DecisionTreeRegressor
dtr=DecisionTreeRegressor()
dtr.fit(x,y)

# prediction for trestbps=150
print("predict :",dtr.predict([[150]]))

#visualization
# Dense grid from the smallest to the largest observed trestbps (step 0.01) so
# the tree's step-shaped prediction is visible. x.min()/x.max() return scalars;
# the builtin min()/max() on this 2-D array return 1-element arrays, which
# np.arange would then have to convert implicitly (deprecated in newer NumPy).
x_=np.arange(x.min(),x.max(),0.01).reshape(-1,1)
y_head=dtr.predict(x_)
plt.scatter(x,y,color="blue")
plt.plot(x_,y_head,color="green")
plt.xlabel("trestbps")
plt.ylabel("oldpeak")
plt.show()

In [None]:
# Random Forest Regressor
# Fit oldpeak as a function of trestbps with an ensemble of 10 trees.
x=heart_disease.trestbps.values.reshape(-1,1)
y=heart_disease.oldpeak.values.reshape(-1,1)

# Random Forest
from sklearn.ensemble import RandomForestRegressor 
rf=RandomForestRegressor(n_estimators=10,random_state=42) # n_estimators=number of trees, random_state=fixed seed
# The forest expects a 1-D target; passing the (n, 1) column vector triggers a
# DataConversionWarning, so flatten it for fitting only.
rf.fit(x,y.ravel())

# prediction for trestbps=150
print("predict :",rf.predict([[150]]))

#visualization
# Dense grid from the smallest to the largest observed trestbps (step 0.01).
# x.min()/x.max() return scalars; the builtin min()/max() on this 2-D array
# return 1-element arrays, which np.arange would then have to convert
# implicitly (deprecated in newer NumPy).
x_=np.arange(x.min(),x.max(),0.01).reshape(-1,1)
y_head=rf.predict(x_)
plt.scatter(x,y,color="blue")
plt.plot(x_,y_head,color="green")
plt.xlabel("trestbps")
plt.ylabel("oldpeak")
plt.show()
