env ml_py37

# Regression

![Regression](regression.png)

## Section 1

In [12]:
from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression
from sklearn import metrics
import numpy as np
import pandas as pd

$$ A = \pi r^2 $$

### Regression

In [2]:
# Load the diabetes dataset via sklearn.datasets.load_diabetes; later cells
# read its .data, .target and .feature_names attributes.
diabeties = load_diabetes()

## Section 2

In [4]:
#diabeties

In [6]:
# Sequential hold-out split: the first 400 rows are used for training and
# every row after that is kept aside for testing (no shuffling).
train_x, test_x = diabeties.data[:400], diabeties.data[400:]
train_y, test_y = diabeties.target[:400], diabeties.target[400:]

## Section 3

In [7]:
# Fit an ordinary-least-squares model on the training rows.
ols = LinearRegression()
ols.fit(train_x, train_y)

# Predict the held-out rows once and reuse the result for both metrics,
# instead of re-running ols.predict(test_x) for each of them.
ols_pred = ols.predict(test_x)
err = metrics.mean_squared_error(test_y, ols_pred)  # mean squared error on the test rows
r2 = metrics.r2_score(test_y, ols_pred)  # R^2 score on the test rows

## Section 4

![Step 4](regression_step4.png)

In [14]:
# Report the fitted OLS parameters, then the hold-out error metrics.
print("---OLS on diabeties dataset---")
print("Coefficents: ")
print("Intecept (b): %.2f" % ols.intercept_)

# Walk feature names and coefficients in lockstep rather than by index.
for feature, weight in zip(diabeties.feature_names, ols.coef_):
    print(feature + "0: %.2f" % weight)

print("-" * 20)

# Root of the MSE stored in `err` by the fitting cell.
rmse = np.sqrt(err)
print("R squared: %.2f" % r2, ' MSE: %.2f \n' % err, ' RMSE: %.2f \n' % rmse)

---OLS on diabeties dataset---
Coefficents: 
Intecept (b): 152.73
age0: 5.03
sex0: -238.41
bmi0: 521.63
bp0: 299.94
s10: -752.12
s20: 445.15
s30: 83.51
s40: 185.58
s50: 706.47
s60: 88.68
--------------------
R squared: 0.70  MSE: 1668.75 
  RMSE: 40.85 



## Logistic Regression

![Logistic Regression](logistics_regression.png)

$$p(X) = \frac{1}{1 + e^{-z}}, \qquad z = b + w_1 X_1 + \dots + w_p X_p$$

In [22]:
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
# Load the breast-cancer dataset via sklearn.datasets.load_breast_cancer;
# later cells read its .data, .target and .feature_names attributes.
bc = load_breast_cancer()

In [23]:
# split the data
train_x, train_y = bc.data[:400], bc.target[:400]
test_x, test_y = bc.data[400:], bc.target[400:]

In [25]:
# Fit a logistic-regression classifier on the training rows.
logit = LogisticRegression()
logit.fit(train_x, train_y)

# Predict the held-out rows once and reuse the result for both metrics,
# instead of re-running logit.predict(test_x) for each of them.
logit_pred = logit.predict(test_x)

# NOTE(review): mean_squared_error and r2_score are regression metrics applied
# here to predicted class labels; they are kept because the reporting cell
# reads `err` and `r2`. Consider a classification metric such as
# metrics.accuracy_score as well.
err = metrics.mean_squared_error(test_y, logit_pred)
r2 = metrics.r2_score(test_y, logit_pred)

In [32]:
# Report the fitted logistic-regression parameters, then the hold-out metrics.
# The banner names the model and dataset actually used in this section (it
# previously repeated the OLS/diabetes banner from the regression section).
print("---Logistic regression on breast cancer dataset---")
print("Coefficents: ")

# coef_ is indexed with [0] below, i.e. the model stores one row of parameters;
# intercept_ is indexed the same way so a scalar (not an array) is formatted.
# Formatting the array itself with %.2f relies on implicit array-to-scalar
# conversion, which NumPy has deprecated.
print("Intecept (b): %.2f" % logit.intercept_[0])

# Walk feature names and coefficients in lockstep rather than by index.
for feature, weight in zip(bc.feature_names, logit.coef_[0]):
    print(feature + "0: %.2f" % weight)

print("-" * 20)

# Root of the MSE stored in `err` by the fitting cell.
rmse = np.sqrt(err)
print("R squared: %.2f" % r2, ' MSE: %.2f \n' % err, ' RMSE: %.2f \n' % rmse)

---OLS on diabeties dataset---
Coefficents: 
Intecept (b): 0.31
mean radius0: 1.70
mean texture0: 0.02
mean perimeter0: 0.22
mean area0: -0.01
mean smoothness0: -0.11
mean compactness0: -0.31
mean concavity0: -0.43
mean concave points0: -0.24
mean symmetry0: -0.12
mean fractal dimension0: -0.02
radius error0: -0.00
texture error0: 1.03
perimeter error0: -0.09
area error0: -0.09
smoothness error0: -0.01
compactness error0: -0.01
concavity error0: -0.04
concave points error0: -0.03
symmetry error0: -0.03
fractal dimension error0: 0.00
worst radius0: 1.27
worst texture0: -0.33
worst perimeter0: -0.28
worst area0: -0.02
worst smoothness0: -0.19
worst compactness0: -0.99
worst concavity0: -1.24
worst concave points0: -0.49
worst symmetry0: -0.44
worst fractal dimension0: -0.10
--------------------
R squared: 0.70  MSE: 0.05 
  RMSE: 0.23 

