# Linear regression

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression

### 1. Load data

In [None]:
# Fetch the diabetes regression data as a (features, targets) pair
diabetes_X, diabetes_y = load_diabetes(return_X_y=True)

# Keep a single predictor -- column 2, the BMI feature -- as an (n_samples, 1) column
diabetes_X = diabetes_X[:, np.newaxis, 2]
diabetes_X.shape

In [None]:
# Hold out the last 40 samples for testing; everything before them is training data.
# Features and targets are cut at the same position so the rows stay aligned.
diabetes_X_train, diabetes_X_test = diabetes_X[:-40], diabetes_X[-40:]
diabetes_y_train, diabetes_y_test = diabetes_y[:-40], diabetes_y[-40:]

### 2. Create Model instance

In [None]:
# Create the linear regression estimator with its default settings;
# nothing is fitted at this point
regr = LinearRegression()

### 3. Train model on training data

In [None]:
# Fit the model on the training split only; the held-out test samples
# are not passed to the estimator here
regr.fit(diabetes_X_train, diabetes_y_train)

### 4. Predict labels on new data

In [None]:
# Predict targets for the held-out test features; the result is plotted
# against diabetes_y_test in the evaluation step
diabetes_y_pred = regr.predict(diabetes_X_test)

### 5. Evaluate

In [None]:
# Visual check of the fit: true test targets as black points, the model's
# predictions for the same inputs as a blue line
axes = plt.gca()
axes.scatter(diabetes_X_test, diabetes_y_test, color='black')
axes.plot(diabetes_X_test, diabetes_y_pred, color='blue', linewidth=3)

plt.show()

# Logistic regression

In [None]:
import matplotlib.pyplot as plt
import numpy as np

from sklearn.datasets import fetch_openml
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

### 1. Load data

In [None]:
# Load the MNIST digits from https://www.openml.org/d/554
# as_frame=False forces plain NumPy arrays: recent scikit-learn releases return a
# pandas DataFrame/Series for this dataset by default, while the rest of this
# notebook relies on positional row access (iterating over rows, test_img[i],
# test_lbl[i]), which only behaves as intended on arrays.
X, y = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame=False)

# Hold out 1/7 of the samples for testing; the fixed random_state makes the
# shuffled split reproducible
train_img, test_img, train_lbl, test_lbl = train_test_split(
    X, y, test_size=1/7.0, random_state=0)

In [None]:
# Preview the first five training digits side by side, each titled with its label
plt.figure(figsize=(20, 4))
for column, (pixels, digit) in enumerate(zip(train_img[0:5], train_lbl[0:5]), start=1):
    plt.subplot(1, 5, column)
    # every sample is a flat pixel vector that is displayed as a 28x28 grayscale image
    plt.imshow(np.reshape(pixels, (28, 28)), cmap=plt.cm.gray)
    plt.title('Training: %i\n' % int(digit), fontsize=20)

### 2. Create Model instance

In [None]:
# All parameters that are not specified keep their default values.
# The solver is set explicitly to 'saga' because the default solver
# was found to be very slow here.
logisticRegr = LogisticRegression(solver = 'saga')

### 3. Train model on training data

In [None]:
# Fit the classifier on the training images and their labels
logisticRegr.fit(train_img, train_lbl)

### 4. Predict labels on new data

In [None]:
# Predict several observations (images) in a single call:
# here the first ten test images
prediction = logisticRegr.predict(test_img[0:10])

### 5. Evaluate

In [None]:
# Show the first ten test digits in one row, each titled with the model's prediction.
# The true label is iterated alongside but is not displayed in this figure.
plt.figure(figsize=(20, 4))
for column, (pixels, _actual, guess) in enumerate(
        zip(test_img[0:10], test_lbl[0:10], prediction), start=1):
    plt.subplot(1, 10, column)
    plt.imshow(np.reshape(pixels, (28, 28)), cmap=plt.cm.gray)
    plt.title('Pred: %i'% int(guess), fontsize=20)

In [None]:
# Score the classifier on the full held-out test set and print the result
# (for scikit-learn classifiers, score() reports mean accuracy)
score = logisticRegr.score(test_img, test_lbl)
print(score)

In [None]:
# Make predictions on the entire test data
predictions = logisticRegr.predict(test_img)

# Positions within the test set where the predicted label differs from the true one.
# enumerate() supplies the position, replacing a hand-maintained counter.
misclassifiedIndexes = [
    position
    for position, (actual, predicted) in enumerate(zip(test_lbl, predictions))
    if actual != predicted
]

# Show the first five misclassified digits, titled with predicted vs. actual label
plt.figure(figsize=(20, 4))
for plotIndex, badIndex in enumerate(misclassifiedIndexes[0:5]):
    plt.subplot(1, 5, plotIndex + 1)
    plt.imshow(np.reshape(test_img[badIndex], (28, 28)), cmap=plt.cm.gray)
    plt.title('Predicted: {}, Actual: {}'.format(predictions[badIndex], test_lbl[badIndex]), fontsize=15)
