# ML Session #1 Demo



## Regression: Data loading + Train/test split



In [None]:
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split

# Load the regression features/targets as plain arrays.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 -- confirm the environment pins an older release before running this.
X, y = load_boston(return_X_y=True)

# Hold out 20% of the data for testing, then carve 25% of the remainder
# (0.25 * 0.8 = 20% of the total) off as a validation set.
tmp_X_train, X_test, tmp_y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
X_train, X_validation, y_train, y_validation = train_test_split(
    tmp_X_train, tmp_y_train, test_size=0.25, random_state=42
)

# Report the shape of every split, features first and targets second.
for label, split in (
    ('X_train shape: ', X_train),
    ('X_validation shape: ', X_validation),
    ('X_test shape: ', X_test),
    ('y_train shape: ', y_train),
    ('y_validation shape: ', y_validation),
    ('y_test shape: ', y_test),
):
    print(label, split.shape)



## Regression: Basic plot



In [None]:
from matplotlib import pyplot as plt
%matplotlib inline

# Histogram of the regression target: the x-axis carries the target values and
# the y-axis the number of samples falling into each bin.
plt.figure(figsize=(6.5,6))
plt.hist(y, alpha=0.7, color='firebrick', edgecolor='k', label='y (boston)')
plt.xlabel('y value')
plt.ylabel('# samples')
plt.title("Distribution of y (boston)")
plt.tight_layout()



## Regression: Training



In [None]:
from sklearn import linear_model

# Fit an ordinary least-squares baseline on the training split; fit() returns
# the estimator itself, so construction and fitting can be chained.
reg_model = linear_model.LinearRegression().fit(X_train, y_train)

# One learned coefficient per input feature.
print('Learned regression weights: \n', reg_model.coef_)



## Regression: Estimate error



In [None]:
from sklearn.metrics import mean_squared_error

# Score the baseline on the validation split (not the test split, which is
# kept untouched for a final evaluation).
y_pred = reg_model.predict(X_validation)
mse = mean_squared_error(y_true=y_validation, y_pred=y_pred)

print('Baseline Linear Regression MSE (validation): ', mse)



## Classifier: Data generation



In [None]:
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline



In [None]:
num_samples = 1000
num_features = 2

# Draw the features from a seeded generator so that Restart & Run All always
# produces the same dataset (and therefore the same downstream accuracies).
rng = np.random.default_rng(42)
X = rng.uniform(low=0.0, high=10.0, size=(num_samples, num_features))
print('X shape  : ', X.shape)

# Binary label: 1 when the mean of a sample's features exceeds 5, else 0.
# The classes are therefore separated by a straight line in feature space.
y = (X.mean(axis=1) > 5).astype(int)
unique, counts = np.unique(y, return_counts=True)
print('y classes: ', dict(zip(unique, counts)))



## Classifier: Basic plot

In [None]:
# Scatter the two features against each other, coloured by class label.
fig, ax = plt.subplots()
ax.scatter(X[:, 0], X[:, 1], c=y)
ax.set_xlabel('Feature X0')
ax.set_ylabel('Feature X1')
fig.tight_layout();



## Classifier: Train/test split



In [None]:
from sklearn.model_selection import train_test_split

# Train/validation/test split: 20% is held out for testing, then 25% of the
# remainder (0.25 * 0.8 = 20% of the total) becomes the validation set.
tmp_X_train, X_test, tmp_y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
X_train, X_validation, y_train, y_validation = train_test_split(
    tmp_X_train, tmp_y_train, test_size=0.25, random_state=42
)

# Report the shape of every split, features first and targets second.
for label, split in (
    ('X_train shape: ', X_train),
    ('X_validation shape: ', X_validation),
    ('X_test shape: ', X_test),
    ('y_train shape: ', y_train),
    ('y_validation shape: ', y_validation),
    ('y_test shape: ', y_test),
):
    print(label, split.shape)



## Classifier: kNN



In [None]:
from sklearn.neighbors import KNeighborsClassifier

# 5-nearest-neighbours classifier trained on the training split; fit() returns
# the estimator, so it can be chained onto the constructor.
kNN_model = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

# Predictions on the validation split, scored in the next cell.
y_pred = kNN_model.predict(X_validation)



In [None]:
from sklearn.metrics import accuracy_score

# Fraction of validation samples whose kNN prediction matches the true label.
knn_validation_accuracy = accuracy_score(y_validation, y_pred)
print('kNN validation accuracy: ', knn_validation_accuracy)



## Classifier: Logistic Regression



In [None]:
from sklearn.linear_model import LogisticRegression

# Logistic regression with default settings, trained on the training split.
lr_model = LogisticRegression().fit(X_train, y_train)

# Overwrites the kNN predictions: from here on y_pred holds the logistic
# regression predictions for the validation split.
y_pred = lr_model.predict(X_validation)
lr_validation_accuracy = accuracy_score(y_validation, y_pred)
print('Logistic Regression validation accuracy: ', lr_validation_accuracy)



## Confusion Matrix



In [None]:
from sklearn.metrics import confusion_matrix

# Confusion matrix for whatever predictions y_pred currently holds against the
# validation labels; displayed as the cell's output.
confusion_matrix(y_true=y_validation, y_pred=y_pred)



In [None]:
import seaborn as sns

# Final evaluation: predict on the held-out test split with the logistic
# regression model (this overwrites the validation predictions in y_pred).
y_pred = lr_model.predict(X_test)

# fmt='d' prints the cell counts as plain integers; seaborn's default
# annotation format ('.2g') would render counts >= 100 as e.g. '1e+02'.
# confusion_matrix puts true labels on rows and predicted labels on columns.
ax = sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, fmt='d')
ax.set(
    xlabel='Predicted label',
    ylabel='True label',
    title='Logistic Regression confusion matrix (test)',
);



## Unsupervised: Clustering with KMeans



In [None]:
# Two hand-placed groups of six 2-D points each: one near the origin and one
# around (6, 6).
lower_left_points = [[1,0], [0,1], [1,1], [1,2], [2,1], [2,2]]
upper_right_points = [[6,5], [5,6], [6,6], [7,6], [6,7], [7,7]]
X = np.array(lower_left_points + upper_right_points)

# Reference labels used only for colouring: 1 for the first six points, 0 for
# the last six.
y = np.repeat([1, 0], 6)

# Plot the toy points coloured by their reference label; the trailing ';'
# suppresses the scatter object's text repr in the cell output.
plt.scatter(X[:, 0], X[:, 1], c=y);



In [None]:
from sklearn.cluster import KMeans

# random_state fixes the centroid initialisation so the cluster indices (and
# hence the predict() output below) are the same on every run.
# n_init is set explicitly because its default differs between scikit-learn
# releases (10 in older versions, 'auto' in newer ones).
kmeans = KMeans(n_clusters=2, n_init=10, random_state=42)
kmeans.fit(X)

# Assign two unseen points to the nearest learned centroid; the returned
# values are cluster indices, not the reference labels in y.
kmeans.predict([[0,2], [6,4]])



In [None]:
# Display the coordinates of the learned centroids (one row per cluster).
kmeans.cluster_centers_

