# My first `scikit-learn` notebook

In [None]:
import pandas as pd
import numpy as np
from random import choices
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression

Load a dataset

In [None]:
# Read the forecast table from disk into a DataFrame.
forecast = pd.read_csv(filepath_or_buffer='data/Forecast.csv')
# Preview the first five rows (the same as head() with its default).
forecast.head(n=5)

Set up the `numpy` arrays used to train the classifiers

In [None]:
# pop() removes the 'Go-Out' column from the frame in place and returns it,
# so whatever is left in `forecast` afterwards is the feature matrix.
target_column = forecast.pop('Go-Out')
y = target_column.values   # target feature
X = forecast.values        # training data
# Show the container types of both arrays.
type(X), type(y)

Train a *k*-NN classifier

In [None]:
# Build and train a 3-nearest-neighbour classifier; fit() returns the
# estimator itself, so construction and training can be chained.
kNN = KNeighborsClassifier(n_neighbors=3).fit(X, y)
# Two query rows, one value per feature column of X.
X_test = np.array([[8, 70, 11], [8, 69, 15]])
kNN.predict(X_test)

All `sklearn` classifiers implement the `Estimator` API.

In [None]:
# Train a decision tree with default settings on the same data and
# classify the same query rows used for k-NN.
tree = DecisionTreeClassifier().fit(X, y)
tree.predict(X_test)

In [None]:
# Train a logistic regression model with default settings and classify
# the same query rows.
lr = LogisticRegression().fit(X, y)
lr.predict(X_test)

Swapping between classifiers (Estimators) makes model selection easy. 

In [None]:
# The three models share the same fit/predict interface, so a single loop
# can retrain each one and print its predictions for X_test.
cfrs = [kNN, tree, lr]
for cfr in cfrs:
    # fit() returns the estimator, so prediction can be chained onto it.
    print(cfr.fit(X, y).predict(X_test))

## Preprocessing
All preprocessing modules implement the `Transformer` API.

In [None]:
from sklearn import preprocessing

# Standardise each feature to zero mean and unit variance.
scaler = preprocessing.StandardScaler()
scaler.fit(X)   # learn the scaling parameters from the training data only
# Apply the scaling learned from X to both the training and the query rows.
X_scaled = scaler.transform(X)
X_test_scaled = scaler.transform(X_test)
X_test_scaled

In [None]:
# Rescale each feature so that the training data spans the range [0, 1].
# The scaler is fitted on X only and then reused for the query rows.
mm_scaler = preprocessing.MinMaxScaler().fit(X)
X_scaled = mm_scaler.transform(X)
X_test_scaled = mm_scaler.transform(X_test)
X_test_scaled