# Supervised learning models

## Linear classification: SGDClassifier

In [4]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import SGDClassifier

# Load the iris dataset.
iris = load_iris()

# Hold out 25% of the samples for testing; the fixed seed keeps the split
# repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.25,
    random_state=0,
)

# Fit the scaler on the training split only, then apply that same
# transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# [...] Train the model and compute its score on the training and test sets

## Support Vector Machine

In [3]:
from sklearn.datasets import fetch_olivetti_faces
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

# Fetch the Olivetti faces dataset.
faces = fetch_olivetti_faces()

# Hold out 25% of the samples for testing; the fixed seed keeps the split
# repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    faces.data,
    faces.target,
    test_size=0.25,
    random_state=0,
)

# [...] Train the model and compute its score on the training and test sets

## Naive Bayes

In [5]:
from sklearn.datasets import fetch_20newsgroups
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer, HashingVectorizer, CountVectorizer
from sklearn.naive_bayes import MultinomialNB

# Fetch every post of the 20 newsgroups corpus (subset="all").
news = fetch_20newsgroups(subset="all")

# Hold out 25% of the documents for testing; the fixed seed keeps the split
# repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    news.data,
    news.target,
    test_size=0.25,
    random_state=0,
)

# A vectorizer turns each document (a sequence of words) into a numeric
# vector. It is fitted on the training documents only; the test documents
# are then transformed with that fitted vectorizer.
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(X_train)
X_test = vectorizer.transform(X_test)

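# [...] Train the model and compute its score on the training and test sets.
# Then try swapping in a different vectorizer (CountVectorizer or
# HashingVectorizer) and compare the scores.
# Note: MultinomialNB rejects negative feature values, so in recent
# scikit-learn versions HashingVectorizer needs alternate_sign=False here.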

## Decision Trees
(See the preprocessing notebook)

## Random Forests

In [None]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier

# Load the iris dataset.
iris = load_iris()

# Hold out 25% of the samples for testing; random_state=0 keeps the split
# repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.25, random_state=0)

# Fit the scaler on the training split only, then apply it to both splits.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# [...] Train the model and compute its score on the training and test sets

# Unsupervised learning models

## Clustering with K-means

In [None]:
from sklearn.datasets import load_digits
from sklearn.preprocessing import scale
from sklearn.cluster import KMeans

# Load the digits dataset, then standardize its feature matrix with scale().
digits = load_digits()
data = scale(digits["data"])

# [...] Train the model and compute its score.
# Here there is no train/test split, so there is a single score for the whole
# dataset; the model is evaluated by the distance from each point to the
# centroid of its cluster.
