In [11]:
# Core numerical / plotting stack for the notebook.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
import warnings
# NOTE(review): this silences every warning category for the rest of the
# session, including any solver/convergence warnings raised by the fits
# below — consider narrowing it to specific categories.
warnings.filterwarnings('ignore') 
# Seaborn dark-grid theme and a large default figure size for all plots.
sns.set_style('darkgrid')
mpl.rcParams['figure.figsize'] = [15,10]
# Used further down to hold out a test set before scoring.
from sklearn.model_selection import train_test_split

### Fitting and predicting

In [3]:
import sklearn.datasets

# Fetch the pre-vectorized 20 newsgroups dataset (downloaded on first use),
# then pull out the feature matrix and the label vector separately.
newsgroups = sklearn.datasets.fetch_20newsgroups_vectorized()
X = newsgroups.data
y = newsgroups.target

Downloading 20news dataset. This may take a few minutes.
Downloading dataset from https://ndownloader.figshare.com/files/5975967 (14 MB)


In [4]:
# Dimensions of the feature matrix: (number of samples, number of features).
X.shape

(11314, 130107)

In [5]:
# Length of the label vector; it should match the number of rows in X.
y.shape

(11314,)

In [6]:
from sklearn.neighbors import KNeighborsClassifier

# Classify each point by the label of its single closest training example.
knn = KNeighborsClassifier(n_neighbors=1)

In [7]:
# Fit on the entire dataset (nothing is held out at this point).
knn.fit(X,y)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=1, p=2,
                     weights='uniform')

In [8]:
# Predict labels for the same samples the model was fitted on.
y_pred = knn.predict(X)

### Model evaluation

In [9]:
# Accuracy measured on the training data itself. With n_neighbors=1 a sample's
# nearest neighbour is normally the sample itself, so this figure is optimistic
# and says little about performance on unseen data.
knn.score(X,y)

0.9999116139296447

In [12]:
# Hold out a test set (train_test_split's default is 25% of the samples) so the
# model can be scored on data it was not fitted on. random_state pins the
# shuffle so the split, and every score computed from it, is reproducible
# across runs; the value 42 itself is arbitrary.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [13]:
# Refit the existing classifier, this time on the training split only.
knn.fit(X_train, y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=1, p=2,
                     weights='uniform')

In [14]:
# Accuracy on the held-out split — compare with the training-data score above.
knn.score(X_test, y_test)

0.6758571933545422

```python
from sklearn.neighbors import KNeighborsClassifier

# Create the classifier with its default settings and fit it on the training split
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)

# Only the first test example is reported, so predict on a one-row slice
# instead of classifying the whole test set and discarding the rest
pred = knn.predict(X_test[:1])[0]
print("Prediction for test example 0:", pred)
```

### Applying logistic regression and SVM

In [15]:
from sklearn.linear_model import LogisticRegression

# Logistic regression classifier with library-default hyperparameters.
lr = LogisticRegression()

In [16]:
# Switch to the wine dataset and fit on all of it (no train/test split here).
# Relies on `sklearn.datasets` having been imported in an earlier cell.
wine = sklearn.datasets.load_wine()
lr.fit(wine.data, wine.target)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=None, solver='warn', tol=0.0001, verbose=0,
                   warm_start=False)

In [17]:
# Accuracy on the same samples used for fitting (training accuracy).
lr.score(wine.data, wine.target)

0.9719101123595506

In [18]:
# Per-class probability estimates for the first wine sample (one column per class).
lr.predict_proba(wine.data[:1])

array([[9.95108696e-01, 4.35738999e-03, 5.33914306e-04]])

In [19]:
from sklearn.svm import LinearSVC

# Linear support vector classifier with default hyperparameters.
# NOTE(review): warnings are globally silenced at the top of the notebook, so a
# convergence warning from this fit would not be shown — worth checking, given
# the modest training accuracy reported in the next cell.
svm = LinearSVC()

svm.fit(wine.data, wine.target)

LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
          intercept_scaling=1, loss='squared_hinge', max_iter=1000,
          multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
          verbose=0)

In [20]:
# Training accuracy of the linear SVM on the wine data.
svm.score(wine.data, wine.target)

0.8033707865168539

In [21]:
from sklearn.svm import SVC
# Rebinds `svm` (previously the LinearSVC) to a kernel SVM with default settings.
svm = SVC()
svm.fit(wine.data, wine.target)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
    kernel='rbf', max_iter=-1, probability=False, random_state=None,
    shrinking=True, tol=0.001, verbose=False)

In [22]:
# Accuracy on the fitting data, so it is not an estimate of generalization.
svm.score(wine.data, wine.target)

1.0

In [24]:
from sklearn import datasets

# Load the handwritten-digits dataset and hold out a test set. random_state
# fixes the shuffle so the four scores printed below are the same on every
# run; the value 42 itself is arbitrary.
digits = datasets.load_digits()
X_train, X_test, y_train, y_test = train_test_split(
    digits.data, digits.target, random_state=42
)

# Apply logistic regression and print train / test accuracy
lr = LogisticRegression()
lr.fit(X_train, y_train)
print(lr.score(X_train, y_train))
print(lr.score(X_test, y_test))

# Apply SVM and print train / test accuracy; a large gap between the two
# numbers indicates overfitting to the training split
svm = SVC()
svm.fit(X_train, y_train)
print(svm.score(X_train, y_train))
print(svm.score(X_test, y_test))

0.9970304380103935
0.9577777777777777
1.0
0.5555555555555556


### Linear decision boundaries
