# Decision Trees

In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

In [2]:
iris_data = load_iris()
X = iris_data.data
y = iris_data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [3]:
model = DecisionTreeClassifier(max_leaf_nodes=3, random_state=0)
model.fit(X_train, y_train)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=3, min_impurity_decrease=0.0,
            min_impurity_split=None, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            presort=False, random_state=0, splitter='best')

In [4]:
from sklearn.tree import _tree

def find_rules(tree, features):
    dt = tree.tree_
    def visitor(node, depth):
        indent = ' ' * depth
        if dt.feature[node] != _tree.TREE_UNDEFINED:
            print('{}if <{}> <= {}:'.format(indent, features[node], round(dt.threshold[node], 2)))
            visitor(dt.children_left[node], depth + 1)
            print('{}else:'.format(indent))
            visitor(dt.children_right[node], depth + 1)
        else:
            print('{}return {}'.format(indent, dt.value[node]))
    visitor(0, 1)

In [5]:
find_rules(model, iris_data.feature_names)

 if <sepal length (cm)> <= 0.8:
  return [[35.  0.  0.]]
 else:
  if <petal length (cm)> <= 4.75:
   return [[ 0. 34.  1.]]
  else:
   return [[ 0.  5. 37.]]


# Support Vector Machines

In [2]:
from sklearn.datasets import load_boston
boston_data = load_boston()

In [3]:
X = boston_data.data
y = boston_data.target
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [4]:
from sklearn import svm, linear_model
from sklearn.metrics import mean_squared_error

In [6]:
model = svm.SVR()
model.fit(X_train, y_train)
model.score(X_test, y_test)

0.02125525852558763

In [54]:
model = linear_model.LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
model.score(X_test, y_test)

0.7258515818230054

# Polynomial Regression

In [55]:
from sklearn.preprocessing import PolynomialFeatures
import numpy as np

In [56]:
X = np.arange(4).reshape(2, 2)
X

array([[0, 1],
       [2, 3]])

In [57]:
poly = PolynomialFeatures(degree=2)
poly.fit_transform(X)

array([[1., 0., 1., 0., 0., 1.],
       [1., 2., 3., 4., 6., 9.]])

In [58]:
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline

In [59]:
model = Pipeline([('poly', PolynomialFeatures(degree=3)),
                  ('linear', LinearRegression(fit_intercept=False))])

In [60]:
x = np.arange(5)
y = 3 - 2 * x + x ** 2 - x ** 3
np.stack([x, y])

array([[  0,   1,   2,   3,   4],
       [  3,   1,  -5, -21, -53]])

In [61]:
model.fit(x[:, None], y)

Pipeline(memory=None,
     steps=[('poly', PolynomialFeatures(degree=3, include_bias=True, interaction_only=False)), ('linear', LinearRegression(copy_X=True, fit_intercept=False, n_jobs=1, normalize=False))])

In [62]:
model.named_steps['linear'].coef_

array([ 3., -2.,  1., -1.])

# Regularization

In [7]:
from sklearn.datasets import load_boston
boston_data = load_boston()
X = boston_data.data
y = boston_data.target
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [8]:
from sklearn import linear_model
import matplotlib.pyplot as plt
from pprint import pprint as _p

In [9]:
model = linear_model.LinearRegression()
model.fit(X_train, y_train)

for coef, label in zip(model.coef_, boston_data.feature_names):
    print("{:10.4f}".format(coef), label)

   -0.1281 CRIM
    0.0378 ZN
    0.0586 INDUS
    3.2401 CHAS
  -16.2223 NOX
    3.8935 RM
   -0.0128 AGE
   -1.4233 DIS
    0.2345 RAD
   -0.0082 TAX
   -0.9300 PTRATIO
    0.0119 B
   -0.5485 LSTAT


In [10]:
model = linear_model.Ridge(alpha=1)
model.fit(X_train, y_train)
for coef, label in zip(model.coef_, boston_data.feature_names):
    print("{:10.4f}".format(coef), label)

   -0.1217 CRIM
    0.0388 ZN
    0.0236 INDUS
    3.0266 CHAS
   -8.0750 NOX
    3.8997 RM
   -0.0196 AGE
   -1.3024 DIS
    0.2165 RAD
   -0.0090 TAX
   -0.8392 PTRATIO
    0.0121 B
   -0.5679 LSTAT


In [11]:
model = linear_model.Lasso(alpha=0.1)
model.fit(X_train, y_train)
for coef, label in zip(model.coef_, boston_data.feature_names):
    print("{:10.4f}".format(coef), label)

   -0.1147 CRIM
    0.0401 ZN
   -0.0000 INDUS
    1.6279 CHAS
   -0.0000 NOX
    3.7245 RM
   -0.0221 AGE
   -1.1368 DIS
    0.2071 RAD
   -0.0102 TAX
   -0.7520 PTRATIO
    0.0126 B
   -0.6088 LSTAT
