In [49]:

# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from matplotlib import pyplot

import warnings
warnings.filterwarnings('ignore')

#!pip install --upgrade scikit-learn==0.20.3
#!pip install pydotplus
    

In [50]:
# Some functions to plot our points and draw the lines
def plot_points(features, labels, fix_margins=True):
    """Scatter the two classes of points on the current pyplot figure.

    Rows whose label equals 1 are drawn as cyan triangles and rows whose
    label equals 0 as red squares; the legend names them 'Spam' and 'Ham'
    in that order.

    Parameters
    ----------
    features : array-like
        Converted with ``np.array``; column 0 is plotted on the x axis and
        column 1 on the y axis.
    labels : array-like
        Converted with ``np.array`` and compared against 1 and 0.
    fix_margins : bool, default True
        When true, both axes are pinned to the range 0..11.
    """
    coords = np.array(features)
    classes = np.array(labels)

    if fix_margins:
        pyplot.xlim(0, 11)
        pyplot.ylim(0, 11)

    # (label value, fill colour, marker) -- drawn in this order so the
    # scatter collections line up with the legend entries below.
    for value, colour, shape in ((1, 'cyan', '^'), (0, 'red', 's')):
        # First column of argwhere is the row index of each matching label.
        rows = np.argwhere(classes == value)[:, 0]
        pyplot.scatter(coords[rows, 0],
                       coords[rows, 1],
                       s=100,
                       color=colour,
                       edgecolor='k',
                       marker=shape)

    pyplot.xlabel('Lottery')
    pyplot.ylabel('Sale')
    pyplot.legend(['Spam', 'Ham'])

In [51]:
def plot_model(X, y, model, fix_margins=True, plot_step=0.01):
    """Plot the labelled points and shade the regions ``model`` predicts.

    The model is evaluated on a regular grid over the first two feature
    columns; the predictions are drawn as a translucent filled contour
    (red / blue) with a black outline between regions, and the figure is
    shown.

    Parameters
    ----------
    X : array-like with at least two columns
        Feature rows; columns 0 and 1 span the horizontal and vertical axes.
    y : array-like
        Labels, passed through to ``plot_points``.
    model : object with a ``predict`` method
        Called once with an ``(n_grid_points, 2)`` array of coordinates; the
        result is reshaped back to the grid.
    fix_margins : bool, default True
        When true the grid covers [0, 12) on both axes; otherwise it covers
        the data range padded by 1 on every side. The flag is also forwarded
        to ``plot_points`` so the axis limits agree with the grid.
    plot_step : float, default 0.01
        Spacing between neighbouring grid points.
    """
    X = np.array(X)
    y = np.array(y)
    # Forward the flag: previously plot_points always ran with its default
    # and pinned the axes even when the caller asked for data-driven margins.
    plot_points(X, y, fix_margins=fix_margins)

    if fix_margins:
        x_min, x_max = 0, 12
        y_min, y_max = 0, 12
    else:
        x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
        y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1

    xx, yy = np.meshgrid(np.arange(x_min, x_max, plot_step),
                         np.arange(y_min, y_max, plot_step))
    # One prediction per grid point, folded back into the grid's 2-D shape.
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    # Contour levels -1, 0, 1 give two filled bands, coloured red and blue.
    pyplot.contourf(xx, yy, Z, colors=['red', 'blue'], alpha=0.2, levels=range(-1, 2))
    pyplot.contour(xx, yy, Z, colors='k', linewidths=3)
    pyplot.show()

In [52]:
def display_tree(dt):
    """Render a fitted scikit-learn decision tree as a PNG image object.

    Parameters
    ----------
    dt : fitted decision tree estimator
        Passed unchanged to ``sklearn.tree.export_graphviz``.

    Returns
    -------
    IPython.display.Image
        Image wrapping the PNG bytes produced by pydotplus. The caller must
        display it (for example by making it the last expression of a cell).
    """
    # Imported inside the function so that pydotplus is only required when a
    # tree is actually drawn.
    from IPython.display import Image
    from sklearn.tree import export_graphviz
    import pydotplus

    # With out_file=None export_graphviz returns the DOT source as a string,
    # so no intermediate text buffer (and no dependency on `six`) is needed.
    dot_source = export_graphviz(dt, out_file=None,
                                 filled=True, rounded=True,
                                 special_characters=True)
    graph = pydotplus.graph_from_dot_data(dot_source)
    return Image(graph.create_png())

In [53]:
def plot_trees(model):
    """Draw every tree of a fitted ensemble, one figure per boosting stage.

    Parameters
    ----------
    model : fitted ensemble
        Must expose ``estimators_`` as a sequence whose items are themselves
        indexable; item ``[0]`` of each stage is the tree that gets drawn.
    """
    # Use the model that was passed in rather than a notebook-level global,
    # so the function works for any ensemble and on a fresh kernel.
    for stage in model.estimators_:
        tree.plot_tree(stage[0])
        pyplot.show()
In [54]:
def plot_regressor(model, features, labels):
    """Scatter the data and overlay the model's predictions as a line.

    Parameters
    ----------
    model : object with a ``predict`` method
        Called with a single-column 2-D array of 1000 evenly spaced inputs
        between 0 and 85.
    features, labels : array-like
        Coordinates of the observed points, passed straight to
        ``pyplot.scatter``.
    """
    # Fixed evaluation range for the prediction curve (0..85, 1000 samples).
    grid = np.linspace(0, 85, 1000)
    pyplot.scatter(features, labels)
    # reshape([-1, 1]) turns the 1-D grid into one column, one row per sample.
    pyplot.plot(grid, model.predict(grid.reshape([-1, 1])))
    pyplot.xlabel("Age")
    pyplot.ylabel("Days per week")
    pyplot.show()
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree

# Seed NumPy's global random generator.
np.random.seed(0)

# Spam e-mail dataset: each row is [Lottery, Sale, Spam], where the last
# value is the 1/0 class flag used for the labels below.
emails = np.array([
    [7, 8, 1], [3, 2, 0], [8, 4, 1],
    [2, 6, 0], [6, 5, 1], [9, 6, 1],
    [8, 5, 0], [7, 1, 0], [1, 9, 1],
    [4, 7, 0], [1, 3, 0], [3, 10, 1],
    [2, 2, 1], [9, 3, 0], [5, 3, 0],
    [10, 1, 0], [5, 9, 1], [10, 8, 1],
])
spam_dataset = pd.DataFrame(emails, columns=["Lottery", "Sale", "Spam"])
spam_dataset

# Split the frame into the class column and the two feature columns,
# then draw the raw points.
labels = spam_dataset['Spam']
features = spam_dataset.loc[:, ['Lottery', 'Sale']]
plot_points(features, labels)

In [55]:
!pip install pydotplus

In [56]:
from IPython.display import display

# Decision Tree: fit on the full spam dataset with a fixed random_state.
decision_tree_classifier = DecisionTreeClassifier(random_state=42)
decision_tree_classifier.fit(features, labels)
# A bare expression in the middle of a cell is not echoed by the notebook,
# so report the training accuracy explicitly.
print("Training accuracy:", decision_tree_classifier.score(features, labels))

# Draw decision tree. display_tree() only returns the image object, and this
# is not the last expression of the cell, so it must be displayed explicitly.
display(display_tree(decision_tree_classifier))
# Decision tree as map
plot_model(features, labels, decision_tree_classifier)
# plot_model has already shown its figure, so this draws the raw points
# once more on their own.
plot_points(features, labels)

In [57]:
# Train a small random forest classifier: 5 trees, each limited to depth 1.
from sklearn.ensemble import RandomForestClassifier
random_forest_classifier = RandomForestClassifier(random_state=0, n_estimators=5, max_depth=1)
random_forest_classifier.fit(features, labels)
# A bare expression in the middle of a cell is not echoed by the notebook,
# so report the training accuracy explicitly.
print("Training accuracy:", random_forest_classifier.score(features, labels))

# Plot the random forest's decision regions over the data.
plot_model(features, labels, random_forest_classifier)
# plot_model has already shown its figure, so this draws the raw points
# once more on their own.
plot_points(features, labels)

In [None]:
# Train an AdaBoost classifier: 5 boosting rounds with learning rate 0.5.
from sklearn.ensemble import AdaBoostClassifier
adaBoost_Classifier = AdaBoostClassifier(n_estimators=5, learning_rate=0.5, random_state=42)
adaBoost_Classifier.fit(features, labels)
# A bare expression in the middle of a cell is not echoed by the notebook,
# so report the training accuracy explicitly.
print("Training accuracy:", adaBoost_Classifier.score(features, labels))

# Plot the AdaBoost classifier's decision regions over the data.
plot_model(features, labels, adaBoost_Classifier)
# plot_model has already shown its figure, so this draws the raw points
# once more on their own.
plot_points(features, labels)

In [None]:
# Train a random forest classifier using the parameters discussed in class:
# 500 trees, at most 16 leaf nodes per tree, all CPU cores (n_jobs=-1).
# Note: this rebinds `random_forest_classifier`, replacing any model
# previously stored under that name.
from sklearn.ensemble import RandomForestClassifier
random_forest_classifier = RandomForestClassifier(n_estimators=500, max_leaf_nodes=16, n_jobs=-1, random_state=42)
random_forest_classifier.fit(features, labels)
# A bare expression in the middle of a cell is not echoed by the notebook,
# so report the training accuracy explicitly.
print("Training accuracy:", random_forest_classifier.score(features, labels))
# Plot the random forest's decision regions over the data.
plot_model(features, labels, random_forest_classifier)
# plot_model has already shown its figure, so this draws the raw points
# once more on their own.
plot_points(features, labels)

In [None]:
# Train an AdaBoost classifier using the parameters discussed in the lecture:
# depth-1 decision trees as the base learner, 200 rounds, learning rate 0.5.
# Note: this rebinds `adaBoost_Classifier`, replacing any model previously
# stored under that name.
from sklearn.ensemble import AdaBoostClassifier
# algorithm="SAMME.R" is deliberately not passed: it was the default in the
# scikit-learn releases that support it, it was deprecated in 1.4, and it is
# no longer accepted from 1.6 on, so omitting it keeps the cell runnable.
adaBoost_Classifier = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), n_estimators=200, learning_rate=0.5, random_state=42)
adaBoost_Classifier.fit(features, labels)
# A bare expression in the middle of a cell is not echoed by the notebook,
# so report the training accuracy explicitly.
print("Training accuracy:", adaBoost_Classifier.score(features, labels))
# Plot the AdaBoost classifier's decision regions over the data.
plot_model(features, labels, adaBoost_Classifier)
# plot_model has already shown its figure, so this draws the raw points
# once more on their own.
plot_points(features, labels)