# Ref material
https://github.com/ageron/handson-ml

https://colab.research.google.com/drive/1I7DvsJEAPHbtEk6AacEbYUF5uVo8p1a

## SKLEARN Make_Moons Dataset

In [0]:
# Imports for this cell, grouped up front.
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split

# 1000 shuffled points with no added noise; the shuffle itself is not
# seeded (random_state=None).
dataset = make_moons(
    n_samples=1000,
    shuffle=True,
    noise=None,
    random_state=None,
)

# make_moons hands back an (X, y) pair: point coordinates and class labels.
data_points, data_labels = dataset

# Hold out 33% of the samples for testing; this split is seeded.
X_train, X_test, y_train, y_test = train_test_split(
    data_points,
    data_labels,
    test_size=0.33,
    random_state=42,
)



In [0]:
# Shapes of the feature array and of the label array, one per line.
print(data_points.shape, data_labels.shape, sep="\n")

## Visualizing the generated data for classification

In [0]:
import matplotlib.pyplot as plt

# Square canvas with trimmed margins.
plt.figure(figsize=(8, 8))
plt.subplots_adjust(left=0.05, right=0.95, bottom=0.05, top=0.9)

# One marker per sample: the two feature columns give the x/y position and
# the class label selects the colour.
plt.scatter(
    data_points[:, 0],
    data_points[:, 1],
    c=data_labels,
    marker='o',
    s=25,
    edgecolor='k',
)


# Decision Tree (classification)

## Using Sklearn function

In [0]:
# Reference: https://scikit-learn.org/stable/modules/tree.html
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier

# Decision tree splitting on Gini impurity, with a fixed random_state.
descision_tree = DecisionTreeClassifier(random_state=0, criterion="gini")
descision_tree.fit(X_train, y_train)

# Predicted class for every held-out sample.
predictions = descision_tree.predict(X_test)

# accuracy_score is documented as accuracy_score(y_true, y_pred): pass the
# ground truth first, matching the confusion_matrix call further down.
score = accuracy_score(y_test, predictions)

# Report the accuracy as a percentage.
print(score * 100)

## Finding prediction probabilities

In [0]:
# Classify the single point (2, 5), then show the tree's per-class
# probability estimate for the same point, one result per line.
print(
    descision_tree.predict([[2, 5]]),
    descision_tree.predict_proba([[2, 5]]),
    sep="\n",
)

## Visualization

### Decision Tree Visualisation

In [0]:
# StringIO comes from the standard library: the sklearn.externals.six shim
# that used to re-export it is no longer shipped with scikit-learn.
from io import StringIO

from IPython.display import Image
from sklearn.tree import export_graphviz
import pydotplus

# Write the fitted tree as Graphviz DOT source into an in-memory text buffer.
dot_data = StringIO()
export_graphviz(descision_tree, out_file=dot_data,
                filled=True, rounded=True,
                special_characters=True)

# Parse the DOT source and render it to PNG; the Image is the cell's last
# expression so the notebook displays it inline.
graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
Image(graph.create_png())

### Confusion Matrix

In [0]:
from sklearn.metrics import confusion_matrix

# Confusion matrix of the held-out predictions, flattened row by row so all
# counts print on a single line.
print(confusion_matrix(y_test, predictions).ravel())

### Confusion Matrix Visualization of Decision Tree Classification

In [0]:
# Prints the module docstring; no module docstring is visible in this
# notebook export, so this line looks like leftover example boilerplate
# (NOTE(review): confirm before removing).
print(__doc__)

# Numerical and plotting libraries used by plot_confusion_matrix below.
import numpy as np
import matplotlib.pyplot as plt

# confusion_matrix builds the count matrix; unique_labels lists the label
# values that actually occur in the supplied label arrays.
from sklearn.metrics import confusion_matrix
from sklearn.utils.multiclass import unique_labels

# Display names for the classes; plot_confusion_matrix looks a name up by
# using the label value as an index into this list.
class_names = ['0','1']

def plot_confusion_matrix(y_true, y_pred, classes,
                          normalize=False,
                          title=None,
                          cmap=plt.cm.Blues):
    """
    Print the confusion matrix and draw it as an annotated heat map.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels.
    classes : sequence
        Display names for the classes. Each label value found in `y_true`
        or `y_pred` is used as an index into this sequence, so the labels
        must be valid integer indices for it.
    normalize : bool, default False
        When True, every row is divided by its row sum, so each cell is the
        fraction of that true class. A row with no samples stays all zeros
        instead of turning into NaN.
    title : str or None, default None
        Plot title. When empty or None, a default title reflecting
        `normalize` is used.
    cmap : matplotlib colormap, default plt.cm.Blues
        Colormap for the heat map.

    Returns
    -------
    The matplotlib Axes the matrix was drawn on.
    """
    if not title:
        if normalize:
            title = 'Normalized confusion matrix'
        else:
            title = 'Confusion matrix, without normalization'

    # Compute confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    # Only use the labels that appear in the data
    classes = [classes[i] for i in unique_labels(y_true, y_pred)]
    if normalize:
        # A label that occurs only in y_pred produces an all-zero row; skip
        # the division there so the row stays 0.0 rather than becoming NaN
        # (which would also break the colour threshold computed below).
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    fig, ax = plt.subplots()
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax)
    # We want to show all ticks...
    ax.set(xticks=np.arange(cm.shape[1]),
           yticks=np.arange(cm.shape[0]),
           # ... and label them with the respective list entries
           xticklabels=classes, yticklabels=classes,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Annotate every cell with its value: two decimals for fractions, plain
    # integers for counts. Text is white on cells above half the maximum so
    # it stays readable on the darker colours.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(j, i, format(cm[i, j], fmt),
                    ha="center", va="center",
                    color="white" if cm[i, j] > thresh else "black")
    fig.tight_layout()
    return ax

  
# Print NumPy floats with two decimals from here on.
np.set_printoptions(precision=2)

# Plot non-normalized confusion matrix
plot_confusion_matrix(
    y_true=y_test,
    y_pred=predictions,
    classes=class_names,
    title='Confusion matrix',
)
plt.show()


In [0]:
from sklearn.metrics import precision_recall_fscore_support

# Precision, recall and F-score for the current predictions, using the
# 'weighted' averaging mode.
print(
    precision_recall_fscore_support(
        y_true=y_test,
        y_pred=predictions,
        average='weighted',
    )
)

## Interchanging Labels of Data

In [0]:

from sklearn.metrics import accuracy_score

# Swap the two classes (0 <-> 1) in both splits.
# NOTE: this rebinds the notebook-wide y_test / y_train, so every later cell
# sees the swapped labels, and running this cell a second time swaps them
# back.
y_test = 1 - y_test
y_train = 1 - y_train

# Refit the same tree on the swapped labels and predict again.
descision_tree.fit(X_train, y_train)
predictions = descision_tree.predict(X_test)

# Observe the same accuracy. accuracy_score is documented as
# accuracy_score(y_true, y_pred), so the ground truth goes first.
score = accuracy_score(y_test, predictions)

print(score * 100)

In [0]:
# Observe how the confusion matrix changes once the labels are swapped.
plot_confusion_matrix(
    y_true=y_test,
    y_pred=predictions,
    classes=class_names,
    title='Confusion matrix',
)
plt.show()

In [0]:
from sklearn.metrics import precision_recall_fscore_support

# Same weighted precision / recall / F-score report, now on the swapped
# labels.
print(
    precision_recall_fscore_support(
        y_true=y_test,
        y_pred=predictions,
        average='weighted',
    )
)

## Exercises
1. Increase the number of datapoints in make_moons dataset and test accuracy of classification using Decision Tree.
1. Change the split criterion from "gini" to "entropy" and test accuracy of classification using Decision Tree.
1. Plot accuracies for different depths
1. Interchange labels and test accuracy

# Random Forest

## Execution

In [0]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Forest of 300 trees, each limited to depth 3. No random_state is set, so
# the fitted forest can differ from run to run.
clf = RandomForestClassifier(n_estimators=300, max_depth=3)
clf.fit(X_train, y_train)

predictions = clf.predict(X_test)

# accuracy_score is documented as accuracy_score(y_true, y_pred): ground
# truth first, predictions second.
score = accuracy_score(y_test, predictions)

# Printed as a fraction, not a percentage.
print(score)

In [0]:
# Built-in score function of the fitted forest, evaluated on the held-out
# split; left as the cell's last expression so the notebook shows the value.
clf.score(X_test, y_test)

In [0]:
# Imported here so the cell runs on its own instead of relying on names left
# behind by the decision-tree visualisation cell. StringIO comes from the
# standard library because the sklearn.externals.six shim is no longer
# shipped with scikit-learn.
from io import StringIO

from IPython.display import Image
from sklearn.tree import export_graphviz
import pydotplus

# Pick one fitted tree out of the forest (index 5 of clf.estimators_).
estimator = clf.estimators_[5]

# Export that tree as Graphviz DOT source into an in-memory text buffer.
dot_data = StringIO()
export_graphviz(estimator, out_file=dot_data,
                filled=True, rounded=True,
                special_characters=True)

# Render the DOT source to PNG; the Image is the cell's last expression so
# the notebook displays it inline.
graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
Image(graph.create_png())

## Visualization

### Confusion Matrix

In [0]:
# Confusion matrix (raw counts) for the random-forest predictions.
plot_confusion_matrix(
    y_true=y_test,
    y_pred=predictions,
    classes=class_names,
    title='Confusion matrix',
)
plt.show()

In [0]:
from sklearn.metrics import precision_recall_fscore_support

# Weighted precision / recall / F-score for the random-forest predictions.
print(
    precision_recall_fscore_support(
        y_true=y_test,
        y_pred=predictions,
        average='weighted',
    )
)

## Exercises

1. increase number of datapoints and test accuracy
1. increase n_estimators and test accuracy
1. plot accuracies for different depths

# AdaBoost

## Execution

In [0]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score

# AdaBoost ensemble of 10 estimators with a fixed random_state.
adaboost = AdaBoostClassifier(n_estimators=10, random_state=0)
adaboost.fit(X_train, y_train)

predictions = adaboost.predict(X_test)

# accuracy_score is documented as accuracy_score(y_true, y_pred): ground
# truth first, predictions second.
score = accuracy_score(y_test, predictions)

# Report the accuracy as a percentage.
print(score * 100)

## Visualization

In [0]:
# Confusion matrix (raw counts) for the AdaBoost predictions.
plot_confusion_matrix(
    y_true=y_test,
    y_pred=predictions,
    classes=class_names,
    title='Confusion matrix',
)
plt.show()

In [0]:
from sklearn.metrics import precision_recall_fscore_support

# Weighted precision / recall / F-score for the AdaBoost predictions.
print(
    precision_recall_fscore_support(
        y_true=y_test,
        y_pred=predictions,
        average='weighted',
    )
)

## Exercises
1. increase number of datapoints and test accuracy
1. increase n_estimators and test accuracy

# Gradient Boost

## Execution

In [0]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score

# Gradient-boosting ensemble with 100 boosting stages. No random_state is
# set here.
grad_boost = GradientBoostingClassifier(n_estimators=100)
grad_boost.fit(X_train, y_train)

predictions = grad_boost.predict(X_test)

# accuracy_score is documented as accuracy_score(y_true, y_pred): ground
# truth first, predictions second.
score = accuracy_score(y_test, predictions)

# Report the accuracy as a percentage.
print(score * 100)


## Visualization

In [0]:
# Confusion matrix (raw counts) for the gradient-boosting predictions.
plot_confusion_matrix(
    y_true=y_test,
    y_pred=predictions,
    classes=class_names,
    title='Confusion matrix',
)
plt.show()

In [0]:
from sklearn.metrics import precision_recall_fscore_support

# Weighted precision / recall / F-score for the gradient-boosting
# predictions.
print(
    precision_recall_fscore_support(
        y_true=y_test,
        y_pred=predictions,
        average='weighted',
    )
)

## Exercises
1. increase number of datapoints and test accuracy
1. increase n_estimators and test accuracy

# Bagging

## Execution

In [0]:
from sklearn.ensemble import BaggingClassifier
from sklearn.metrics import accuracy_score

# Bagging ensemble of 3 estimators. No random_state is set here.
bag = BaggingClassifier(n_estimators=3)
bag.fit(X_train, y_train)

predictions = bag.predict(X_test)

# accuracy_score is documented as accuracy_score(y_true, y_pred): ground
# truth first, predictions second.
score = accuracy_score(y_test, predictions)

# Report the accuracy as a percentage.
print(score * 100)

## Visualization

In [0]:
# Confusion matrix (raw counts) for the bagging predictions.
plot_confusion_matrix(
    y_true=y_test,
    y_pred=predictions,
    classes=class_names,
    title='Confusion matrix',
)
plt.show()

In [0]:
from sklearn.metrics import precision_recall_fscore_support

# Weighted precision / recall / F-score for the bagging predictions.
print(
    precision_recall_fscore_support(
        y_true=y_test,
        y_pred=predictions,
        average='weighted',
    )
)

## Exercises
1. increase number of datapoints and test accuracy
1. increase n_estimators and test accuracy

# Exercises - Tree Methods

1. Execute the methods on MNIST digits data set (ref to Working Day 1.ipynb)
1. Plot PR and ROC curves on the make_moons and circles data sets
1. Change hyperparameters of the algorithms and plot