### 4.1) Import modules

In [None]:
from sklearn.datasets import load_breast_cancer, load_diabetes
from sklearn.model_selection import train_test_split
from sklearn import tree
from sklearn import ensemble
import pickle

### 4.2) Load Wisconsin breast cancer classification dataset
https://scikit-learn.org/stable/datasets/index.html#breast-cancer-dataset

In [None]:
# Load the feature matrix and target vector of the breast cancer dataset.
X, y = load_breast_cancer(return_X_y=True)

# Hold out 20% of the samples for testing; random_state=0 makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.8,
    test_size=0.2,
    random_state=0,
)

# Report the shape of each partition as a quick sanity check.
print('Training features size: ', X_train.shape)
print('Testing features size: ', X_test.shape)
print('Training targets size: ', y_train.shape)
print('Testing targets size: ', y_test.shape)

### 4.3) Define accuracy function

In [None]:
def accuracy(y_true, y_pred):
    """Return the fraction of predictions that equal the true labels.

    Args:
        y_true: Sequence of ground-truth labels (list, tuple, 1-D array, ...).
        y_pred: Sequence of predicted labels; must be as long as ``y_true``.

    Returns:
        float: Number of positions where ``y_pred[i] == y_true[i]`` divided
        by the number of samples.

    Raises:
        ValueError: If the two sequences differ in length or are empty.
    """
    m = len(y_true)

    # Fail loudly instead of silently ignoring surplus predictions (or
    # surfacing an opaque IndexError when predictions are missing).
    if len(y_pred) != m:
        raise ValueError(
            'y_true and y_pred must have the same length, got %d and %d'
            % (m, len(y_pred))
        )

    # Accuracy is undefined without samples; avoid a bare ZeroDivisionError.
    if m == 0:
        raise ValueError('accuracy is undefined for empty inputs')

    # Count with a plain Python int so the result is a built-in float even
    # when the inputs are NumPy arrays.
    num_correct = sum(1 for truth, pred in zip(y_true, y_pred) if pred == truth)

    return num_correct / m

### 4.4) Train and evaluate Decision Tree classifier

In [None]:
# Build and fit the tree in one expression (fit() returns the estimator itself).
# max_depth caps the tree depth, which applies regularization.
dt_clf = tree.DecisionTreeClassifier(random_state=0, max_depth=5).fit(X_train, y_train)

# Predict on the test features and score the predictions against the test targets.
y_pred = dt_clf.predict(X_test)
dt_clf_accuracy = accuracy(y_test, y_pred)
print('Decision Tree Classifier accuracy: ', dt_clf_accuracy)

### 4.5) Save model using pickle

In [None]:
# Serialize the fitted classifier to disk. The context manager guarantees the
# file is flushed and closed even if pickling fails part-way.
# NOTE: the file name (including its 'Desicion' spelling) is kept unchanged so
# anything that already loads this path keeps working.
with open('Desicion_tree_classification_for_breast_cancer_dataset.p', 'wb') as model_file:
    pickle.dump(dt_clf, model_file)

### 4.6) Visualize Decision Tree

In [None]:
import pydotplus
from IPython.display import Image

# Export the fitted tree as Graphviz DOT text; out_file=None makes
# export_graphviz return the DOT source as a string instead of writing a file.
# class_names are given in ascending label order -- this assumes label 0 is
# malignant and label 1 is benign (TODO confirm against the dataset docs).
dot_data = tree.export_graphviz(
    dt_clf,
    out_file=None,
    max_depth=5,
    label='all',
    impurity=True,
    proportion=False,
    filled=False,
    rounded=True,
    leaves_parallel=False,
    class_names=['Malignant', 'Benign'],
)

# Render the DOT source to a PNG on disk, then show that image as the cell output.
graph = pydotplus.graph_from_dot_data(dot_data)
graph.write_png('DecisionTree.png')
Image('DecisionTree.png')

### 4.7) Train and evaluate Random Forest classifier 

In [None]:
# Build and fit the forest in one expression (fit() returns the estimator itself).
rf_clf = ensemble.RandomForestClassifier(random_state=0, max_depth=10).fit(X_train, y_train)

# Predict on the test features and score the predictions against the test targets.
y_pred = rf_clf.predict(X_test)
rf_clf_accuracy = accuracy(y_test, y_pred)
print('Random Forest Classifier accuracy: ', rf_clf_accuracy)

### 4.8) Load Diabetes regression dataset
https://scikit-learn.org/stable/datasets/index.html#diabetes-dataset

In [None]:
# Load the feature matrix and target vector of the diabetes dataset.
# NOTE: this rebinds X, y and the train/test names used by the earlier cells.
X, y = load_diabetes(return_X_y=True)

# Hold out 20% of the samples for testing; random_state=0 makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.8,
    test_size=0.2,
    random_state=0,
)

# Report the shape of each partition as a quick sanity check.
print('Training features size: ', X_train.shape)
print('Testing features size: ', X_test.shape)
print('Training targets size: ', y_train.shape)
print('Testing targets size: ', y_test.shape)

### 4.9) Define error function

In [None]:
def MSE(y_true, y_pred):
    """Return the mean squared error between targets and predictions.

    Args:
        y_true: Array-like of ground-truth values (list, tuple, NumPy array, ...).
        y_pred: Array-like of predicted values; must be broadcastable against
            ``y_true`` (normally the same shape).

    Returns:
        The sum of squared differences along the first axis divided by
        ``len(y_true)``: a scalar for 1-D inputs, one value per column for
        2-D inputs.

    Raises:
        ValueError: If ``y_true`` is empty, or if NumPy cannot broadcast the
            two inputs together.
    """
    # Local import keeps this cell self-contained; NumPy is not imported at
    # the top of the notebook.
    import numpy as np

    m = len(y_true)

    # The mean is undefined without samples; avoid a bare ZeroDivisionError.
    if m == 0:
        raise ValueError('MSE is undefined for empty inputs')

    # Coerce to arrays so plain Python sequences work as well as ndarrays.
    residuals = np.asarray(y_true) - np.asarray(y_pred)

    # Reduce over the sample axis in one vectorized call and divide once,
    # rather than scaling every term by 1/m before a Python-level sum.
    mse = np.sum(residuals ** 2, axis=0) / m

    return mse

### 4.10) Train and evaluate a Decision Tree regression model

In [None]:
# Build and fit the regression tree in one expression (fit() returns the estimator itself).
dt_reg = tree.DecisionTreeRegressor(random_state=0, max_depth=5).fit(X_train, y_train)

# Predict on the test features and measure the error against the test targets.
y_pred = dt_reg.predict(X_test)
dt_reg_error = MSE(y_test, y_pred)
print('Decision Tree Regressor error: ', dt_reg_error)

### 4.11) Save model using pickle

In [None]:
# Serialize the fitted regressor to disk. The context manager guarantees the
# file is flushed and closed even if pickling fails part-way.
# NOTE: the file name (including its 'Desicion' spelling) is kept unchanged so
# anything that already loads this path keeps working.
with open('Desicion_tree_regression_for_diabetes_dataset.p', 'wb') as model_file:
    pickle.dump(dt_reg, model_file)

### 4.12) Train and evaluate a Random Forest regression model

In [None]:
# Build and fit the regression forest in one expression (fit() returns the estimator itself).
rf_reg = ensemble.RandomForestRegressor(random_state=0, max_depth=5).fit(X_train, y_train)

# Predict on the test features and measure the error against the test targets.
y_pred = rf_reg.predict(X_test)
rf_reg_error = MSE(y_test, y_pred)
print('Random Forest Regressor error: ', rf_reg_error)