In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import numpy as nd
from numpy import random
import pandas as pd
from pandas import Series
from IPython.display import Image, HTML, display
import os
import sklearn
import sklearn.datasets

In [2]:
# Read the csv file into a pandas DataFrame
wine_data = pd.read_csv('./Data/wine.csv')
wine_data.head()

Unnamed: 0.1,Unnamed: 0,fixed_acidity,volatile_acidity,citric_acid,residual_sugar,chlorides,free_sulfur_dioxide,total_sulfur_dioxide,density,pH,sulphates,alcohol,quality,color,high_quality
0,0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,red,0
1,1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5,red,0
2,2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5,red,0
3,3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6,red,1
4,4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,red,0


In [4]:
# Assign the data to X and y
X = wine_data.drop({"color", "high_quality"}, axis=1)

y = wine_data["high_quality"].values.reshape(-1, 1)

print(X.shape, y.shape)

(6497, 13) (6497, 1)


In [None]:
# Add graphviz executables
os.environ['PATH'] = os.environ['PATH']+';'+os.environ['CONDA_PREFIX']+r"\Library\bin\graphviz"

## Random Tree Classifier

In [None]:
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier

In [None]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [None]:
clf = tree.DecisionTreeClassifier()
clf = clf.fit(X_train, y_train)
clf.score(X_test, y_test)
print(X_train, y_train)

In [None]:
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(n_estimators=200)
rf = rf.fit(X_train, y_train.ravel())
rf.score(X_test, y_test)
feature_names = X.columns

In [None]:
# Calculate feature importance
importances = rf.feature_importances_
importances

In [None]:
sorted(zip(rf.feature_importances_, feature_names), reverse=True)

In [None]:
# Create a decision tree graph
import graphviz
dot_data = tree.export_graphviz(
    clf, out_file=None, 
    feature_names=importances,  
    class_names=feature_names,  
    filled=True, rounded=True,  
    special_characters=True)  

import pydotplus
graph = pydotplus.graph_from_dot_data(dot_data)
# graph.write_png('./Images/wine_quality.png')

graph = graphviz.Source(dot_data)  
graph

In [None]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

## KNN

In [None]:
from sklearn.preprocessing import StandardScaler

# Create StandardScater model and fit it to the training data
X_scaler = StandardScaler().fit(X_train)

In [None]:
# Transform the training and testing data using the X_scaler and y_scaler models
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Loop through different k values to see which has the highest accuracy
train_scores = []
test_scores = []
for k in range(1, 20, 2):
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train_scaled, y_train.ravel())
    train_score = knn.score(X_train_scaled, y_train.ravel())
    test_score = knn.score(X_test_scaled, y_test)
    train_scores.append(train_score)
    test_scores.append(test_score)
    print(f"k: {k}, Train/Test Score: {train_score:.3f}/{test_score:.3f}")
    
    
plt.plot(range(1, 20, 2), train_scores, marker='o')
plt.plot(range(1, 20, 2), test_scores, marker="x")
plt.xlabel("k neighbors")
plt.ylabel("Testing accuracy Score")
plt.savefig('./Images/train_test_score.png')
plt.show()

In [None]:
knn = KNeighborsClassifier(n_neighbors=13)
knn.fit(X_train_scaled, y_train.ravel())
print('k=13 Test Acc: %.3f' % knn.score(X_test_scaled, y_test))

## SVM

In [None]:
from matplotlib import style
style.use("ggplot")
from matplotlib import rcParams
rcParams['figure.figsize'] = 10, 8
import warnings
warnings.filterwarnings('ignore')

In [None]:
from sklearn.datasets.samples_generator import make_blobs
X, y = make_blobs(n_samples=40, centers=2, random_state=42, cluster_std=1.25)
plt.scatter(X[:, 0], X[:, 1], c=y, s=100, cmap="bwr");
plt.savefig('./Images/svm_scatter.png')
plt.show()

In [None]:
# Create SVC Model
from sklearn.svm import SVC 
model = SVC(kernel='poly')
model.fit(X, y)

In [None]:
# Plot the decision boundaries
x_min = X[:, 0].min()
x_max = X[:, 0].max()
y_min = X[:, 1].min()
y_max = X[:, 1].max()

XX, YY = np.mgrid[x_min:x_max, y_min:y_max]
Z = model.decision_function(np.c_[XX.ravel(), YY.ravel()])

# Put the result into a color plot
Z = Z.reshape(XX.shape)
plt.pcolormesh(XX, YY, Z > 0, cmap=plt.cm.Paired)
plt.contour(XX, YY, Z, colors=['k', 'k', 'k'],
            linestyles=['--', '-', '--'], levels=[-.5, 0, .5])
plt.scatter(X[:, 0], X[:, 1], c=y, cmap='bwr', edgecolor='k', s=100)
plt.savefig('./Images/decision_plot.png')
plt.show()
#plt.close()

In [None]:
X, y = make_blobs(n_samples=100, centers=2, random_state=0, cluster_std=.95)
plt.scatter(X[:, 0], X[:, 1], c=y, s=100, cmap="bwr");
plt.savefig('./Images/distribution_blob.png')
plt.show()
plt.close

In [None]:
# Split data into training and testing
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [None]:
# Fit to the training data and validate with the test data
model = SVC(kernel='poly')
model.fit(X_train, y_train)
predictions = model.predict(X_test)

In [None]:
# Plot the decision boundaries
x_min = X[:, 0].min()
x_max = X[:, 0].max()
y_min = X[:, 1].min()
y_max = X[:, 1].max()

XX, YY = np.mgrid[x_min:x_max, y_min:y_max]
Z = model.decision_function(np.c_[XX.ravel(), YY.ravel()])

# Put the result into a color plot
Z = Z.reshape(XX.shape)
plt.pcolormesh(XX, YY, Z > 0, cmap=plt.cm.Paired)
plt.contour(XX, YY, Z, colors=['k', 'k', 'k'],
            linestyles=['--', '-', '--'], levels=[-.5, 0, .5])
plt.scatter(X[:, 0], X[:, 1], c=y, cmap='bwr', edgecolor='k', s=100)
plt.show()

In [None]:
# Calculate classification report
from sklearn.metrics import classification_report
print(classification_report(y_test, predictions,
                            target_names=["blue", "red"]))

In [None]:
# Model Accuracy
print('Test Acc: %.3f' % model.score(X_test, y_test))

In [None]:
# Calculate classification report
from sklearn.metrics import classification_report
print(classification_report(y_test, predictions,
                            target_names=["blue", "red"]))

## GridSearch Estimator

In [None]:
# Create the GridSearch estimator along with a parameter object containing the values to adjust
from sklearn.model_selection import GridSearchCV
param_grid = {'C': [1, 5, 10, 50],
              'gamma': [0.0001, 0.0005, 0.001, 0.005]}
grid = GridSearchCV(model, param_grid, verbose=3)

In [None]:
# Fit the model using GSE 
grid.fit(X_train, y_train)

In [None]:
# List the best parameters for this dataset
print(grid.best_params_)

In [None]:
# List the best score
print(grid.best_score_)

In [None]:
# Make predictions with the hypertuned model
predictions = grid.predict(X_test)

In [None]:
# Calculate classification report
from sklearn.metrics import classification_report
print(classification_report(y_test, predictions,
                            target_names=["blue", "red"]))

## Deep Learning

In [None]:
# Set the seed value for the notebook so the results are reproducible
from numpy.random import seed
seed(1)
from tensorflow import random
random.set_seed(1)

In [None]:
# Create Data
X, y = sklearn.datasets.make_circles(noise=0.05, factor=.2, random_state=42)

In [None]:
# Visualize Data
plt.scatter(X[:, 0], X[:, 1], c=y)

In [None]:
from tensorflow.keras.utils import to_categorical

In [None]:
# One-hot encoding
y_train_categorical = to_categorical(y_train)
y_test_categorical = to_categorical(y_test)

In [None]:
# Create normal neural network with 2 inputs, 6 hidden nodes, and 2 outputs
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

model = Sequential()
model.add(Dense(units=6, activation='relu', input_dim=2))
model.add(Dense(units=2, activation='softmax'))

In [None]:
model.summary()

In [None]:
# Compile the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])