### Create Classification Models utilizing Scikit Learn

#### Load the dataset and split into two datasets - one for features and one for the target variable

In [1]:
#import the datasets module from scikit learn
#import the numpy and pandas libraries
#import the train_test_split module from scikit learn
from sklearn import datasets
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split


# Load the wine dataset, then pull out the feature matrix X and the
# class-label vector y that every model below is trained and scored on.
wine = datasets.load_wine()
X, y = wine.data, wine.target

#### Split the datasets into train and test datasets

In [2]:
# Hold out 20% of the samples as the test subset.
#   random_state=1 -> fixes the shuffle so the same split is produced on every run
#   stratify=y     -> keeps the class proportions of y in both the train and test subsets
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
    stratify=y,
)

#### Standardize the feature train and test datasets

In [3]:
# StandardScaler provides the standardization applied to the feature matrices.
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training features only, then apply that one fitted
# scaler to both subsets so train and test share the same transformation.
sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

#### Create a Logistic Regression Model utilizing Scikit Learn

In [4]:
# Logistic regression classifier, plus the accuracy metric that the
# evaluation cells use for every model in this notebook.
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Configure the model:
#   C=100.0        -> value of the regularization parameter 'C'
#   random_state=1 -> fixed seed
# then fit it on the standardized training features and the training labels.
# Prediction and scoring happen in the next cell.
lr = LogisticRegression(
    C=100.0,
    random_state=1,
)
lr.fit(X_train_std, y_train)

#### Print the accuracy of the Logistic Regression Model

In [6]:
# Predict class labels for the standardized test features, then print the
# accuracy against the true test labels, formatted to six decimal places.
y_pred = lr.predict(X_test_std)
print(
    'Accuracy: {:.6f}'.format(
        accuracy_score(y_true=y_test, y_pred=y_pred)
    )
)

Accuracy: 0.972222


#### Create a Support Vector Machine Model utilizing Scikit Learn

In [7]:
# Support vector classifier from scikit-learn.
from sklearn.svm import SVC

# Configure the model:
#   kernel='linear' -> linear kernel
#   C=100.0         -> value of the regularization parameter 'C'
#   random_state=1  -> fixed seed
# then fit it on the standardized training features and the training labels.
# Prediction and scoring happen in the next cell.
svm = SVC(
    kernel='linear',
    C=100.0,
    random_state=1,
)
svm.fit(X_train_std, y_train)

#### Print the accuracy of the Support Vector Machine Model

In [8]:
# Predict class labels for the standardized test features, then print the
# accuracy against the true test labels, formatted to six decimal places.
y_pred = svm.predict(X_test_std)
print(
    'Accuracy: {:.6f}'.format(
        accuracy_score(y_true=y_test, y_pred=y_pred)
    )
)

Accuracy: 0.972222


#### Create a Decision Tree Model utilizing Scikit Learn

In [9]:
# Decision tree classifier from scikit-learn.
from sklearn.tree import DecisionTreeClassifier

# Configure the model:
#   criterion='gini' -> split criterion
#   max_depth=4      -> maximum depth of the tree
#   random_state=1   -> fixed seed
# then fit it on the standardized training features and the training labels.
# Prediction and scoring happen in the next cell.
tree = DecisionTreeClassifier(
    criterion='gini',
    max_depth=4,
    random_state=1,
)
tree.fit(X_train_std, y_train)

#### Print the accuracy of the Decision Tree Model

In [10]:
# Predict class labels for the standardized test features, then print the
# accuracy against the true test labels, formatted to six decimal places.
y_pred = tree.predict(X_test_std)
print(
    'Accuracy: {:.6f}'.format(
        accuracy_score(y_true=y_test, y_pred=y_pred)
    )
)

Accuracy: 0.972222


#### Create a Random Forest Model utilizing Scikit Learn

In [11]:
# Random forest classifier from scikit-learn's ensemble module.
from sklearn.ensemble import RandomForestClassifier

# Configure the model:
#   criterion='gini' -> split criterion
#   n_estimators=50  -> number of trees in the forest
#   random_state=1   -> fixed seed
#   n_jobs=3         -> number of parallel jobs
# then fit it on the standardized training features and the training labels.
# Prediction and scoring happen in the next cell.
forest = RandomForestClassifier(
    criterion='gini',
    n_estimators=50,
    random_state=1,
    n_jobs=3,
)
forest.fit(X_train_std, y_train)

#### Print the accuracy of the Random Forest Model

In [12]:
# Predict class labels for the standardized test features, then print the
# accuracy against the true test labels, formatted to six decimal places.
y_pred = forest.predict(X_test_std)
print(
    'Accuracy: {:.6f}'.format(
        accuracy_score(y_true=y_test, y_pred=y_pred)
    )
)

Accuracy: 1.000000


#### Create a K-Nearest Neighbors Model utilizing Scikit Learn

In [13]:
# K-nearest neighbors classifier from scikit-learn.
from sklearn.neighbors import KNeighborsClassifier

# Configure the model:
#   n_neighbors=5      -> number of neighbors
#   p=2                -> distance measure parameter (2 for Euclidean distance)
#   metric='minkowski' -> distance metric
# then fit it on the standardized training features and the training labels.
# Prediction and scoring happen in the next cell.
knn = KNeighborsClassifier(
    n_neighbors=5,
    p=2,
    metric='minkowski',
)
knn.fit(X_train_std, y_train)

#### Print the accuracy of the K-Nearest Neighbors Model

In [14]:
# Predict class labels for the standardized test features, then print the
# accuracy against the true test labels, formatted to six decimal places.
y_pred = knn.predict(X_test_std)
print(
    'Accuracy: {:.6f}'.format(
        accuracy_score(y_true=y_test, y_pred=y_pred)
    )
)

Accuracy: 0.972222
