In [8]:
# Classification
import pandas as pd
import numpy as np
from numpy import mean
from numpy import std
from pandas import DataFrame
from matplotlib import pyplot
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline
import csv


# linear models
from sklearn.linear_model import LogisticRegression

# Non-linear models
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB

# ensemble models
from sklearn.ensemble import RandomForestClassifier

#random Number generation
from scipy.stats import uniform as sp_rand
from scipy.stats import uniform 

#for splitting data into training and test
from sklearn.model_selection import train_test_split

#for hyperparameter optimization
from sklearn.model_selection import RandomizedSearchCV

class Initialise:
    """Base class holding the classifier name and the train/test data sets."""

    def __init__(self, classifier_name, X_train, X_test, y_train, y_test):
        """Store the classifier label and the four data splits on the instance.

        Args:
            classifier_name: Label of the selected classification algorithm.
            X_train: Feature data used for fitting.
            X_test: Feature data used for scoring and prediction.
            y_train: Target values matching ``X_train``.
            y_test: Target values matching ``X_test``.
        """
        self.classifier_name = classifier_name
        # Feature splits
        self.X_train, self.X_test = X_train, X_test
        # Target splits
        self.y_train, self.y_test = y_train, y_test
    
class LR_Class(Initialise):
    """Logistic regression classifier built on the data held by ``Initialise``."""

    def __init__(self, classifier_name, X_train, X_test, y_train, y_test):
        """Forward the classifier name and data splits to ``Initialise``."""
        super().__init__(classifier_name, X_train, X_test, y_train, y_test)

    def lr_classifier(self, max_iter=1000):
        """Fit a logistic regression model and evaluate it on the test split.

        Args:
            max_iter: Value passed to ``LogisticRegression(max_iter=...)``.
                Defaults to 1000, the value that was previously hard-coded.

        Returns:
            A ``(score, predictions)`` tuple: the value of ``lr.score`` on the
            test split and the labels predicted for ``X_test``.
        """
        lr = LogisticRegression(max_iter=max_iter)
        # Fit on the training split only
        lr.fit(self.X_train, self.y_train)
        # Predicted labels for the held-out features
        LR_predictions = lr.predict(self.X_test)
        # Score of the fitted model on the held-out data
        lr_score = lr.score(self.X_test, self.y_test)
        return lr_score, LR_predictions
        
class KNN_Class(Initialise):
    """K-nearest-neighbours classifier built on the data held by ``Initialise``."""

    def __init__(self, classifier_name, X_train, X_test, y_train, y_test):
        """Forward the classifier name and data splits to ``Initialise``."""
        super().__init__(classifier_name, X_train, X_test, y_train, y_test)

    def knn_classifier(self, n_neighbors=3):
        """Fit a k-nearest-neighbours model and evaluate it on the test split.

        Args:
            n_neighbors: Value passed to
                ``KNeighborsClassifier(n_neighbors=...)``. Defaults to 3, the
                value that was previously hard-coded.

        Returns:
            A ``(score, predictions)`` tuple: the value of ``knn.score`` on the
            test split and the labels predicted for ``X_test``.
        """
        knn = KNeighborsClassifier(n_neighbors=n_neighbors)
        knn.fit(self.X_train, self.y_train)
        knn_score = knn.score(self.X_test, self.y_test)
        knn_prediction = knn.predict(self.X_test)
        return knn_score, knn_prediction
    
class SVM_Class(Initialise):
    """Support vector classifier built on the data held by ``Initialise``."""

    def __init__(self, classifier_name, X_train, X_test, y_train, y_test):
        """Forward the classifier name and data splits to ``Initialise``."""
        super().__init__(classifier_name, X_train, X_test, y_train, y_test)

    def svm_classifier(self, random_state=1):
        """Fit an ``SVC`` model and evaluate it on the test split.

        Args:
            random_state: Value passed to ``SVC(random_state=...)``. Defaults
                to 1, the value that was previously hard-coded.

        Returns:
            A ``(score, predictions)`` tuple: the value of ``svm.score`` on the
            test split and the labels predicted for ``X_test``.
        """
        svm = SVC(random_state=random_state)
        svm.fit(self.X_train, self.y_train)
        svm_score = svm.score(self.X_test, self.y_test)
        svm_prediction = svm.predict(self.X_test)
        return svm_score, svm_prediction
      
        
class NB_Class(Initialise):
    """Gaussian naive Bayes classifier built on the data held by ``Initialise``."""

    def __init__(self, classifier_name, X_train, X_test, y_train, y_test):
        """Forward the classifier name and data splits to ``Initialise``."""
        super().__init__(classifier_name, X_train, X_test, y_train, y_test)

    def nb_classifier(self):
        """Fit ``GaussianNB`` on the training split and evaluate on the test split.

        Returns:
            A ``(score, predictions)`` tuple: the value of the model's
            ``score`` on the test split and the labels predicted for ``X_test``.
        """
        model = GaussianNB()
        model.fit(self.X_train, self.y_train)
        # Prediction and scoring are independent reads of the fitted model
        predicted = model.predict(self.X_test)
        accuracy = model.score(self.X_test, self.y_test)
        return accuracy, predicted

class DT_Class(Initialise):
    """Decision tree classifier built on the data held by ``Initialise``."""

    def __init__(self, classifier_name, X_train, X_test, y_train, y_test):
        """Forward the classifier name and data splits to ``Initialise``."""
        super().__init__(classifier_name, X_train, X_test, y_train, y_test)

    def dt_classifier(self):
        """Fit a default ``DecisionTreeClassifier`` and evaluate it on the test split.

        Returns:
            A ``(score, predictions)`` tuple: the value of the model's
            ``score`` on the test split and the labels predicted for ``X_test``.
        """
        # Constructed with library defaults (no random_state is supplied here)
        tree = DecisionTreeClassifier()
        tree.fit(self.X_train, self.y_train)
        # Prediction and scoring are independent reads of the fitted model
        predicted = tree.predict(self.X_test)
        accuracy = tree.score(self.X_test, self.y_test)
        return accuracy, predicted

class RF_Class(Initialise):
    """Random forest classifier built on the data held by ``Initialise``."""

    def __init__(self, classifier_name, X_train, X_test, y_train, y_test):
        """Forward the classifier name and data splits to ``Initialise``."""
        super().__init__(classifier_name, X_train, X_test, y_train, y_test)

    def rf_classifier(self, n_estimators=22, random_state=40):
        """Fit a random forest model and evaluate it on the test split.

        Args:
            n_estimators: Value passed to
                ``RandomForestClassifier(n_estimators=...)``. Defaults to 22,
                the value that was previously hard-coded.
            random_state: Value passed to
                ``RandomForestClassifier(random_state=...)``. Defaults to 40,
                the value that was previously hard-coded.

        Returns:
            A ``(score, predictions)`` tuple: the value of ``rf.score`` on the
            test split and the labels predicted for ``X_test``.
        """
        rf = RandomForestClassifier(
            n_estimators=n_estimators,
            random_state=random_state,
        )
        rf.fit(self.X_train, self.y_train)
        rf_score = rf.score(self.X_test, self.y_test)
        rf_prediction = rf.predict(self.X_test)
        return rf_score, rf_prediction
    
class Result:
    """Reporting helper that shows the score and predictions of a classifier."""

    @classmethod
    def get_results(cls, classifier_name, score, y_test, y_predict):
        """Display the score table and the actual-vs-predicted table.

        Relies on ``display`` being available in the running environment
        (it is not imported in this file).

        Args:
            classifier_name: Label used in the heading, the score table index
                and the prediction column name.
            score: Value shown under 'Predictive Accuracy'.
            y_test: Actual target values.
            y_predict: Target values predicted by the classifier.
        """
        # One-row table: the classifier name is the row label
        score_table = pd.DataFrame(
            data={'Predictive Accuracy': score},
            index=[classifier_name],
        )

        print()
        display (classifier_name + " Score : ")
        display(score_table)

        print()
        print()
        print("The prediction results - Y Test vs Y Prediction")

        # Side-by-side comparison of the true and the predicted labels
        prediction_column = 'Predicted by '+classifier_name
        comparison = pd.DataFrame({'Actual': y_test, prediction_column: y_predict})
        display(comparison)
        
        
                      

# load the dataset, returns X and y elements
def load_dataset(path='C:/Users/Admin/Desktop/Iris-Data.csv', target='Class'):
    """Read a CSV data set and split it into features and target values.

    Args:
        path: Location of the CSV file handed to ``pd.read_csv``. Defaults to
            the path that was previously hard-coded.
        target: Name of the column holding the class labels. Defaults to
            'Class'.

    Returns:
        A ``(X, y)`` tuple where ``X`` is the DataFrame without the target
        column and ``y`` is the array of values of the target column.

    Raises:
        KeyError: If ``target`` is not a column of the loaded file.
    """
    df = pd.read_csv(path)
    # Print a summary of the loaded columns as a quick sanity check
    df.info()
    if target not in df.columns:
        raise KeyError(f"target column {target!r} not found in {path!r}")
    X = df.drop([target], axis=1)
    y = df[target].values
    return X, y

#The training/test split is set by the train_size parameter; e.g. 0.8 means 80% of the data is used for training.
#A common practice is a 75:25 split (train_size = 0.75, used below), but this can always be changed.
#A source which helped me a lot: https://www.kaggle.com/umutozdemir/comparison-of-different-regression-models

# Load the data and hold out 25% of it for testing (fixed seed for repeatability)
X, y = load_dataset()
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.75, shuffle=True, random_state=42
)

score_list = []  # to keep scores of algorithms

classifier_name = "Random Forest"

# Maps each selectable model name to its wrapper class and the method that
# fits the model and returns ``(score, predictions)``.
CLASSIFIERS = {
    "Logistic Regression": (LR_Class, LR_Class.lr_classifier),
    "K-Nearest Neighbours": (KNN_Class, KNN_Class.knn_classifier),
    "Support Vector Machine": (SVM_Class, SVM_Class.svm_classifier),
    # Earlier misspelling, kept so existing selections keep working
    "State Vector Machine": (SVM_Class, SVM_Class.svm_classifier),
    "Naive Bayes": (NB_Class, NB_Class.nb_classifier),
    "Decision Tree": (DT_Class, DT_Class.dt_classifier),
    "Random Forest": (RF_Class, RF_Class.rf_classifier),
}

selected = CLASSIFIERS.get(classifier_name)

if selected is None:
    # Unknown model name: report it instead of failing
    print()
    print ("Check the Classification model selected")
else:
    model_class, run_classifier = selected
    model = model_class(classifier_name, X_train, X_test, y_train, y_test)
    score, y_predict = run_classifier(model)
    Result.get_results(classifier_name, score, y_test, y_predict)
            


#pr_dict = {'Logistic Regression' : lr_prediction,'KNN' : knn_prediction,'SVM' : svm_prediction,
           #'Naive Bayes' : nb_prediction,'Decision Tree' : dt_prediction, 'Random Forest' : rf_prediction}

#all_predictions = pd.DataFrame(pr_dict)

#all_predictions

#Score=[lr_score,knn_score,svm_score,nb_score,dt_score,rf_score]

#col={'Predictive Accuracy':Score}
#models=['Logistic Regression','KNN Classifier','Support Vector Machines','Naive Bayes','Decision Trees','Random Forest']
#df=pd.DataFrame(data=col,index=models)
#print(df)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Sepal Length  150 non-null    float64
 1   Sepal Width   150 non-null    float64
 2   Petal Length  150 non-null    float64
 3   Petal Width   150 non-null    float64
 4   Class         150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB



'Random Forest Score : '

Unnamed: 0,Predictive Accuracy
Random Forest,1.0




The prediction results - Y Test vs Y Prediction


Unnamed: 0,Actual,Predicted by Random Forest
0,Iris-versicolor,Iris-versicolor
1,Iris-setosa,Iris-setosa
2,Iris-virginica,Iris-virginica
3,Iris-versicolor,Iris-versicolor
4,Iris-versicolor,Iris-versicolor
5,Iris-setosa,Iris-setosa
6,Iris-versicolor,Iris-versicolor
7,Iris-virginica,Iris-virginica
8,Iris-versicolor,Iris-versicolor
9,Iris-versicolor,Iris-versicolor
