**1. Importing Dependencies**

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

**2. Loading the dataset**

In [None]:
# The column labels used throughout this notebook ('5.1', '3.5', '1.4', '0.2',
# 'Iris-setosa') are the values of the first record, i.e. the file has no header
# row and a plain read_csv() swallows the first sample as column names.
# Reading with header=None keeps that sample as data; the labels passed via
# `names` intentionally match the ones the later cells already reference.
flower_data = pd.read_csv(
    'iris.data',
    header=None,
    names=['5.1', '3.5', '1.4', '0.2', 'Iris-setosa'],
)

**3. Summarizing the dataset**


Dimensions of the dataset



In [None]:
# .shape is a (number of rows, number of columns) tuple for the loaded frame.
flower_data.shape

Peek at the data itself

In [None]:
# Show the first five rows (head's default) to eyeball the raw values.
flower_data.head(n=5)

Checking for blanks

In [None]:
# Count missing entries per column; isna() is the same check as isnull().
flower_data.isna().sum()

Statistical summary of all attributes

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
flower_data.describe()

Breakdown of the data by the class variable

In [None]:
# Number of rows per class label (the class column is named 'Iris-setosa').
flower_data.groupby(by='Iris-setosa').size()

**4. Visualizing the dataset**

Univariate Plot for '5.1' (lineplot)

In [None]:
# Line plot of the '5.1' column, passed explicitly as seaborn's `data` argument.
sns.lineplot(data=flower_data['5.1'])

Univariate Plot for '3.5' (barplot)

In [None]:
# Bar plot of the '3.5' column, passed explicitly as seaborn's `data` argument.
sns.barplot(data=flower_data['3.5'])

Univariate Plot for '1.4' (jointplot)

In [None]:
# Joint plot of the '1.4' column, passed explicitly as seaborn's `data` argument.
sns.jointplot(data=flower_data['1.4'])

Univariate Plot for '0.2' (histplot)

In [None]:
# Histogram of the '0.2' column, passed explicitly as seaborn's `data` argument.
sns.histplot(data=flower_data['0.2'])

Univariate Plot for 'Iris-setosa' (pointplot)

In [None]:
# pointplot aggregates a numeric variable (the mean by default), so it cannot be
# applied directly to the string-valued class column. Plot the number of samples
# per class instead, which is the univariate summary that makes sense for labels.
class_counts = (
    flower_data
    .groupby('Iris-setosa')
    .size()
    .reset_index(name='count')
)
sns.pointplot(data=class_counts, x='Iris-setosa', y='count')

Multivariate plot for '5.1' vs 'Iris-setosa' (boxplot)

In [None]:
# Distribution of the '5.1' column within each class label.
sns.boxplot(data=flower_data, x='5.1', y='Iris-setosa')

Multivariate plot for '3.5' vs 'Iris-setosa' (boxplot)

In [None]:
# Distribution of the '3.5' column within each class label.
sns.boxplot(data=flower_data, x='3.5', y='Iris-setosa')

Multivariate plot for '1.4' vs 'Iris-setosa' (boxplot)

In [None]:
# Distribution of the '1.4' column within each class label.
sns.boxplot(data=flower_data, x='1.4', y='Iris-setosa')

Multivariate plot for '0.2' vs 'Iris-setosa' (boxplot)

In [None]:
# Distribution of the '0.2' column within each class label.
sns.boxplot(data=flower_data, x='0.2', y='Iris-setosa')

**5. Evaluating some algorithms**

Separate out a validation dataset

In [None]:
# Features are every column except the class label; the label column is the target.
X = flower_data.drop(columns='Iris-setosa')
Y = flower_data['Iris-setosa']

In [None]:
# Hold out 20% of the rows for evaluation. The split is stratified on the class
# labels and uses a fixed seed so it is the same on every run.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    stratify=Y,
    random_state=2,
)

In [None]:
# Print the shape of each split, in the order: X_train, X_test, Y_train, Y_test.
print(*(split.shape for split in (X_train, X_test, Y_train, Y_test)))

Logistic Regression (LR) Model

In [None]:
# Fit logistic regression on the training split (fit() returns the estimator
# itself), then measure accuracy on the held-out test split.
model_lr = LogisticRegression().fit(X_train, Y_train)
lr_score = model_lr.score(X_test, Y_test)
print(f"Logistic Regression Accuracy: {lr_score:.2f}")

Linear Discriminant Analysis (LDA) Model

In [None]:
# Fit linear discriminant analysis on the training split (fit() returns the
# estimator itself), then measure accuracy on the held-out test split.
model_lda = LinearDiscriminantAnalysis().fit(X_train, Y_train)
lda_score = model_lda.score(X_test, Y_test)
print(f"Linear Discriminant Analysis Accuracy: {lda_score:.2f}")

K-Nearest Neighbors (KNN) Model

In [None]:
# Fit k-nearest neighbours with default settings on the training split (fit()
# returns the estimator itself), then measure accuracy on the test split.
model_knn = KNeighborsClassifier().fit(X_train, Y_train)
knn_score = model_knn.score(X_test, Y_test)
print(f"K-Nearest Neighbors Accuracy: {knn_score:.2f}")

Classification and Regression Trees (CART) Model

In [None]:
# Decision-tree fitting permutes the features at each split, so an unseeded tree
# can differ between runs and change which model is later picked as "best".
# Fix the seed (same value as the train/test split) to make the run reproducible.
model_cart = DecisionTreeClassifier(random_state=2)
model_cart.fit(X_train, Y_train)
# Accuracy on the held-out test split.
cart_score = model_cart.score(X_test, Y_test)
print(f"CART Accuracy: {cart_score:.2f}")

Gaussian Naive Bayes (NB) Model

In [None]:
# Fit Gaussian naive Bayes on the training split (fit() returns the estimator
# itself), then measure accuracy on the held-out test split.
model_nb = GaussianNB().fit(X_train, Y_train)
nb_score = model_nb.score(X_test, Y_test)
print(f"Gaussian Naive Bayes Accuracy: {nb_score:.2f}")

Support Vector Machines (SVM) Model

In [None]:
# Fit a support vector classifier with default settings on the training split
# (fit() returns the estimator itself), then measure accuracy on the test split.
model_svm = SVC().fit(X_train, Y_train)
svm_score = model_svm.score(X_test, Y_test)
print(f"Support Vector Machines Accuracy: {svm_score:.2f}")

Comparing the accuracy of the models

In [None]:
# Parallel lists: accuracy[i] is the test accuracy of algorithms[i].
algorithms = [
    'Logistic Regression',
    'Linear Discriminant Analysis',
    'K-Nearest Neighbors',
    'Classification and Regression Trees',
    'Gaussian Naive Bayes',
    'Support Vector Machines',
]
accuracy = [lr_score, lda_score, knn_score, cart_score, nb_score, svm_score]

# One row per model: its display name and its accuracy.
score_df = pd.DataFrame(
    list(zip(algorithms, accuracy)),
    columns=['Algorithms', 'Accuracy Score'],
)
score_df

Selecting best model

In [None]:
# Position of the highest accuracy; on ties the earliest entry wins.
max_index = max(range(len(accuracy)), key=accuracy.__getitem__)
# Look up the matching display name in the parallel `algorithms` list.
best_model = algorithms[max_index]
print(f"Best model: {best_model} \nAccuracy: {max(accuracy):.2f}")

Plotting the accuracies

In [None]:
# One point per algorithm: accuracy on the x axis, algorithm name on the y axis.
sns.pointplot(data=score_df, x='Accuracy Score', y='Algorithms')
# Label each point with its accuracy; the y coordinate is the row position.
for row_pos, score in enumerate(score_df['Accuracy Score']):
    plt.text(score, row_pos, f'{score:.2f}', ha='center', va='bottom')

**6. Making some predictions**

In [None]:
# Fitted estimators keyed by the same display names used in `algorithms`,
# so the best model's name can be used directly as the lookup key.
models = {
    'Logistic Regression': model_lr,
    'Linear Discriminant Analysis': model_lda,
    'K-Nearest Neighbors': model_knn,
    'Classification and Regression Trees': model_cart,
    'Gaussian Naive Bayes': model_nb,
    'Support Vector Machines': model_svm
}

# A single new sample, wrapped in a one-row frame that reuses the training
# feature columns so the input matches what the models were fitted on.
input_data = (5.8, 4.0, 1.2, 0.2)
input_data_df = pd.DataFrame([input_data], columns=X_train.columns)

# Predict with whichever model scored highest on the test split.
optimal_model = models[best_model]
prediction = optimal_model.predict(input_data_df)
print(prediction)
print(f"predicted using {best_model}")