###### IRIS DATASET - SUPERVISED LEARNING

In [1]:
### Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)


### Modelling libraries
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

Data Preprocessing

In [2]:
### Load the data
Iris = pd.read_csv('IRIS.csv')

### Read the data
Iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [3]:
### check unique values
Iris['species'].unique()

array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], dtype=object)

In [4]:
#### change species into numerical value  
mapping = {'Iris-setosa':0, 'Iris-versicolor':1, 'Iris-virginica':2 }

Iris['species'].replace(mapping, inplace=True)

In [5]:
### View the data
Iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


Modelling

In [6]:
### Split the data
x = Iris.drop('species', axis=1)
y = Iris['species']

### split into train and test

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=40)

In [7]:
### Function for model
def model_trainer(data, models):
    x_train, x_test, y_train, y_test = data
    for model in models:
        model.fit(y_train, y_train)
        preds = model.predict(x_test)
        accuracy = accuracy_score(y_test, preds)
        print(f'model:(model), accuracy:(accuracy)')

In [8]:
### Define parameters

### define data
data = (x_train, x_test, y_train, y_test)

### Define models
svc = SVC()
forest = RandomForestClassifier()
dt = DecisionTreeClassifier()

models = [svc, forest, dt]

In [9]:
### train models and get best performance
model_trainer(data=data, models=models)

ValueError: Expected 2D array, got 1D array instead:
array=[1. 0. 0. 0. 0. 2. 0. 0. 2. 0. 0. 1. 2. 2. 2. 0. 2. 2. 1. 0. 2. 1. 1. 1.
 0. 0. 1. 1. 2. 2. 0. 2. 1. 0. 2. 0. 0. 2. 0. 2. 2. 0. 2. 1. 0. 0. 2. 0.
 2. 2. 1. 1. 2. 1. 2. 0. 2. 1. 1. 0. 0. 0. 2. 2. 1. 0. 1. 2. 2. 1. 2. 2.
 1. 1. 0. 1. 1. 2. 1. 2. 0. 2. 2. 1. 1. 0. 2. 1. 0. 1. 2. 0. 0. 2. 1. 0.
 0. 1. 0. 2. 1. 2. 0. 2. 1. 0. 0. 2. 0. 1. 1. 1. 1. 0. 2. 1. 0. 1. 0. 1.].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

In [None]:
### save models
import joblib
### Save randomforest
joblib.dump(forest, 'forest.pk1')

### Save svc
joblib.dump(svc, 'svc.pk1')

['svc.pk1']