# Iris Web App

## Install Required Libraries

In [1]:
%pip install pandas -q
%pip install seaborn -q
%pip install scikit-learn -q

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


## Check Libraries' Versions

In [2]:
# Print the installed pandas package metadata (name, version, license, ...).
%pip show pandas

Name: pandas
Version: 2.2.0
Summary: Powerful data structures for data analysis, time series, and statistics
Home-page: https://pandas.pydata.org
Author: 
Author-email: The Pandas Development Team <pandas-dev@python.org>
License: BSD 3-Clause License

Copyright (c) 2008-2011, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team
All rights reserved.

Copyright (c) 2011-2023, Open source contributors.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice, this
  list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice,
  this list of conditions and the following disclaimer in the documentation
  and/or other materials provided with the distribution.

* Neither the name of the copyright holder nor the names of its
  contributors may be u

In [3]:
# Print the installed scikit-learn package metadata (name, version, requirements, ...).
%pip show scikit-learn

Name: scikit-learnNote: you may need to restart the kernel to use updated packages.

Version: 1.4.0
Summary: A set of python modules for machine learning and data mining
Home-page: https://scikit-learn.org
Author: 
Author-email: 
License: new BSD
Location: c:\Users\Michael\AppData\Local\Programs\Python\Python311\Lib\site-packages
Requires: joblib, numpy, scipy, threadpoolctl
Required-by: autoviz, dtale, pandas-dq


In [4]:
# List every package installed in the kernel's environment with its exact version.
# NOTE(review): the output is long; consider clearing it before sharing the notebook.
%pip freeze

absl-py==2.0.0Note: you may need to restart the kernel to use updated packages.

altair==5.1.1
annotated-types==0.6.0
ansi2html==1.8.0
anyio==4.0.0
appdirs==1.4.4
argon2-cffi==23.1.0
argon2-cffi-bindings==21.2.0
arrow==1.2.3
asgiref==3.7.2
astor==0.8.1
asttokens==2.2.1
astunparse==1.6.3
async-lru==2.0.4
attrs==23.1.0
autoviz==0.1.730
Babel==2.12.1
backcall==0.2.0
beautifulsoup4==4.12.2
bleach==6.0.0
blinker==1.6.2
bokeh==2.4.3
branca==0.7.0
Brotli==1.0.9
bs4==0.0.1
cachetools==5.3.1
certifi==2023.7.22
cffi==1.15.1
charset-normalizer==3.2.0
click==8.1.7
click-plugins==1.1.1
cligj==0.7.2
colorama==0.4.6
colorcet==3.0.1
comm==0.1.4
contourpy==1.1.0
cycler==0.11.0
dacite==1.8.1
dash==2.13.0
dash-bootstrap-components==1.3.1
dash-colorscales==0.0.4
dash-core-components==2.0.0
dash-daq==0.5.0
dash-html-components==2.0.0
dash-table==5.0.0
debugpy==1.6.7.post1
decorator==5.1.1
defusedxml==0.7.1
Django==5.0
dtale==3.10.0
duckdb==0.8.1
emoji==2.8.0
et-xmlfile==1.1.0
executing==1.2.0
fastjsonschem

## Import Required Libraries

In [5]:
import pickle
import pandas as pd
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier

## Import Data Set

In [6]:
# Fetch the Iris data through seaborn and keep a local CSV copy (without the
# index column) that the next cell reads back from disk.
iris = sns.load_dataset("iris")
iris.to_csv("./iris.csv", index=False)

In [7]:
# Read the saved CSV into the working frame and preview its first five rows.
df = pd.read_csv("./iris.csv")
df.head(n=5)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


## Separate Features and Target

In [8]:
# "species" is the column to predict; every other column of df is a feature.
target = "species"
features = [*df.columns]
features.remove(target)  # raises ValueError if the target column is absent
print(target, features, sep="\n")

species
['sepal_length', 'sepal_width', 'petal_length', 'petal_width']


In [10]:
# Bundle the column roles (target name and feature names) into one mapping.
label = {"target": target, "features": features}
print(label)

{'target': 'species', 'features': ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']}


In [11]:
# Fit one StandardScaler per feature, keyed by column name. The double brackets
# keep each column two-dimensional, as the scaler's fit() expects.
scaler = {
    key: StandardScaler().fit(X=df[[key]].to_numpy())
    for key in features
}
print(scaler)

{'sepal_length': StandardScaler(), 'sepal_width': StandardScaler(), 'petal_length': StandardScaler(), 'petal_width': StandardScaler()}


In [12]:
# Record the median of every feature column, keyed by column name.
value = {key: df[key].median() for key in features}
print(value)

{'sepal_length': 5.8, 'sepal_width': 3.0, 'petal_length': 4.35, 'petal_width': 1.3}


## Scale Numerical Features

In [13]:
# Build a scaled copy of df: each feature column is replaced by the output of
# its own fitted scaler; the target column is carried over unchanged.
df_scaled = df.assign(
    **{key: scaler[key].transform(X=df[[key]].to_numpy()) for key in features}
)
df_scaled.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,-0.900681,1.019004,-1.340227,-1.315444,setosa
1,-1.143017,-0.131979,-1.340227,-1.315444,setosa
2,-1.385353,0.328414,-1.397064,-1.315444,setosa
3,-1.506521,0.098217,-1.283389,-1.315444,setosa
4,-1.021849,1.249201,-1.340227,-1.315444,setosa


## Train Model

In [14]:
# Split the scaled frame into the feature matrix X and the label vector y,
# then show the first five rows of each as a sanity check.
X = df_scaled.loc[:, features].to_numpy()
y = df_scaled.loc[:, target].to_numpy()
print(X[:5])
print(y[:5])

[[-0.90068117  1.01900435 -1.34022653 -1.3154443 ]
 [-1.14301691 -0.13197948 -1.34022653 -1.3154443 ]
 [-1.38535265  0.32841405 -1.39706395 -1.3154443 ]
 [-1.50652052  0.09821729 -1.2833891  -1.3154443 ]
 [-1.02184904  1.24920112 -1.34022653 -1.3154443 ]]
['setosa' 'setosa' 'setosa' 'setosa' 'setosa']


In [15]:
# Train a random forest on the scaled features.
# random_state is fixed so the fitted forest (and therefore the pickled model)
# is reproducible across re-runs; 42 matches the seed used for sampling below.
model = RandomForestClassifier(random_state=42).fit(X=X, y=y)
model

## Dumping Files

In [16]:
# Persist the column roles, per-feature scalers, per-feature medians and the
# trained model as pickle files next to the notebook. Each file is opened in a
# `with` block so the handle is flushed and closed even if pickling fails.
with open(file="./label.pkl", mode="wb") as fh:
    pickle.dump(obj=label, file=fh)
with open(file="./scaler.pkl", mode="wb") as fh:
    pickle.dump(obj=scaler, file=fh)
with open(file="./value.pkl", mode="wb") as fh:
    pickle.dump(obj=value, file=fh)
with open(file="./model.pkl", mode="wb") as fh:
    pickle.dump(obj=model, file=fh)

In [17]:
# Draw five feature rows (fixed seed, so the same rows every run) and save
# them without the index as a small CSV.
sample = df.loc[:, features].sample(n=5, random_state=42)
sample.to_csv("./sample.csv", index=False)
print(sample)

     sepal_length  sepal_width  petal_length  petal_width
73            6.1          2.8           4.7          1.2
18            5.7          3.8           1.7          0.3
118           7.7          2.6           6.9          2.3
78            6.0          2.9           4.5          1.5
76            6.8          2.8           4.8          1.4


## Model from Dumped Files

In [21]:
# Reload the four artifacts from disk, replacing the in-memory objects.
# Each file is opened in a `with` block so the handle is always closed.
# NOTE: pickle.load can execute arbitrary code -- only load files you trust.
with open(file="./label.pkl", mode="rb") as fh:
    label = pickle.load(file=fh)
with open(file="./scaler.pkl", mode="rb") as fh:
    scaler = pickle.load(file=fh)
with open(file="./value.pkl", mode="rb") as fh:
    value = pickle.load(file=fh)
with open(file="./model.pkl", mode="rb") as fh:
    model = pickle.load(file=fh)

## Load Sample Data Set

In [22]:
# Read the saved sample rows back from disk and show them.
sample = pd.read_csv("./sample.csv")
print(sample)

   sepal_length  sepal_width  petal_length  petal_width
0           6.1          2.8           4.7          1.2
1           5.7          3.8           1.7          0.3
2           7.7          2.6           6.9          2.3
3           6.0          2.9           4.5          1.5
4           6.8          2.8           4.8          1.4


## Making Prediction

In [23]:
# Scale each feature of the sample with its own fitted scaler, then predict.
df_test = sample.copy()
for key in label["features"]:
    df_test[key] = scaler[key].transform(X=sample[[key]].values)
# Select the feature columns explicitly, in the stored feature order, so an
# extra or reordered column in the input file cannot silently shift the values
# the model sees.
X_test = df_test[label["features"]].values
y_pred = model.predict(X=X_test)
print(X_test)
print(y_pred)

[[ 3.10997534e-01 -5.92373012e-01  5.35408562e-01  8.77547895e-04]
 [-1.73673948e-01  1.70959465e+00 -1.16971425e+00 -1.18381211e+00]
 [ 2.24968346e+00 -1.05276654e+00  1.78583195e+00  1.44883158e+00]
 [ 1.89829664e-01 -3.62176246e-01  4.21733708e-01  3.95774101e-01]
 [ 1.15917263e+00 -5.92373012e-01  5.92245988e-01  2.64141916e-01]]
['versicolor' 'setosa' 'virginica' 'versicolor' 'versicolor']
