# Iris Dataset Exploration

## Explore the similarities, differences, and relationships among the three Iris species in terms of their sepal and petal lengths and widths

## Import Libraries

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib
from pandas_profiling import ProfileReport
from sklearn.datasets import load_iris
%matplotlib inline

## Load Dataset, Explore and Display Features

In [None]:
# Fetch the Iris dataset bundled with scikit-learn, then build a single
# DataFrame: the feature matrix with the numeric class label appended as a
# final 'target' column.
iris = load_iris()
iris_df = pd.DataFrame(
    np.column_stack((iris['data'], iris['target'])),
    columns=[*iris['feature_names'], 'target'],
)

In [None]:
# Show the raw object returned by load_iris() as the cell output.
iris

In [None]:
# Show the assembled DataFrame (features plus the 'target' column).
iris_df

In [None]:
# Swap the numeric class codes in 'target' for readable species names.
# replace() leaves unmatched values untouched, so re-running this cell is safe.
species_by_code = {0: 'setosa', 1: 'versicolor', 2: 'virginica'}
iris_df['target'] = iris_df['target'].replace(species_by_code)

In [None]:
# Show the DataFrame again, now that 'target' holds species names.
iris_df

In [None]:
# Dimensions of the DataFrame as a (rows, columns) tuple.
iris_df.shape

In [None]:
# Summary statistics for the DataFrame's columns.
iris_df.describe()

In [None]:
# Print a concise overview of the DataFrame: columns, non-null counts, dtypes.
iris_df.info()

## Create a scatterplot of sepal length vs. sepal width, then color the points by petal length

In [None]:
# Sepal length against sepal width on a large (20 x 12 inch) figure.
plt.figure(figsize=(20, 12))
sns.scatterplot(
    data=iris_df,
    x='sepal length (cm)',
    y='sepal width (cm)',
);

In [None]:
# Same sepal scatter plot, with each point colored by its petal length.
plt.figure(figsize=(20, 12))
sns.scatterplot(
    data=iris_df,
    x='sepal length (cm)',
    y='sepal width (cm)',
    hue='petal length (cm)',
);

## Explore the dataset using tools and libraries available in Python

In [None]:
# Build a pandas-profiling report over the whole DataFrame; leaving `profile`
# as the last expression makes the notebook render it as the cell output.
profile = ProfileReport(iris_df)
profile

## Calculations of dot product, norm and distance

In [None]:
# Dot product of the sepal-length column and the sepal-width column,
# each treated as a vector with one entry per row.
s_length = iris_df['sepal length (cm)']
s_width = iris_df['sepal width (cm)']
s_length @ s_width

In [None]:
# Norm of the sepal-length vector (np.linalg.norm's default for a 1-D input).
s_length = iris_df['sepal length (cm)']
sepal_length_norm = np.linalg.norm(s_length.to_numpy())
sepal_length_norm

In [None]:
# Distance between the sepal-length and sepal-width vectors: the norm of
# their element-wise difference.
s_length = iris_df['sepal length (cm)']
s_width = iris_df['sepal width (cm)']
length_minus_width = s_length - s_width
dist = np.linalg.norm(length_minus_width)
dist

## Additional Visualizations

In [None]:
# Petal length against petal width on a large (20 x 12 inch) figure.
plt.figure(figsize=(20, 12))
sns.scatterplot(
    data=iris_df,
    x='petal length (cm)',
    y='petal width (cm)',
);

In [None]:
# Sepal length against sepal width, one color per value of 'target'
# (the species), drawn from the "prism_r" palette.
plt.figure(figsize=(20, 12))
sns.scatterplot(
    data=iris_df,
    x='sepal length (cm)',
    y='sepal width (cm)',
    hue='target',
    palette="prism_r",
);

In [None]:
# Petal length against petal width, one color per value of 'target'
# (the species), drawn from the "prism_r" palette.
plt.figure(figsize=(20, 12))
sns.scatterplot(
    data=iris_df,
    x='petal length (cm)',
    y='petal width (cm)',
    hue='target',
    palette="prism_r",
);

In [None]:
# Pairwise relationship plots across the DataFrame's columns, colored by
# 'target'.
sns.pairplot(
    data=iris_df,
    hue='target',
    palette="prism_r",
);

In [None]:
# Box plot of sepal length for each 'target' value, using the "whitegrid"
# theme. The style is set first so it applies to the axes created next.
sns.set_style("whitegrid")
sns.boxplot(data=iris_df, x='target', y='sepal length (cm)');

In [None]:
# Box plot of sepal width for each 'target' value, using the "whitegrid"
# theme. The style is set first so it applies to the axes created next.
sns.set_style("whitegrid")
sns.boxplot(data=iris_df, x='target', y='sepal width (cm)');

In [None]:
# Box plot of petal length for each 'target' value, using the "whitegrid"
# theme. The style is set first so it applies to the axes created next.
sns.set_style("whitegrid")
sns.boxplot(data=iris_df, x='target', y='petal length (cm)');

In [None]:
# Box plot of petal width for each 'target' value, using the "whitegrid"
# theme. The style is set first so it applies to the axes created next.
sns.set_style("whitegrid")
sns.boxplot(data=iris_df, x='target', y='petal width (cm)');

In [None]:
# Pairwise grid over iris_df: histograms on the diagonal, scatter plots in
# the upper triangle and KDE plots in the lower triangle. Each map_* call
# returns the grid itself, so the calls are chained.
g = (
    sns.PairGrid(iris_df)
    .map_diag(plt.hist)
    .map_upper(plt.scatter)
    .map_lower(sns.kdeplot)
)

## Conclusions