In [None]:
import pandas as pd
from ucimlrepo import fetch_ucirepo
import matplotlib.pyplot as plt
import numpy as np

### Importing and Loading Data
---

In [None]:
# Download the Iris dataset from the UCI ML Repository (dataset id 53).
IRIS_DATASET_ID = 53
iris = fetch_ucirepo(id=IRIS_DATASET_ID)

# Print, in order: the dataset metadata, the per-variable information,
# and the type of the object returned by the fetch call.
for section in (iris.metadata, iris.variables, type(iris)):
    print(section)

In [None]:
# List the top-level keys of the fetched dataset object.
for top_key in dict(iris):
    print(top_key)

print()

# List the keys stored under the 'data' entry.
iris_data = dict(iris)['data']
for data_key in iris_data:
    print(data_key)

# Render each component stored under 'data', in a fixed order.
for data_key in ('ids', 'features', 'targets', 'original', 'headers'):
    display(iris_data[data_key])

In [None]:
# Pull the 'original', 'features' and 'targets' entries of the fetched
# dataset into standalone DataFrames used by the rest of the notebook.
iris_frames = dict(iris)['data']
data = pd.DataFrame(iris_frames['original'])
features = pd.DataFrame(iris_frames['features'])
targets = pd.DataFrame(iris_frames['targets'])

# Preview the first rows of each frame.
for frame in (data, features, targets):
    display(frame.head())

### Calculating Summary Statistics
--- 

In [None]:
# Print mean, standard deviation, minimum and maximum (two decimals each)
# for every real-valued numeric column of `data`.
for c in data.columns:
    # Non-numeric columns are skipped: they produce no output here.
    if pd.api.types.is_any_real_numeric_dtype(data[c]):
        print(f"{c}:")
        print(f"\tMean = {data[c].mean():.2f}")
        # pandas' .std() defaults to the sample standard deviation (ddof=1).
        print(f"\tStandard Deviation = {data[c].std():.2f}")
        print(f"\tMinimum Value = {data[c].min():.2f}")
        print(f"\tMaximum Value = {data[c].max():.2f}")
        print()

In [None]:
# Count how many rows carry each distinct value of the 'class' column.
class_counts = data.loc[:, 'class'].value_counts()
class_counts

In [None]:
# Summary table over every column; include='all' keeps non-numeric
# columns in the output alongside the numeric ones.
summary_table = data.describe(include='all')
summary_table

In [None]:
# Pairwise covariance between the feature columns.
print("Covariance:")
feature_cov = features.cov()
feature_cov

In [None]:
# Pairwise correlation between the feature columns.
print("Correlation:")
feature_corr = features.corr()
feature_corr

In [None]:
# Histogram of the 'sepal length' column. Bin edges run from 4.0 to 8.0 in
# steps of 0.5, and the same edges are reused as the x-axis tick positions.
bin_edges = np.arange(4.0, 8.5, 0.5)
data['sepal length'].plot.hist(
    bins=bin_edges,
    xticks=bin_edges,
    title='sepal length histogram',
    xlabel='length (cm)',
    ylabel='frequency',
    edgecolor='black',
    grid=True,
)
plt.show()

In [None]:
# One box plot per feature column, drawn side by side on shared axes.
features.plot.box(
    title='feature distribution',
    xlabel='feature',
    ylabel='cm',
)
plt.show()

In [None]:
# Scatter plots for every unordered pair of the first four columns of `data`
# (6 pairs), laid out on a 3x2 grid of axes.
fig, axes = plt.subplots(3, 2, figsize=(12, 12))

# All (x, y) column-position pairs with x < y among columns 0..3.
column_pairs = [(i, j) for i in range(3) for j in range(i + 1, 4)]

# axes.flat walks the grid row by row, so the k-th pair lands on
# row k // 2, column k % 2.
for panel, (i, j) in zip(axes.flat, column_pairs):
    panel.plot(data.iloc[:, i], data.iloc[:, j], 'o')
    panel.set_xlabel(f"{data.columns[i]}")
    panel.set_ylabel(f"{data.columns[j]}")

In [None]:
# Parallel-coordinates plot of `data`, with lines grouped by the 'class' column.
pd.plotting.parallel_coordinates(frame=data, class_column='class')