In [27]:
import pandas as pd
import numpy as np
import seaborn as sns

In [28]:
# Load the iris example dataset through seaborn and preview the first five rows.
iris = sns.load_dataset("iris")
iris.head(n=5)

In [30]:
# Print the frame's index range, per-column non-null counts and dtypes, and memory usage.
iris.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  150 non-null    float64
 1   sepal_width   150 non-null    float64
 2   petal_length  150 non-null    float64
 3   petal_width   150 non-null    float64
 4   species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [31]:
# List the distinct labels present in the species column.
iris['species'].unique()

array(['setosa', 'versicolor', 'virginica'], dtype=object)

In [32]:
# Per-species summary of sepal_length; reset_index() turns the species
# group key back into an ordinary column.
summary_stats = (
    iris
    .groupby('species')['sepal_length']
    .agg(['mean', 'median', 'max', 'min', 'std'])
    .reset_index()
)

In [33]:
# Display the per-species sepal_length summary table.
summary_stats

Unnamed: 0,species,mean,median,max,min,std
0,setosa,5.006,5.0,5.8,4.3,0.35249
1,versicolor,5.936,5.9,7.0,4.9,0.516171
2,virginica,6.588,6.5,7.9,4.9,0.63588


In [34]:
# describe() for every numeric feature, computed separately for each species.
grouped_stats = iris.groupby('species').describe()

# Transposed for readability: (feature, statistic) pairs run down the rows
# and the species run across the columns.
grouped_stats.transpose()

Unnamed: 0,species,setosa,versicolor,virginica
sepal_length,count,50.0,50.0,50.0
sepal_length,mean,5.006,5.936,6.588
sepal_length,std,0.35249,0.516171,0.63588
sepal_length,min,4.3,4.9,4.9
sepal_length,25%,4.8,5.6,6.225
sepal_length,50%,5.0,5.9,6.5
sepal_length,75%,5.2,6.3,6.9
sepal_length,max,5.8,7.0,7.9
sepal_width,count,50.0,50.0,50.0
sepal_width,mean,3.428,2.77,2.974


In [35]:
# describe() restricted to sepal_length: one row per species, one column
# per statistic (count, mean, std, min, quartiles, max).
grouped_qualitative = (
    iris
    .groupby('species')['sepal_length']
    .describe()
)
grouped_qualitative

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
setosa,50.0,5.006,0.35249,4.3,4.8,5.0,5.2,5.8
versicolor,50.0,5.936,0.516171,4.9,5.6,5.9,6.3,7.0
virginica,50.0,6.588,0.63588,4.9,6.225,6.5,6.9,7.9


In [36]:
# Array of the distinct species labels, iterated over by the loops in later cells.
# NOTE(review): this name shadows Python's built-in `set` for the rest of the
# kernel session. Renaming it requires updating every later cell that loops
# over it in the same change.
set = iris['species'].unique()

In [37]:
# Print a banner and show the describe() table for each species in turn.
#
# The separator is built once up front. The previous form, f'\n{'='*50}',
# reused the enclosing single quote inside the f-string, which is only valid
# on Python 3.12+ (PEP 701) and is a SyntaxError on earlier interpreters.
separator = '=' * 50

# Iterate over the species labels directly so this cell does not rely on a
# variable that shadows the built-in `set`.
for species in iris['species'].unique():
    print(f'\n{separator}')
    print(f'Species: {species}')
    print(separator)
    # Keep only the rows that belong to the current species.
    species_data = iris[iris['species'] == species]
    # These percentiles are describe()'s defaults; listed explicitly for the reader.
    display(species_data.describe(percentiles=[.25, .5, .75]))


Species: setosa


Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
count,50.0,50.0,50.0,50.0
mean,5.006,3.428,1.462,0.246
std,0.35249,0.379064,0.173664,0.105386
min,4.3,2.3,1.0,0.1
25%,4.8,3.2,1.4,0.2
50%,5.0,3.4,1.5,0.2
75%,5.2,3.675,1.575,0.3
max,5.8,4.4,1.9,0.6



Species: versicolor


Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
count,50.0,50.0,50.0,50.0
mean,5.936,2.77,4.26,1.326
std,0.516171,0.313798,0.469911,0.197753
min,4.9,2.0,3.0,1.0
25%,5.6,2.525,4.0,1.2
50%,5.9,2.8,4.35,1.3
75%,6.3,3.0,4.6,1.5
max,7.0,3.4,5.1,1.8



Species: virginica


Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
count,50.0,50.0,50.0,50.0
mean,6.588,2.974,5.552,2.026
std,0.63588,0.322497,0.551895,0.27465
min,4.9,2.2,4.5,1.4
25%,6.225,2.8,5.1,1.8
50%,6.5,3.0,5.55,2.0
75%,6.9,3.175,5.875,2.3
max,7.9,3.8,6.9,2.5


In [38]:
# Print count, mean, median, min, max, std and the first and third quartiles
# of every numeric feature, species by species.
features = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']

# Built once outside the loop. The previous form, f'\n{'='*50}', reused the
# enclosing single quote inside the f-string, which is only valid on
# Python 3.12+ (PEP 701) and is a SyntaxError on earlier interpreters.
separator = '=' * 50

# Iterate over the species labels directly so this cell does not rely on a
# variable that shadows the built-in `set`.
for species in iris['species'].unique():
    print(f'\n{separator}')
    print(f'Species: {species}')
    print(separator)
    # Keep only the rows that belong to the current species.
    species_data = iris[iris['species'] == species]
    for feature in features:
        print(f'\nFeature: {feature}')
        data = species_data[feature]

        # Every value is formatted to two decimals; count is a plain integer.
        print(f'Count: {len(data)}')
        print(f'Mean: {data.mean():.2f}')
        print(f'Median: {data.median():.2f}')
        print(f'Min: {data.min():.2f}')
        print(f'Max: {data.max():.2f}')
        print(f'Std: {data.std():.2f}')
        print(f'q1: {data.quantile(0.25):.2f}')
        print(f'q3: {data.quantile(0.75):.2f}')


Species: setosa

Feature: sepal_length
Count: 50
Mean: 5.01
Median: 5.00
Min: 4.30
Max: 5.80
Std: 0.35
q1: 4.80
q3: 5.20

Feature: sepal_width
Count: 50
Mean: 3.43
Median: 3.40
Min: 2.30
Max: 4.40
Std: 0.38
q1: 3.20
q3: 3.68

Feature: petal_length
Count: 50
Mean: 1.46
Median: 1.50
Min: 1.00
Max: 1.90
Std: 0.17
q1: 1.40
q3: 1.58

Feature: petal_width
Count: 50
Mean: 0.25
Median: 0.20
Min: 0.10
Max: 0.60
Std: 0.11
q1: 0.20
q3: 0.30

Species: versicolor

Feature: sepal_length
Count: 50
Mean: 5.94
Median: 5.90
Min: 4.90
Max: 7.00
Std: 0.52
q1: 5.60
q3: 6.30

Feature: sepal_width
Count: 50
Mean: 2.77
Median: 2.80
Min: 2.00
Max: 3.40
Std: 0.31
q1: 2.52
q3: 3.00

Feature: petal_length
Count: 50
Mean: 4.26
Median: 4.35
Min: 3.00
Max: 5.10
Std: 0.47
q1: 4.00
q3: 4.60

Feature: petal_width
Count: 50
Mean: 1.33
Median: 1.30
Min: 1.00
Max: 1.80
Std: 0.20
q1: 1.20
q3: 1.50

Species: virginica

Feature: sepal_length
Count: 50
Mean: 6.59
Median: 6.50
Min: 4.90
Max: 7.90
Std: 0.64
q1: 6.22
q3: 6.90

