In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the Iris dataset from a CSV file located relative to the working directory.
df = pd.read_csv("Iris.csv")
# Preview the first rows to sanity-check that the file parsed as expected.
df.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [4]:
# Count the missing values in each column.
df.isnull().sum()

Id               0
SepalLengthCm    0
SepalWidthCm     0
PetalLengthCm    0
PetalWidthCm     0
Species          0
dtype: int64

In [5]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

(150, 6)

In [6]:
# Data type pandas inferred for each column.
df.dtypes

Id                 int64
SepalLengthCm    float64
SepalWidthCm     float64
PetalLengthCm    float64
PetalWidthCm     float64
Species           object
dtype: object

<h3>With built-in functions</h3>

In [22]:
from scipy.stats import mode
# Central tendency (mean, median, mode, midrange) of every float column,
# computed with pandas' built-in reducers.
central_tendency = {}
cols = df.select_dtypes(include=['float'])

# Iterate the column labels explicitly instead of relying on DataFrame iteration.
for col in cols.columns:
    col_data = df[col].dropna()
    # Series.mode() returns all tied modes as a Series; only the first is reported.
    mode_val = col_data.mode()
    central_tendency[col] = {
        'Mean': col_data.mean(),
        'Median': col_data.median(),
        # .iloc[0] is positional, so it does not depend on the result's index labels.
        'Mode': mode_val.iloc[0] if len(mode_val) > 0 else None,
        # Midrange: midpoint between the smallest and largest observation.
        'MidRange': (col_data.min() + col_data.max()) / 2,
    }

# Transpose so each attribute is a row and each statistic a column.
central_ten_df = pd.DataFrame(central_tendency).T
central_ten_df.index.name = "Attribute"

print(central_ten_df)

                   Mean  Median  Mode  MidRange
Atribute                                       
SepalLengthCm  5.843333    5.80   5.0      6.10
SepalWidthCm   3.054000    3.00   3.0      3.20
PetalLengthCm  3.758667    4.35   1.5      3.95
PetalWidthCm   1.198667    1.30   0.2      1.30


In [35]:
# Central tendency computed by hand (plain Python arithmetic only) for every
# float column.
central_ten = {}
cols = df.select_dtypes(include=['float'])

for col in cols.columns:
    # Sort once: the median needs ordered values, and sorted insertion order
    # makes the mode tie-break below pick the smallest tied value.
    values = sorted(df[col].dropna())
    n = len(values)

    if n == 0:
        # A column with no non-null values has no defined statistics. Without
        # this guard, max() on the empty frequency table raises ValueError and
        # the mean would divide by zero.
        central_ten[col] = {
            'Mean': float('nan'),
            'Median': float('nan'),
            'Mode': None,
            'Midrange': float('nan'),
        }
        continue

    # Frequency table: value -> number of occurrences.
    freq = {}
    for val in values:
        freq[val] = freq.get(val, 0) + 1
    # max() returns the first key that reaches the highest count.
    mode_val = max(freq, key=freq.get)

    mid = n // 2
    central_ten[col] = {
        'Mean': sum(values) / n,
        # Odd count: the middle element; even count: mean of the two middle elements.
        'Median': values[mid] if n % 2 else (values[mid - 1] + values[mid]) / 2,
        'Mode': mode_val,
        # values is sorted, so its ends are the minimum and maximum.
        'Midrange': (values[0] + values[-1]) / 2,
    }

# Transpose so each attribute is a row and each statistic a column.
cen_ten_df = pd.DataFrame(central_ten).T
cen_ten_df.index.name = "Attribute"
print(cen_ten_df)


                   Mean  Median  Mode  Midrange
Attribute                                      
SepalLengthCm  5.843333    5.80   5.0      6.10
SepalWidthCm   3.054000    3.00   3.0      3.20
PetalLengthCm  3.758667    4.35   1.5      3.95
PetalWidthCm   1.198667    1.30   0.2      1.30


In [42]:
# NOTE(review): this reads the same "Iris.csv" path that was loaded into `df`
# earlier; presumably kept so this section can be run on its own — confirm.
data = pd.read_csv("Iris.csv")

In [43]:
# Rebind `df` to a DataFrame built from `data` (the frame read from the CSV).
df = pd.DataFrame(data)

# Keep only the four measurement columns for the dispersion measures.
numerical_data = df[['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']]

# Range: per-column maximum minus per-column minimum.
column_max = numerical_data.max()
column_min = numerical_data.min()
range_values = column_max - column_min

# Variance and standard deviation, using pandas' default settings.
variance = numerical_data.var()
std_deviation = numerical_data.std()

# First, second (median) and third quartile of each column.
quartiles = numerical_data.quantile([0.25, 0.5, 0.75])

# Report each measure under its own heading.
print(f"Range:\n{range_values}\n")
print(f"Variance:\n{variance}\n")
print(f"Standard Deviation:\n{std_deviation}\n")
print(f"Quartiles (25%, 50%, 75%):\n{quartiles}")

Range:
SepalLengthCm    3.6
SepalWidthCm     2.4
PetalLengthCm    5.9
PetalWidthCm     2.4
dtype: float64

Variance:
SepalLengthCm    0.685694
SepalWidthCm     0.188004
PetalLengthCm    3.113179
PetalWidthCm     0.582414
dtype: float64

Standard Deviation:
SepalLengthCm    0.828066
SepalWidthCm     0.433594
PetalLengthCm    1.764420
PetalWidthCm     0.763161
dtype: float64

Quartiles (25%, 50%, 75%):
      SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm
0.25            5.1           2.8           1.60           0.3
0.50            5.8           3.0           4.35           1.3
0.75            6.4           3.3           5.10           1.8


In [44]:
# Per-species summary: the same five statistics for each measurement column.
measurement_columns = ["SepalLengthCm", "SepalWidthCm", "PetalLengthCm", "PetalWidthCm"]
statistic_names = ["mean", "median", "min", "max", "std"]

# Build the column -> statistics mapping once instead of spelling it out four
# times; each column gets its own copy of the list.
aggregation_spec = {column: list(statistic_names) for column in measurement_columns}
summary_statistics = data.groupby("Species").agg(aggregation_spec)

# Print the heading followed by the grouped table.
print("\nSummary Statistics for Iris Dataset (Grouped by Species):")
print(summary_statistics)



Summary Statistics for Iris Dataset (Grouped by Species):
                SepalLengthCm                            SepalWidthCm         \
                         mean median  min  max       std         mean median   
Species                                                                        
Iris-setosa             5.006    5.0  4.3  5.8  0.352490        3.418    3.4   
Iris-versicolor         5.936    5.9  4.9  7.0  0.516171        2.770    2.8   
Iris-virginica          6.588    6.5  4.9  7.9  0.635880        2.974    3.0   

                                    PetalLengthCm                             \
                 min  max       std          mean median  min  max       std   
Species                                                                        
Iris-setosa      2.3  4.4  0.381024         1.464   1.50  1.0  1.9  0.173511   
Iris-versicolor  2.0  3.4  0.313798         4.260   4.35  3.0  5.1  0.469911   
Iris-virginica   2.2  3.8  0.322497         5.552   5.55  4.