# Univariate Analysis - Iris Dataset

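This notebook performs a univariate analysis of each of the four numeric features of the Iris dataset: summary statistics, histograms, box plots, and normal Q-Q plots.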


## 1. Import Libraries


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import warnings
from pathlib import Path

# Notebook-wide configuration.
# Suppress every warning category so library warnings do not clutter cell output.
warnings.filterwarnings(action='ignore')
# Fix the state of NumPy's global random number generator.
np.random.seed(seed=42)
# Seaborn theme: white background with grid lines.
sns.set_style(style='whitegrid')
# Default figure size in inches (width, height); cells that pass their own
# `figsize` to plt.subplots override it.
plt.rcParams.update({'figure.figsize': (12, 8)})


## 2. Load Data


In [None]:
# Location of the Iris CSV, relative to the directory the notebook runs from.
data_path = Path('../../data/Iris.csv')
df = pd.read_csv(data_path)

# Numeric measurement columns analysed by every later cell.
features = ['sepal.length', 'sepal.width', 'petal.length', 'petal.width']

# Fail fast with a descriptive message if the CSV uses different column names,
# instead of letting a later cell stop on a bare KeyError.
missing_columns = [column for column in features if column not in df.columns]
if missing_columns:
    raise KeyError(
        f'Expected feature columns {missing_columns} not found in {data_path}; '
        f'available columns: {list(df.columns)}'
    )

print('Data loaded successfully!')


## 3. Univariate Analysis for Each Feature


In [None]:
# Print a banner followed by seven summary statistics for every feature.
banner = '=' * 80
for feature in features:
    column = df[feature]
    print(banner)
    print(f'UNIVARIATE ANALYSIS: {feature}')
    print(banner)
    # (label, bound method) pairs: each statistic is computed only when its
    # line is printed, in the order listed here.
    # std/skew/kurtosis use the pandas defaults (see the pandas Series docs).
    statistics = (
        ('Mean', column.mean),
        ('Median', column.median),
        ('Std Dev', column.std),
        ('Min', column.min),
        ('Max', column.max),
        ('Skewness', column.skew),
        ('Kurtosis', column.kurtosis),
    )
    for label, compute in statistics:
        print(f'{label}: {compute():.3f}')
    print()


## 4. Distribution Plots


In [None]:
# One histogram per feature on a 2x2 grid, each with dashed vertical lines
# marking the mean (red) and the median (green).
fig, axes = plt.subplots(2, 2, figsize=(15, 12))
# axes.flat walks the grid row by row, so panels follow the order of `features`.
for ax, feature in zip(axes.flat, features):
    values = df[feature]
    ax.hist(values, bins=30, edgecolor='black', alpha=0.7)
    reference_lines = (
        ('Mean', values.mean(), 'r'),
        ('Median', values.median(), 'g'),
    )
    for line_label, position, colour in reference_lines:
        ax.axvline(position, color=colour, linestyle='--', label=line_label)
    ax.set(
        xlabel=feature,
        ylabel='Frequency',
        title=f'Distribution of {feature}',
    )
    ax.legend()
    ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()


## 5. Box Plots


In [None]:
# One box plot per feature, side by side in a single row of four panels.
fig, axes = plt.subplots(1, 4, figsize=(16, 4))
for panel, feature in zip(axes, features):
    panel.boxplot(df[feature])
    # Each panel has its own y-axis, so the features are not drawn on a
    # common scale.
    panel.set(title=feature, ylabel='Value')
    panel.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()


## 6. Q-Q Plots for Normality


In [None]:
# Normal Q-Q plot for every feature on a 2x2 grid.
fig, axes = plt.subplots(2, 2, figsize=(15, 12))
for position, feature in enumerate(features):
    # Map the running index onto (row, column) of the 2-wide grid.
    row, col = divmod(position, 2)
    ax = axes[row][col]
    # probplot draws the sample quantiles against normal quantiles on `ax`.
    stats.probplot(df[feature], dist='norm', plot=ax)
    # Set after probplot so this title is the one that ends up on the panel.
    ax.set_title(f'Q-Q Plot: {feature}')
    ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
