# Bivariate Analysis - Iris Dataset

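This notebook performs bivariate analysis of the Iris dataset: scatter plots for every pair of features, Pearson correlation coefficients, a correlation heatmap, and a pair plot.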


## 1. Import Libraries


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import pearsonr, spearmanr
import warnings
from pathlib import Path
from itertools import combinations

# Notebook-wide setup: fixed NumPy seed, quiet output, shared plot styling.
np.random.seed(42)
warnings.filterwarnings('ignore')  # NOTE: silences every warning category
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (12, 8)})


## 2. Load Data


In [None]:
# Load the Iris CSV and fail fast if the columns used by later cells are absent.
data_path = Path('../../data/Iris.csv')  # relative to the current working directory
df = pd.read_csv(data_path)
features = ['sepal.length', 'sepal.width', 'petal.length', 'petal.width']

# Later cells index df by these names, so verify them before reporting success
# instead of letting a KeyError surface in the middle of a plotting cell.
required_columns = [*features, 'variety']
missing_columns = [col for col in required_columns if col not in df.columns]
if missing_columns:
    raise KeyError(
        f'{data_path} is missing expected columns {missing_columns}; '
        f'found {list(df.columns)}'
    )
print('Data loaded successfully!')


## 3. Scatter Plots


In [None]:
# Scatter plot for every pair of features, one colour per variety.
pairs = list(combinations(features, 2))

# Size the grid from the number of pairs rather than assuming exactly six,
# so changing `features` cannot index past the end of `axes`.
n_cols = 3
n_rows = max(1, -(-len(pairs) // n_cols))  # ceiling division, at least one row
fig, axes = plt.subplots(n_rows, n_cols, figsize=(6 * n_cols, 6 * n_rows),
                         squeeze=False)
axes = axes.flatten()

# The variety labels are the same for every subplot; look them up once.
varieties = df['variety'].unique()

for ax, (f1, f2) in zip(axes, pairs):
    for variety in varieties:
        data = df[df['variety'] == variety]
        ax.scatter(data[f1], data[f2], label=variety, alpha=0.6)
    ax.set_xlabel(f1)
    ax.set_ylabel(f2)
    ax.set_title(f'{f1} vs {f2}')
    ax.legend()
    ax.grid(True, alpha=0.3)

# Hide grid cells that have no pair to show (none when there are six pairs).
for unused_ax in axes[len(pairs):]:
    unused_ax.set_visible(False)

plt.tight_layout()
plt.show()


## 4. Correlation Analysis


In [None]:
# Print the Pearson correlation coefficient and p-value for each feature pair.
# Uses `pairs` as built in the scatter-plot cell.
banner = '=' * 80
print(banner)
print('PEARSON CORRELATION COEFFICIENTS')
print(banner)
for f1, f2 in pairs:
    corr, p_value = pearsonr(df[f1], df[f2])
    # Scientific notation for the p-value: a fixed six-decimal format prints
    # any value below 5e-7 as 0.000000, hiding its magnitude.
    print(f'{f1} vs {f2}: r = {corr:.4f}, p-value = {p_value:.3e}')


## 5. Correlation Heatmap


In [None]:
# Annotated heatmap of the pairwise correlations between the feature columns.
plt.figure(figsize=(10, 8))
corr_matrix = df[features].corr()
heatmap_options = dict(
    annot=True,
    cmap='coolwarm',
    center=0,  # colour scale centred on zero correlation
    square=True,
    linewidths=1,
    cbar_kws={'shrink': 0.8},
)
sns.heatmap(corr_matrix, **heatmap_options)
plt.title('Correlation Matrix', fontsize=16)
plt.tight_layout()
plt.show()


## 6. Pair Plot


In [None]:
# Pair plot of df, coloured by variety, with histograms on the diagonal.
grid = sns.pairplot(df, hue='variety', diag_kind='hist', height=2.5)
plt.suptitle('Pair Plot of Iris Dataset', y=1.02, fontsize=16)
# Use the grid's own tight_layout rather than plt.tight_layout(): it lays out
# the axes inside the area seaborn reserved beside the outside legend, so the
# plots do not expand underneath the legend.
grid.tight_layout()
plt.show()
