# Univariate, Bivariate, and Multivariate Analysis

## Cancer Incidence Data Analysis

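This notebook loads and cleans the cancer incidence dataset (`incd.csv`), then covers:
- Univariate Analysis of `Incidence_Rate`
- Bivariate Analysis of `Incidence_Rate` against `Annual_Count` and `Trend` (when those columns are present)
- Multivariate Analysis, followed by cluster analysis with 3 clusters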

In [None]:
# Import libraries
import sys
# Extend the module search path before the project imports below run.
# The path is relative, so it resolves against the kernel's working directory.
# NOTE(review): presumably this is where data_loader and the *_analysis modules live,
# and the kernel is expected to start in the notebook's own folder - confirm.
sys.path.append('../../scripts/python')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Project-local helpers; these imports depend on the sys.path entry added above.
from data_loader import load_data, clean_data
from univariate_analysis import univariate_analysis
from bivariate_analysis import bivariate_analysis, categorical_bivariate_analysis
from multivariate_analysis import multivariate_analysis, cluster_analysis

import warnings
# Silences every warning category for the rest of the session, not just cosmetic ones.
# NOTE(review): this also hides warnings raised inside the analysis helpers;
# consider narrowing the filter to specific categories or modules.
warnings.filterwarnings('ignore')

# Plot defaults shared by all figures in this notebook: seaborn "whitegrid" style
# and a 12 x 6 inch default figure size.
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (12, 6)
# IPython magic: render matplotlib figures inline in the cell output.
%matplotlib inline

In [None]:
# Read the raw incidence CSV, then derive the cleaned frame (`df_clean`)
# that the analysis cells below operate on. The raw frame stays in `df`.
incidence_csv = '../../data/incd.csv'
df = load_data(incidence_csv)
df_clean = clean_data(df)

# Report the shape of the cleaned data as a quick sanity check.
print(f"Dataset: {df_clean.shape}")

## Univariate Analysis

In [None]:
# Run the univariate analysis helper on the incidence-rate column of the cleaned data.
# Whatever the helper returns is bound to `result`.
target_column = 'Incidence_Rate'
result = univariate_analysis(df_clean, target_column)

## Bivariate Analysis

In [None]:
# Bivariate analysis
# Each comparison is run only when its second column exists in the cleaned data.
# When a column is absent, say so explicitly: otherwise the cell finishes with no
# output at all, which is indistinguishable from a cell that was never executed.
if 'Annual_Count' in df_clean.columns:
    # Incidence_Rate against Annual_Count.
    bivariate_analysis(df_clean, 'Incidence_Rate', 'Annual_Count')
else:
    print("Skipping Incidence_Rate vs Annual_Count: column 'Annual_Count' not found")

if 'Trend' in df_clean.columns:
    # Trend against Incidence_Rate, using the categorical helper.
    categorical_bivariate_analysis(df_clean, 'Trend', 'Incidence_Rate')
else:
    print("Skipping Trend vs Incidence_Rate: column 'Trend' not found")

## Multivariate Analysis

In [None]:
# Multivariate analysis of the cleaned data, followed by cluster analysis.
n_clusters = 3  # number of clusters requested from cluster_analysis
multivariate_analysis(df_clean)
cluster_analysis(df_clean, n_clusters=n_clusters)