# Heart Failure Clinical Records Clustering Analysis

This notebook provides a comprehensive clustering analysis of heart failure clinical records, with visualizations, multiple evaluation metrics, and detailed insights.

# Import Libraries and Setup

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from scipy.cluster.hierarchy import dendrogram, linkage
from scipy.spatial.distance import pdist
import warnings
# Silence all warnings for the remainder of the session.
warnings.filterwarnings(action='ignore')

# Plot appearance: seaborn-flavoured matplotlib style sheet plus the "husl" palette.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Set after the style sheet is applied so these values take precedence over it.
plt.rcParams.update({
    'figure.figsize': (12, 8),
    'font.size': 10,
})

# Load and Explore the Dataset

In [None]:
# Load the heart failure dataset from the UCI ML Repository, falling back to a
# CSV of the same name in the working directory when the download fails.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00519/heart_failure_clinical_records_dataset.csv"

try:
    df = pd.read_csv(url)
    print("Dataset loaded successfully")
except Exception as exc:
    # Catch Exception instead of using a bare `except:` so KeyboardInterrupt and
    # SystemExit still stop the cell, and report why the download failed rather
    # than hiding the cause.
    print(f"Loading from URL failed ({type(exc).__name__}: {exc}), using local file")
    df = pd.read_csv('heart_failure_clinical_records_dataset.csv')

print(f"\n Dataset Shape: {df.shape}")
print(f"Features: {list(df.columns)}")
print("\n Dataset Info:")
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() would emit a stray "None" line after the report.
df.info()

# Data Exploration

In [None]:
# Summary statistics for the numeric columns, printed under a heading.
print("Dataset Statistics:", df.describe(), sep="\n")

# Per-column count of missing entries.
print("\n Missing Values:", df.isna().sum(), sep="\n")

# Preview of the first rows; kept as the cell's final expression so the
# notebook renders it as a table.
print("\n First 5 rows:")
df.head(5)