In [2]:
!pip install pandas

Collecting pandas
  Using cached pandas-2.3.3-cp313-cp313-win_amd64.whl.metadata (19 kB)
Collecting numpy>=1.26.0 (from pandas)
  Using cached numpy-2.3.3-cp313-cp313-win_amd64.whl.metadata (60 kB)
Collecting pytz>=2020.1 (from pandas)
  Using cached pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Using cached tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)
Using cached pandas-2.3.3-cp313-cp313-win_amd64.whl (11.0 MB)
Using cached numpy-2.3.3-cp313-cp313-win_amd64.whl (12.8 MB)
Using cached pytz-2025.2-py2.py3-none-any.whl (509 kB)
Using cached tzdata-2025.2-py2.py3-none-any.whl (347 kB)
Installing collected packages: pytz, tzdata, numpy, pandas

   ---------------------------------------- 0/4 [pytz]
   ---------------------------------------- 0/4 [pytz]
   ---------- ----------------------------- 1/4 [tzdata]
   -------------------- ------------------- 2/4 [numpy]
   -------------------- ------------------- 2/4 [numpy]
   --------------

In [3]:
import pandas as pd
from collections import Counter, defaultdict



In [7]:
df = pd.read_excel("dementia_dataset (2).xls")
# df = pd.read_csv("dementia_dataset.csv")  # alternative

# 2) Convert to plain Python for DS ops
records = df.to_dict(orient="records")            # list[dict]
cols    = list(df.columns)                         # list[str]
n_rows  = len(records)

# 3) Basic counts using Counter/set
group_counts = Counter(r.get("Group") for r in records)   # occurrences per label
unique_subjects = set(r.get("Subject ID") for r in records if "Subject ID" in r)

# 4) Simple numeric summaries with dict/list comps (no numpy needed)
def col_stats(col):
    vals = [r[col] for r in records if isinstance(r.get(col), (int, float))]
    if not vals: return {"count":0}
    vals.sort()
    mid = len(vals)//2
    median = (vals[mid] if len(vals)%2==1 else (vals[mid-1]+vals[mid])/2)
    return {
        "count": len(vals),
        "min": min(vals),
        "max": max(vals),
        "mean": sum(vals)/len(vals),
        "median": median
    }

num_summary = {}
for c in ["Age","MMSE","CDR","eTIV","nWBV","EDUC","SES"]:
    if c in cols:
        num_summary[c] = col_stats(c)

# 5) MMSE bucket counts via dict + list comp
mmse_bins = defaultdict(int)
if "MMSE" in cols:
    for r in records:
        v = r.get("MMSE")
        if isinstance(v, (int, float)):
            # buckets: 0–9, 10–14, 15–19, 20–24, 25–27, 28–30
            if v < 10:      mmse_bins["00-09"] += 1
            elif v < 15:    mmse_bins["10-14"] += 1
            elif v < 20:    mmse_bins["15-19"] += 1
            elif v < 25:    mmse_bins["20-24"] += 1
            elif v < 28:    mmse_bins["25-27"] += 1
            else:           mmse_bins["28-30"] += 1

# 6) Example of dict-of-lists “index”: group → list[Subject ID]
by_group_subjects = defaultdict(list)
if "Group" in cols and "Subject ID" in cols:
    for r in records:
        by_group_subjects[r["Group"]].append(r["Subject ID"])

# 7) Print concise explanation-style outputs
print(f"Rows: {n_rows}, Columns: {len(cols)}")
print("Group counts:", dict(group_counts))
print("Unique subjects:", len(unique_subjects))
print("Numeric summaries:", num_summary)
print("MMSE buckets:", dict(mmse_bins))

Rows: 373, Columns: 15
Group counts: {'Nondemented': 190, 'Demented': 146, 'Converted': 37}
Unique subjects: 150
Numeric summaries: {'Age': {'count': 373, 'min': 60, 'max': 98, 'mean': 77.01340482573727, 'median': 77}, 'MMSE': {'count': 373, 'min': 4.0, 'max': 30.0, 'mean': nan, 'median': 29.0}, 'CDR': {'count': 373, 'min': 0.0, 'max': 2.0, 'mean': 0.29088471849865954, 'median': 0.0}, 'eTIV': {'count': 373, 'min': 1106, 'max': 2004, 'mean': 1488.1286863270777, 'median': 1470}, 'nWBV': {'count': 373, 'min': 0.644, 'max': 0.837, 'mean': 0.7295683646112601, 'median': 0.729}, 'EDUC': {'count': 373, 'min': 6, 'max': 23, 'mean': 14.597855227882038, 'median': 15}, 'SES': {'count': 373, 'min': 1.0, 'max': 5.0, 'mean': nan, 'median': 2.0}}
MMSE buckets: {'25-27': 64, '28-30': 252, '20-24': 40, '15-19': 15, '00-09': 2}
