In [2]:
from google.colab import files
import io  # Import the standard io module
import pandas as pd

uploaded = files.upload()                # ⇧ choose CardioVascular disease data.xlsx
# Guard against an empty result (for instance when no file was chosen):
# next(iter(...)) on an empty mapping would otherwise raise a bare StopIteration.
if not uploaded:
    raise RuntimeError("No file was uploaded – re-run this cell and choose the workbook.")
RAW = next(iter(uploaded))               # name of the first uploaded file
print("Loaded:", RAW)

# Wrap the uploaded bytes in an in-memory buffer so pandas can open the workbook
xls = pd.ExcelFile(io.BytesIO(uploaded[RAW]))
print("Sheets:", xls.sheet_names)


Saving CardioVascular disease data.xlsx to CardioVascular disease data.xlsx
Loaded: CardioVascular disease data.xlsx
Sheets: ['CardioVascular disease data']


In [5]:
# ➋ + ➌  Load sheet 0 of the uploaded workbook (it already has year_month &
#         cvd_cases) and parse year_month into datetimes in the same chain
monthly_df = (
    pd.read_excel(io.BytesIO(uploaded[RAW]), sheet_name=0)
      .assign(year_month=lambda frame: pd.to_datetime(frame['year_month']))
)

# Import numpy
import numpy as np

def clean_sex(col):
    """Normalise a sex column to 'M', 'F' or 'UNKNOWN'.

    Values are stringified, trimmed and upper-cased, known spellings are
    mapped to their one-letter code, and everything else becomes 'UNKNOWN'.
    Returns a NumPy array aligned with ``col``.
    """
    aliases = {
        'MALE': 'M', 'MASCULIN': 'M',
        'FEMALE': 'F', 'FEM': 'F', 'FEMME': 'F',
    }
    normalised = col.astype(str).str.strip().str.upper()
    normalised = normalised.replace(aliases)
    is_known = normalised.isin(['M', 'F'])
    return np.where(is_known, normalised, 'UNKNOWN')
# Sex gets an explicit category list; the other two columns infer theirs
monthly_df['sex'] = pd.Categorical(
    clean_sex(monthly_df['sex']),
    categories=['M', 'F', 'UNKNOWN'],
)
monthly_df = monthly_df.astype({'age_band': 'category', 'residence': 'category'})

# ➍  Write the tidy table to CSV for the Streamlit app
CSV_NAME = "CardioVascular disease data.csv"
monthly_df.to_csv(CSV_NAME, index=False)
print(f"✅ Tidy CSV saved: {CSV_NAME}")
monthly_df.head()

✅ Tidy CSV saved: CardioVascular disease data.csv


Unnamed: 0,year_month,residence,sex,age_band,cvd_cases
0,2019-01-01,ADONIS,UNKNOWN,Unknown,1
1,2019-01-01,AJALTOUN,UNKNOWN,Unknown,0
2,2019-01-01,AKKAR,UNKNOWN,Unknown,1
3,2019-01-01,AMCHIT,UNKNOWN,Unknown,1
4,2019-01-01,BOUAR,UNKNOWN,Unknown,1


In [8]:
# ➎ Create the Streamlit project folder
import pathlib # Import the pathlib module
import shutil # Import the shutil module
import os # Import the os module

proj = pathlib.Path("cvd_app")
pages_dir = proj / "pages"
data_dir = proj / "data"
pages_dir.mkdir(parents=True, exist_ok=True)   # parents=True also creates cvd_app/
data_dir.mkdir(exist_ok=True)

# Place a copy of the tidy CSV in the project's data folder
shutil.copy(CSV_NAME, data_dir / CSV_NAME)
print("✅ Project folders ready:")
project_files = [(root, f) for root, _, files_ in os.walk(proj) for f in files_]
for root, f in project_files:
    print(f"{root} → {f}")


✅ Project folders ready:
cvd_app/data → CardioVascular disease data.csv


In [9]:
%%writefile cvd_app/utils.py
import pandas as pd, pathlib, functools
DATA = pathlib.Path(__file__).parent / 'data'
FILENAME = "CardioVascular disease data.csv"

@functools.lru_cache(maxsize=1)
def load_monthly():
    """Read the monthly CSV from the data folder; the result is memoised."""
    csv_path = DATA / FILENAME
    categorical_cols = dict.fromkeys(('sex', 'age_band', 'residence'), 'category')
    return pd.read_csv(csv_path, parse_dates=['year_month'], dtype=categorical_cols)


Writing cvd_app/utils.py


In [10]:
%%writefile cvd_app/Home.py
import streamlit as st, plotly.express as px, pandas as pd
from utils import load_monthly

st.set_page_config("CVD Dashboard", layout="wide")
df = load_monthly()

st.title("🫀 Cardiovascular Surgery Cohort – Lebanon • 2019-2023")

# KPI: total cases in the most recent month compared with the month before.
latest = df.year_month.max()
# Step back exactly one calendar month. Subtracting MonthEnd() instead rolls
# to the LAST day of the previous month, which never equals a first-of-month
# year_month stamp (as in the sample rows), so `prev` would always be 0.
previous = latest - pd.DateOffset(months=1)
curr  = df.loc[df.year_month==latest, 'cvd_cases'].sum()
prev  = df.loc[df.year_month==previous, 'cvd_cases'].sum()
delta = (curr-prev)/prev if prev else 0      # avoid dividing by zero

c1,c2 = st.columns(2)
c1.metric("CVD cases (latest month)", f"{curr:,}", f"{delta:+.1%}")
c2.metric("Dataset span",
          f"{df.year_month.min():%Y-%m} → {latest:%Y-%m}")

# Monthly totals across all rows, drawn as a line chart
trend = df.groupby('year_month')['cvd_cases'].sum().reset_index()
fig = px.line(trend, x='year_month', y='cvd_cases',
              markers=True, title="Monthly CVD cases")
st.plotly_chart(fig, use_container_width=True)
st.markdown("👈 See *Demographics* page for age/sex breakdown.")


Writing cvd_app/Home.py


In [11]:
%%writefile cvd_app/pages/1_Demographics.py
import streamlit as st, plotly.express as px
from utils import load_monthly

st.title("👥 Demographics")
df = load_monthly()
# Most recent year first in the dropdown
year = st.selectbox("Year", sorted(df.year_month.dt.year.unique(), reverse=True))
sub  = df[df.year_month.dt.year == year]

# Rows = age bands, columns = sex codes, cells = summed cases
pivot = (sub.pivot_table(index='age_band', columns='sex',
                         values='cvd_cases', aggfunc='sum')
           .fillna(0).sort_index())
# Flip females for the pyramid – but only when the column exists, otherwise a
# selection without any 'F' rows would raise KeyError.
if 'F' in pivot.columns:
    pivot['F'] = -pivot['F']

fig = px.bar(pivot, orientation='h',
             labels={'value':'Cases', 'age_band':'Age band'},
             title=f"Age–sex pyramid • {year}", height=520)
# Fix the y-axis order explicitly (reverse of the sorted index)
fig.update_layout(yaxis={'categoryorder':'array',
                         'categoryarray':list(pivot.index[::-1])})
st.plotly_chart(fig, use_container_width=True)


Writing cvd_app/pages/1_Demographics.py


In [13]:
!pip install streamlit plotly --quiet
import random # Import the random module
import subprocess # Import the subprocess module
import time # Import the time module

port = random.randint(8501, 8999)

# Assemble the CLI from a table of --server.* options (insertion order kept)
server_opts = {"headless": "true", "port": str(port), "enableCORS": "false"}
cmd = ["streamlit", "run", "cvd_app/Home.py"]
for opt, val in server_opts.items():
    cmd += [f"--server.{opt}", val]

proc = subprocess.Popen(cmd)
time.sleep(5)   # pause before printing the hint below
print(f"🚀  Click the public link Colab printed above (port {port})")


🚀  Click the public link Colab printed above (port 8852)


In [19]:
!pkill -f "streamlit run" 2>/dev/null


^C


In [20]:
# ▸  install once
!pip install streamlit plotly --quiet

# ▸  run Streamlit on a fixed port
PORT = 8501

import threading, time, os
def run_streamlit():
    """Run the Streamlit server for cvd_app/Home.py on PORT through the shell."""
    cli_parts = [
        "streamlit run cvd_app/Home.py",
        "--server.headless true",
        "--server.address 0.0.0.0",
        f"--server.port {PORT}",
        "--server.enableCORS false",
    ]
    os.system(" ".join(cli_parts))

# Run the server in a daemon thread so this cell can carry on
thread = threading.Thread(target=run_streamlit)
thread.daemon = True
thread.start()

# give Streamlit 5-6 s to start
time.sleep(6)

# ▸  ask Colab for the public proxy URL of that port
from google.colab import output
proxy_js = "google.colab.kernel.proxyPort({})".format(PORT)
public_url = output.eval_js(proxy_js)
print(f"🌍  OPEN THIS LINK ➜ {public_url}")


🌍  OPEN THIS LINK ➜ https://8501-m-s-23cxn6k67g9vz-b.us-west1-1.prod.colab.dev


In [21]:
!pip install streamlit plotly pyngrok geopandas shapely --quiet


In [22]:
import pathlib, shutil, pandas as pd, numpy as np, textwrap, json, os

# ▸  project layout: cvd_app/pages and cvd_app/data
PROJ = pathlib.Path("cvd_app")
DATA = PROJ.joinpath("data")
PROJ.joinpath("pages").mkdir(parents=True, exist_ok=True)
DATA.mkdir(exist_ok=True)

CSV_IN = "CardioVascular disease data.csv"      # adjust if your name differs
shutil.copy(CSV_IN, DATA.joinpath(CSV_IN))

# ▸ utils.py --------------------------------------------------------------
# Generates cvd_app/utils.py with two memoised loaders.
# The CSV name is embedded with !r so the generated module always holds a
# correctly quoted Python string literal, even if the name contains a quote.
# The file is written as UTF-8 explicitly rather than with the platform default.
(PROJ/"utils.py").write_text(textwrap.dedent(f"""
import pandas as pd, functools, pathlib
DATA = pathlib.Path(__file__).parent/'data'
F_CSV = {CSV_IN!r}

@functools.lru_cache(maxsize=1)
def load_monthly():
    return pd.read_csv(DATA/F_CSV,
                       parse_dates=['year_month'],
                       dtype={{'sex':'category','age_band':'category',
                              'residence':'category'}})
@functools.lru_cache(maxsize=1)
def load_patients():
    parq = DATA/'patient_level.parquet'
    return pd.read_parquet(parq) if parq.exists() else None
"""), encoding="utf-8")
# ▸ Home.py --------------------------------------------------------------
# Generates the landing page: sidebar filters, latest-month KPI and trend line.
(PROJ/"Home.py").write_text(textwrap.dedent("""
import streamlit as st, plotly.express as px, pandas as pd
from utils import load_monthly

st.set_page_config('CVD Dashboard', layout='wide')
df = load_monthly()

# ── sidebar global filters
# NOTE(review): only 'M'/'F' are offered, so rows with any other sex value
# (e.g. 'UNKNOWN') never pass the filter – confirm that is intended.
st.sidebar.header('Global filters')
sex_sel = st.sidebar.multiselect('Sex', ['M','F'], default=['M','F'])
age_sel = st.sidebar.multiselect('Age band', df.age_band.cat.categories, default=list(df.age_band.cat.categories))
res_sel = st.sidebar.multiselect('Residence', sorted(df.residence.unique()), default=list(df.residence.unique()))
# Store the selections under the session-state keys the pages look up
# ('sex_sel', 'age_sel', 'res_sel'); the widgets alone do not create them.
st.session_state['sex_sel'] = sex_sel
st.session_state['age_sel'] = age_sel
st.session_state['res_sel'] = res_sel
filt = df[df.sex.isin(sex_sel) & df.age_band.isin(age_sel) & df.residence.isin(res_sel)]

st.title('🫀 Cardiovascular Surgery Cohort – Lebanon')

# With nothing selected there is no latest month to report on.
if filt.empty:
    st.warning('No rows match the current filters.')
    st.stop()

latest = filt.year_month.max()
# One calendar month back. MonthEnd() would give the last day of the previous
# month, which never equals a first-of-month year_month stamp.
previous = latest - pd.DateOffset(months=1)
curr   = filt.loc[filt.year_month==latest,'cvd_cases'].sum()
prev   = filt.loc[filt.year_month==previous,'cvd_cases'].sum()
delta  = (curr-prev)/prev if prev else 0

c1,c2 = st.columns(2)
c1.metric('CVD cases (latest mth)', f'{curr:,}', f'{delta:+.1%}')
c2.metric('Dataset span', f'{filt.year_month.min():%Y-%m} → {latest:%Y-%m}')

trend = filt.groupby('year_month')['cvd_cases'].sum().reset_index()
fig = px.line(trend, x='year_month', y='cvd_cases', markers=True)
st.plotly_chart(fig, use_container_width=True)

st.markdown('👈 Use sidebar filters & pages to explore demographics, geography, subtypes and prediction.')
"""), encoding="utf-8")
# ▸ pages/1_Demographics.py ----------------------------------------------
# Generates the age–sex pyramid page.
(PROJ/"pages"/"1_Demographics.py").write_text(textwrap.dedent("""
import streamlit as st, plotly.express as px
from utils import load_monthly
df = load_monthly()
# Session state lives on st.session_state; st.sidebar has no such attribute.
# .get() falls back to "everything selected" when a key has not been stored.
sex_sel = st.session_state.get('sex_sel', ['M','F'])
age_sel = st.session_state.get('age_sel', list(df.age_band.cat.categories))
res_sel = st.session_state.get('res_sel', list(df.residence.unique()))
filt = df[df.sex.isin(sex_sel) & df.age_band.isin(age_sel) & df.residence.isin(res_sel)]

st.title('👥 Age–sex pyramid')
year = st.slider('Year', int(df.year_month.dt.year.min()),
                          int(df.year_month.dt.year.max()),
                          value=int(df.year_month.dt.year.max()))
sub = filt[filt.year_month.dt.year==year]
pivot = (sub.pivot_table(index='age_band', columns='sex',
                         values='cvd_cases', aggfunc='sum')
            .fillna(0).sort_index())
# Mirror the female bars to the left; skip when no 'F' column was produced.
if 'F' in pivot.columns:
    pivot['F'] = -pivot['F']
fig = px.bar(pivot, orientation='h', labels={'value':'Cases','age_band':'Age band'},
             title=f'{year}', height=520)
fig.update_layout(yaxis={'categoryorder':'array','categoryarray':list(pivot.index[::-1])})
st.plotly_chart(fig, use_container_width=True)
"""), encoding="utf-8")
# ▸ pages/2_Geography.py --------------------------------------------------
# Generates the choropleth page; it shows a hint and stops when
# data/lebanon.geojson is missing.
(PROJ/"pages"/"2_Geography.py").write_text(textwrap.dedent("""
import streamlit as st, geopandas as gpd, plotly.express as px, pandas as pd
from utils import load_monthly, DATA
if not (DATA/'lebanon.geojson').exists():
    st.info('Upload **lebanon.geojson** to enable this map.')
    st.stop()

geo = gpd.read_file(DATA/'lebanon.geojson')
df  = load_monthly()
# Session state lives on st.session_state; st.sidebar has no such attribute.
# .get() falls back to "everything selected" when a key has not been stored.
# Residence is not filtered here: the map aggregates per residence below.
sex_sel = st.session_state.get('sex_sel', ['M','F'])
age_sel = st.session_state.get('age_sel', list(df.age_band.cat.categories))
filt = df[df.sex.isin(sex_sel) & df.age_band.isin(age_sel)]
month = st.selectbox('Month', filt.year_month.sort_values().unique()[::-1])
sub   = filt[filt.year_month==pd.to_datetime(month)]
agg   = sub.groupby('residence')['cvd_cases'].sum().reset_index()
# Left merge keeps every polygon; areas without cases are filled with 0
mapdf = geo.merge(agg, left_on='NAME_EN', right_on='residence', how='left')
mapdf['cvd_cases'] = mapdf['cvd_cases'].fillna(0)
fig = px.choropleth(mapdf, geojson=mapdf.geometry, locations=mapdf.index,
                    color='cvd_cases', hover_name='NAME_EN', color_continuous_scale='Reds')
fig.update_geos(fitbounds='locations', visible=False)
st.title('🗺️ Geographic burden'); st.plotly_chart(fig, use_container_width=True)
"""), encoding="utf-8")
# ▸ pages/3_Subtypes.py ---------------------------------------------------
# Generates the subtype trend page; it stops early when the monthly table has
# no htn/cad/pad/dm columns.
(PROJ/"pages"/"3_Subtypes.py").write_text(textwrap.dedent("""
import streamlit as st, plotly.express as px
from utils import load_monthly
df = load_monthly()
if not {'htn','cad','pad','dm'}.issubset(df.columns):
    st.info('Subtype columns not found.')
    st.stop()
# Session state lives on st.session_state; st.sidebar has no such attribute.
# .get() falls back to "everything selected" when a key has not been stored.
sex_sel = st.session_state.get('sex_sel', ['M','F'])
age_sel = st.session_state.get('age_sel', list(df.age_band.cat.categories))
res_sel = st.session_state.get('res_sel', list(df.residence.unique()))
filt = df[df.sex.isin(sex_sel)&df.age_band.isin(age_sel)&df.residence.isin(res_sel)]

sub = st.selectbox("Subtype", ['htn','cad','pad','dm'])
trend = filt.groupby('year_month')[sub].sum().reset_index()
fig = px.area(trend, x='year_month', y=sub, title=sub.upper())
st.title('🩺 Subtype explorer'); st.plotly_chart(fig, use_container_width=True)
"""), encoding="utf-8")
# ▸ pages/4_Prediction.py (bonus) ----------------------------------------
# Generates the ICU-stay (> 5) classifier page; it stops early when the
# patient-level table or its 'ICU stay' column is unavailable.
(PROJ/"pages"/"4_Prediction.py").write_text(textwrap.dedent("""
import streamlit as st, pandas as pd
from utils import load_patients
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import roc_auc_score

patients = load_patients()
if patients is None or 'ICU stay' not in patients.columns:
    st.info('Row-level file not available.')
    st.stop()

# Build the target as its own Series: load_patients() is lru_cached, so adding
# a column to the returned frame would alter the shared cached object.
y = (patients['ICU stay']>5).astype(int).rename('icu_gt5')
X = patients[['cvd_flag','age','clampage','fe']].fillna(0)
# A stratified split and an AUC both need two outcome classes.
if y.nunique() < 2:
    st.info('ICU-stay outcome has a single class; nothing to predict.')
    st.stop()
Xtr,Xts,ytr,yts = train_test_split(X,y,test_size=0.2,stratify=y,random_state=42)
# Seed the model as well as the split so reruns reproduce the same AUC.
model = GradientBoostingClassifier(random_state=42).fit(Xtr,ytr)
auc = roc_auc_score(yts, model.predict_proba(Xts)[:,1])
st.title('🔮 ICU-stay prediction'); st.metric('AUC', f'{auc:.3f}')
"""), encoding="utf-8")

print("✅  All Streamlit files written.")


✅  All Streamlit files written.


In [26]:
import pandas as pd, numpy as np, pathlib, shutil, os, textwrap, subprocess, time, random
FILE = "/content/CardioVascular disease data.xlsx"   # adjust if path differs

# Read sheet 0 and apply the minimal hygiene (datetime year_month) in one chain
df = (
    pd.read_excel(FILE, sheet_name=0)
      .assign(year_month=lambda frame: pd.to_datetime(frame['year_month']))
)
def clean_sex(c):
  """Normalise a sex column to 'M', 'F' or 'UNKNOWN' (returned as a NumPy array).

  Values are stringified, trimmed and upper-cased before the alias lookup.
  The alias table covers the same spellings as the notebook's earlier
  clean_sex (MALE, MASCULIN, FEMALE, FEM, FEMME), so re-running the cleaning
  here cannot turn 'MASCULIN' / 'FEMME' rows into 'UNKNOWN'.
  """
  aliases = {'MALE':'M','MASCULIN':'M','FEMALE':'F','FEM':'F','FEMME':'F'}
  tx = c.astype(str).str.strip().str.upper().replace(aliases)
  # Anything that is not a recognised code after mapping is bucketed as UNKNOWN
  return np.where(tx.isin(['M','F']), tx, 'UNKNOWN')
# Sex gets an explicit category list; the other two columns infer theirs
df['sex'] = pd.Categorical(
    clean_sex(df['sex']),
    categories=['M', 'F', 'UNKNOWN'],
)
df = df.astype({'age_band': 'category', 'residence': 'category'})

CSV_NAME = "CardioVascular disease data.csv"
df.to_csv(CSV_NAME, index=False)
print(f"✅ saved: {CSV_NAME}")


✅ saved: CardioVascular disease data.csv


In [27]:
# Ensure cvd_app/pages and cvd_app/data exist, then copy the CSV into data/
PROJ = pathlib.Path("cvd_app")
DATA = PROJ.joinpath("data")
PROJ.joinpath("pages").mkdir(parents=True, exist_ok=True)
DATA.mkdir(exist_ok=True)
shutil.copy(CSV_NAME, DATA.joinpath(CSV_NAME))

print("📂 project tree:")
for d, _, f in os.walk(PROJ):
    print(f"{d} → {f}")   # f is the list of file names in directory d


📂 project tree:
cvd_app → ['utils.py', 'Home.py']
cvd_app/data → ['CardioVascular disease data.csv']
cvd_app/__pycache__ → ['utils.cpython-311.pyc']
cvd_app/pages → ['2_Geography.py', '1_Demographics.py', '4_Prediction.py', '3_Subtypes.py']


In [28]:
%%writefile cvd_app/utils.py
import pandas as pd, functools, pathlib
DATA = pathlib.Path(__file__).parent / "data"
FCSV = "CardioVascular disease data.csv"

@functools.lru_cache(maxsize=1)
def load_monthly():
    """Read the monthly CSV from the data folder; the result is memoised."""
    csv_path = DATA / FCSV
    categorical_cols = dict.fromkeys(('sex', 'age_band', 'residence'), 'category')
    return pd.read_csv(csv_path, parse_dates=['year_month'], dtype=categorical_cols)

@functools.lru_cache(maxsize=1)
def load_patients():
    """Return the patient-level parquet table, or None when the file is absent."""
    parquet_file = DATA / 'patient_level.parquet'
    if not parquet_file.exists():
        return None
    return pd.read_parquet(parquet_file)


Overwriting cvd_app/utils.py


In [29]:
%%writefile cvd_app/Home.py
import streamlit as st, plotly.express as px, pandas as pd
from utils import load_monthly

st.set_page_config("CVD Dashboard", layout="wide")
df = load_monthly()

# global filters (sidebar)
# NOTE(review): only 'M'/'F' are offered, so rows with any other sex value
# (e.g. 'UNKNOWN') never pass the filter – confirm that is intended.
st.sidebar.header("Filters")
sex_f = st.sidebar.multiselect("Sex", ['M','F'], default=['M','F']) or ['M','F']
age_f = st.sidebar.multiselect("Age band", df.age_band.cat.categories, default=list(df.age_band.cat.categories))
res_f = st.sidebar.multiselect("Residence", sorted(df.residence.unique()), default=list(df.residence.unique()))
# Store the selections in session state under the keys the Geography and
# Subtypes pages look up; the widgets alone do not create them.
st.session_state['sex_sel'] = sex_f
st.session_state['age_sel'] = age_f
st.session_state['res_sel'] = res_f
filt  = df[df.sex.isin(sex_f)&df.age_band.isin(age_f)&df.residence.isin(res_f)]

st.title("🫀 Cardiovascular Surgery Cohort – Lebanon")

# With nothing selected there is no latest month to report on.
if filt.empty:
    st.warning("No rows match the current filters.")
    st.stop()

latest = filt.year_month.max()
# One calendar month back. MonthEnd() would give the last day of the previous
# month, which never equals a first-of-month year_month stamp, so `prev`
# (and the delta) would always be 0.
previous = latest - pd.DateOffset(months=1)
curr   = filt.loc[filt.year_month==latest,'cvd_cases'].sum()
prev   = filt.loc[filt.year_month==previous,'cvd_cases'].sum()
delta  = (curr-prev)/prev if prev else 0      # avoid dividing by zero

c1,c2 = st.columns(2)
c1.metric("CVD cases (latest month)", f"{curr:,}", f"{delta:+.1%}")
c2.metric("Time span", f"{filt.year_month.min():%Y-%m} → {latest:%Y-%m}")

# Monthly totals of the filtered rows, drawn as a line chart
trend = filt.groupby('year_month')['cvd_cases'].sum().reset_index()
fig = px.line(trend, x='year_month', y='cvd_cases', markers=True)
st.plotly_chart(fig, use_container_width=True)


Overwriting cvd_app/Home.py


In [30]:
%%writefile cvd_app/pages/1_Demographics.py
import streamlit as st, plotly.express as px
from utils import load_monthly
df = load_monthly()
# Session state lives on st.session_state; st.sidebar has no such attribute.
# The keys match the ones the Geography and Subtypes pages use, and .get()
# falls back to "everything selected" when a key has not been stored.
sex_f = st.session_state.get('sex_sel', ['M','F'])
age_f = st.session_state.get('age_sel', list(df.age_band.cat.categories))
res_f = st.session_state.get('res_sel', list(df.residence.unique()))
filt = df[df.sex.isin(sex_f)&df.age_band.isin(age_f)&df.residence.isin(res_f)]
st.title("👥 Age-sex pyramid")
yr = st.slider("Year", int(df.year_month.dt.year.min()), int(df.year_month.dt.year.max()), value=int(df.year_month.dt.year.max()))
sub = filt[filt.year_month.dt.year==yr]
# Rows = age bands, columns = sex codes, cells = summed cases
pivot = (sub.pivot_table(index='age_band', columns='sex', values='cvd_cases', aggfunc='sum').fillna(0))
# Mirror the female bars to the left; skip when no 'F' column was produced.
if 'F' in pivot.columns:
    pivot['F'] = -pivot['F']
fig = px.bar(pivot, orientation='h', height=520, title=str(yr))
fig.update_layout(yaxis={'categoryorder':'array','categoryarray':list(pivot.index[::-1])})
st.plotly_chart(fig, use_container_width=True)


Overwriting cvd_app/pages/1_Demographics.py


In [31]:
!pip install streamlit plotly --quiet
import threading, os, time
PORT = 8501
def run():
    """Run the Streamlit server for cvd_app/Home.py on PORT through the shell."""
    flags = {"headless": "true", "address": "0.0.0.0", "port": PORT, "enableCORS": "false"}
    options = " ".join(f"--server.{name} {value}" for name, value in flags.items())
    os.system("streamlit run cvd_app/Home.py " + options)
# Serve from a daemon thread, pause, then ask Colab for the proxied URL
server_thread = threading.Thread(target=run, daemon=True)
server_thread.start()
time.sleep(6)
from google.colab import output
url = output.eval_js("google.colab.kernel.proxyPort(%s)" % PORT)
print(f"🌍 Dashboard → {url}")


🌍 Dashboard → https://8501-m-s-23cxn6k67g9vz-b.us-west1-1.prod.colab.dev


In [32]:
!git config --global user.email "you@example.com"
!git config --global user.name  "Your Name"
%cd cvd_app
!git init -q
!git add .
!git commit -qm "Initial dashboard"
TOKEN = "ghp_xxxYOURTOKENxxx"
USER  = "github-username"; REPO="cvd-dashboard"
!git remote add origin https://{TOKEN}@github.com/{USER}/{REPO}.git
!git branch -M main
!git push -q -u origin main
%cd ..


/content/cvd_app
fatal: could not read Password for 'https://ghp_xxxYOURTOKENxxx@github.com': No such device or address
/content


In [33]:
# 1-a  install git (usually on Colab already)
!sudo apt-get -q install git

# 1-b  tell git who you are for this session
!git config --global user.name  "YOUR NAME"
!git config --global user.email "YOUREMAIL@EXAMPLE.COM"

# 1-c  initialise a repo inside cvd_app/, commit, and push
import pathlib, getpass, os
proj = pathlib.Path("cvd_app")
assert proj.exists(), "❌ cvd_app folder not found – build it first!"

%cd cvd_app
!git init -q
!git add .
!git commit -qm "First commit – CVD Streamlit dashboard"

# 1-d  add remote & push (⚠️ replace the 3 placeholders below)
TOKEN = "ghp_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"         # your PAT
USER  = "your-github-username"
REPO  = "cvd-dashboard"

!git remote add origin https://{TOKEN}@github.com/{USER}/{REPO}.git
!git branch -M main
!git push -q -u origin main
%cd ..
print("✅  Repo pushed – check GitHub ->", f"https://github.com/{USER}/{REPO}")


Reading package lists...
Building dependency tree...
Reading state information...
git is already the newest version (1:2.34.1-1ubuntu1.12).
0 upgraded, 0 newly installed, 0 to remove and 35 not upgraded.
/content/cvd_app
On branch main
nothing to commit, working tree clean
error: remote origin already exists.
fatal: could not read Password for 'https://ghp_xxxYOURTOKENxxx@github.com': No such device or address
/content
✅  Repo pushed – check GitHub -> https://github.com/your-github-username/cvd-dashboard


In [34]:
%cd cvd_app                   # make sure you’re in project folder
!git remote remove origin     # delete the bad remote

TOKEN = "ghp_realPATfromGitHub"      # <<< paste your real token
USER  = "your-github-username"
REPO  = "cvd-dashboard"

!git remote add origin https://{TOKEN}@github.com/{USER}/{REPO}.git
!git push -u origin main
%cd ..


[Errno 2] No such file or directory: 'cvd_app # make sure you’re in project folder'
/content
fatal: not a git repository (or any of the parent directories): .git
fatal: not a git repository (or any of the parent directories): .git
fatal: not a git repository (or any of the parent directories): .git
/


In [35]:
!zip -rq cvd_app.zip cvd_app



zip error: Nothing to do! (try: zip -rq cvd_app.zip . -i cvd_app)


In [37]:
# file ipython-input-36-3952797126
import os # Import the os module
from google.colab import files

zip_filename = 'cvd_app.zip'

# Offer the download only when the archive is actually on disk
archive_missing = not os.path.exists(zip_filename)
if archive_missing:
    print(f"❌ Error: Could not find {zip_filename}. Zip creation likely failed.")
else:
    files.download(zip_filename)
    print(f"✅ Download started for {zip_filename}")


❌ Error: Could not find cvd_app.zip. Zip creation likely failed.
