# JBI100 Visualization 
### Academic year 2025-2026

## Health and Global Statistics
Data sources:

- Hospital Beds Management (https://www.kaggle.com/datasets/jaderz/hospital-beds-management/data)
- CIA Global Statistical Database (https://www.kaggle.com/datasets/kushagraarya10/cia-global-statistical-database)

Data dictionaries and additional info can be found in the respective data folders.
Note: you only need to select one dataset for your project; the dataset that you choose consists of multiple CSV files.

In [13]:
# Import libraries
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import os
import plotly.io as pio

# Show every column when a wide table is displayed (None removes the column cap)
pd.options.display.max_columns = None

# Use the IFrame renderer for plotly figures
pio.renderers.default = 'iframe'

# If you receive a 'ModuleNotFoundError', please install the corresponding library.
# This can be done from within the Jupyter environment with the command
# '!python -m pip install lib', where lib is the name of the missing library.

In [14]:
# Load the data

# Hospital Beds Management
# df_HBM_patients        = pd.read_csv('../Hospital Beds Management/patients.csv', delimiter=',', low_memory=False)
# df_HBM_staff           = pd.read_csv('../Hospital Beds Management/staff.csv', delimiter=',', low_memory=False)
# df_HBM_staff_schedule  = pd.read_csv('../Hospital Beds Management/staff_schedule.csv', delimiter=',', low_memory=False)
# df_HBM_services_weekly = pd.read_csv('../Hospital Beds Management/services_weekly.csv', delimiter=',', low_memory=False)

# CIA Global Statistical Database
# Folder that holds the CIA CSV files. The default is relative to the notebook and
# mirrors the '../<dataset folder>/' layout used by the Hospital Beds Management
# paths above. Set the CIA_DATA_DIR environment variable to read the files from
# another location instead of hard-coding a machine-specific absolute path.
CIA_DATA_DIR = os.environ.get('CIA_DATA_DIR', os.path.join('..', 'CIA Global Statistical Database'))

df_CIA_communications        = pd.read_csv(os.path.join(CIA_DATA_DIR, 'communications_data.csv'), delimiter=',', low_memory=False)
df_CIA_demographics          = pd.read_csv(os.path.join(CIA_DATA_DIR, 'demographics_data.csv'), delimiter=',', low_memory=False)
df_CIA_economy               = pd.read_csv(os.path.join(CIA_DATA_DIR, 'economy_data.csv'), delimiter=',', low_memory=False)
df_CIA_energy                = pd.read_csv(os.path.join(CIA_DATA_DIR, 'energy_data.csv'), delimiter=',', low_memory=False)
df_CIA_geography             = pd.read_csv(os.path.join(CIA_DATA_DIR, 'geography_data.csv'), delimiter=',', low_memory=False)
df_CIA_government_and_civics = pd.read_csv(os.path.join(CIA_DATA_DIR, 'government_and_civics_data.csv'), delimiter=',', low_memory=False)
df_CIA_transportation        = pd.read_csv(os.path.join(CIA_DATA_DIR, 'transportation_data.csv'), delimiter=',', low_memory=False)

## Explore Hospital Beds Management

In [15]:
# df_HBM_patients.sample(5)

In [16]:
# df_HBM_patients.describe()

In [17]:
# fig = px.scatter(df_HBM_patients, x="age", y="satisfaction", color="service", width=1000, height=800)
# fig.show()

## Explore CIA Global Statistical Database

In [18]:
# Peek at 5 randomly chosen rows. No random_state is passed, so the rows differ on every run.
# NOTE(review): in the captured output, Population_Growth_Rate and the literacy/unemployment
# rates show a '%' suffix, so they are presumably stored as strings — confirm before numeric use.
df_CIA_demographics.sample(5)

Unnamed: 0,Country,Total_Population,Population_Growth_Rate,Birth_Rate,Death_Rate,Net_Migration_Rate,Median_Age,Sex_Ratio,Infant_Mortality_Rate,Total_Fertility_Rate,Total_Literacy_Rate,Male_Literacy_Rate,Female_Literacy_Rate,Youth_Unemployment_Rate
36,BRUNEI,484991,1.43%,15.98,3.85,2.15,31.1,0.95,10.26,1.74,97.6%,30%,2.3%,23.4%
157,MOROCCO,37067420,0.88%,17.1,6.61,1.73,29.1,1.0,18.73,2.27,75.9%,28.2%,0.8%,27.2%
136,LIBYA,7252573,1.54%,20.88,3.45,2.05,25.8,1.04,10.95,3.04,91%,96.7%,85.6%,50.5%
242,UNITED ARAB EMIRATES,9973449,0.58%,10.8,1.6,3.4,35.7,2.16,5.1,1.62,98.1%,98.8%,97.2%,10.7%
192,SAINT BARTHELEMY,7093,0.11%,9.3,9.2,1.3,47.0,1.12,6.6,1.64,,,,


In [19]:
# Summary statistics (count, mean, std, quartiles, min/max) of the demographics table.
# NOTE(review): the captured output reports max Birth_Rate = 2020, max Death_Rate = 2021 and
# max Total_Fertility_Rate = 2021. These look like year values rather than rates — verify the data.
df_CIA_demographics.describe()

Unnamed: 0,Birth_Rate,Death_Rate,Net_Migration_Rate,Median_Age,Sex_Ratio,Infant_Mortality_Rate,Total_Fertility_Rate
count,228.0,230.0,229.0,227.0,227.0,227.0,227.0
mean,26.36,25.02,3.66,31.83,1.0,19.37,11.29
std,132.91,187.37,5.57,9.24,0.19,18.43,133.98
min,5.8,1.42,-2.4,14.8,0.83,1.5,1.09
25%,10.8,5.7,0.63,24.05,0.95,5.76,1.68
50%,15.03,7.26,1.89,31.7,0.98,12.0,1.97
75%,22.23,9.1,4.4,39.5,1.01,28.52,2.8
max,2020.0,2021.0,45.8,55.4,3.34,103.06,2021.0


In [20]:
# Bar chart of Total_Population per Country on a log-scaled y axis, coloured by Median_Age
fig = px.bar(
    df_CIA_demographics,
    x="Country",
    y="Total_Population",
    color="Median_Age",
    color_continuous_scale='Viridis',
    log_y=True,
    width=1500,
    height=800,
)
# Order the categories on the x axis by their total, largest first
fig.update_xaxes(categoryorder="total descending")
fig.show()

In [21]:
# df_CIA_communications, df_CIA_demographics, df_CIA_economy, df_CIA_energy, df_CIA_geography, df_CIA_government_and_civics, df_CIA_transportation
# df_CIA_communications.columns
# df_CIA_communications["Country"]

# Compare the Country column of each other table with the one in the communications table
# (prints one True/False per table, in the order listed)
for other_frame in (
    df_CIA_demographics,
    df_CIA_economy,
    df_CIA_energy,
    df_CIA_geography,
    df_CIA_government_and_civics,
    df_CIA_transportation,
):
    print(df_CIA_communications["Country"].equals(other_frame["Country"]))

# Print every country of the communications table that does not occur in the
# Country column of the df_CIA_government_and_civics dataset
gov_countries = df_CIA_government_and_civics["Country"].values
for country in df_CIA_communications["Country"]:
    if country not in gov_countries:
        print(country)

# Merge datasets
# Start from the communications table and join the other tables onto it by Country,
# one after another. pd.merge is called without `how`, i.e. with its default inner join,
# so a country must be present in every table to end up in df_total.
# NOTE(review): countries missing from any table (such as those listed by the check above)
# are therefore dropped — confirm this is intended.
df_total = df_CIA_communications
for next_frame in (
    df_CIA_demographics,
    df_CIA_economy,
    df_CIA_energy,
    df_CIA_geography,
    df_CIA_government_and_civics,
    df_CIA_transportation,
):
    df_total = pd.merge(df_total, next_frame, on="Country")



True
True
True
True
False
True
BRITISH INDIAN OCEAN TERRITORY
HAITI
NAMIBIA
NEW ZEALAND
SPAIN
SPRATLY ISLANDS


In [22]:
# Count the NaNs of every column in one pass and keep only the columns that
# actually contain missing values (column name -> number of NaNs)
nan_counts = df_total.isna().sum()
nan_dict = {column: count for column, count in nan_counts.items() if count > 0}

# Print the NaNs dictionary, from the column with the most NaNs to the one with the fewest
ranked_nans = sorted(nan_dict.items(), key=lambda item: item[1], reverse=True)
for key, value in ranked_nans:
    print(f"{key:<50} : {value:>5} NaNs")

# Look at the cat with the most NaNs (water_pipelines_km)
# print(df_CIA_transportation.columns)
# for i in df_CIA_transportation["water_pipelines_km"]:
#     if not pd.isna(i):
#         print(i)
# print(df_CIA_transportation["water_pipelines_km"].isna().sum())
# print(len(df_CIA_transportation["water_pipelines_km"]))



water_pipelines_km                                 :   250 NaNs
refined_products_pipelines_km                      :   245 NaNs
oil_pipelines_km                                   :   170 NaNs
gas_pipelines_km                                   :   146 NaNs
heliports_count                                    :   142 NaNs
waterways_km                                       :   137 NaNs
natural_gas_cubic_meters                           :   132 NaNs
coal_metric_tons                                   :   131 NaNs
refined_petroleum_products_bbl_per_day             :   131 NaNs
railways_km                                        :   126 NaNs
refined_petroleum_exports_bbl_per_day              :   117 NaNs
Total_Literacy_Rate                                :    89 NaNs
Population_Below_Poverty_Line_percent              :    79 NaNs
GDP_Official_Exchange_Rate_billion_USD             :    64 NaNs
petroleum_bbl_per_day                              :    52 NaNs
Youth_Unemployment_Rate                 

In [23]:
# Make a summarize function
def summarize(df: pd.DataFrame) -> None:
    """Print descriptive statistics and per-column NaN counts for ``df``.

    Args:
        df: The frame to summarize.

    Prints ``df.describe()``, followed by one line for each column that
    contains missing values, ordered from most to fewest NaNs. Columns
    without NaNs are not listed.

    Returns:
        None. The summary is only printed.
    """
    # Describe the frame that was passed in, not a notebook-level global.
    print(df.describe())

    # Compute the NaN counts from `df` itself so the function does not rely on
    # variables that happen to be left over from other cells.
    nan_counts = df.isna().sum()
    nan_counts = nan_counts[nan_counts > 0].sort_values(ascending=False)
    for column, count in nan_counts.items():
        print(f"{column:<50} : {count:>5} NaNs")

# Render floats with two decimals in pandas output, then print the summary
# statistics of the merged table
pd.options.display.float_format = "{:.2f}".format
print(df_total.describe())

       telephone_fixed_subscriptions_total  \
count                               218.00   
mean                            8956314.12   
std                            63643773.59   
min                                   0.00   
25%                               26453.25   
50%                              268370.50   
75%                             2121875.00   
max                           901317598.00   

       mobile_cellular_subscriptions_total  \
count                               215.00   
mean                           42479932.24   
std                           155800059.92   
min                                   0.00   
25%                              742025.50   
50%                             7000000.00   
75%                            22964500.00   
max                          1750460000.00   

       broadband_fixed_subscriptions_total  Birth_Rate  Death_Rate  \
count                               203.00      224.00      226.00   
mean                          

In [24]:
# Inspect 5 random rows of the merged table. No random_state is passed, so the selection changes per run.
df_total.sample(5)

Unnamed: 0,Country,telephone_fixed_subscriptions_total,mobile_cellular_subscriptions_total,internet_country_code,internet_users_total,broadband_fixed_subscriptions_total,Total_Population,Population_Growth_Rate,Birth_Rate,Death_Rate,Net_Migration_Rate,Median_Age,Sex_Ratio,Infant_Mortality_Rate,Total_Fertility_Rate,Total_Literacy_Rate,Male_Literacy_Rate,Female_Literacy_Rate,Youth_Unemployment_Rate,Real_GDP_PPP_billion_USD,GDP_Official_Exchange_Rate_billion_USD,Real_GDP_Growth_Rate_percent,Real_GDP_per_Capita_USD,Unemployment_Rate_percent,Youth_Unemployment_Rate_percent,Budget_billion_USD,Budget_Surplus_billion_USD,Budget_Deficit_percent_of_GDP,Public_Debt_percent_of_GDP,Fiscal_Year,Exports_billion_USD,Imports_billion_USD,Exchange_Rate_per_USD,Population_Below_Poverty_Line_percent,electricity_access_percent,electricity_generating_capacity_kW,coal_metric_tons,petroleum_bbl_per_day,refined_petroleum_products_bbl_per_day,refined_petroleum_exports_bbl_per_day,refined_petroleum_imports_bbl_per_day,natural_gas_cubic_meters,carbon_dioxide_emissions_Mt,Geographic_Coordinates,Area_Total,Land_Area,Water_Area,Land_Boundaries,Coastline,Highest_Elevation,Lowest_Elevation,Forest_Land,Other_Land,Agricultural_Land,Arable_Land (%% of Total Agricultural Land),Permanent_Crops (%% of Total Agricultural Land),Permanent_Pasture (%% of Total Agricultural Land),Irrigated_Land,Capital,Capital_Coordinates,Government_Type,Suffrage_Age,airports_paved_runways_count,airports_unpaved_runways_count,heliports_count,roadways_km,railways_km,waterways_km,gas_pipelines_km,oil_pipelines_km,refined_products_pipelines_km,water_pipelines_km
89,GERMANY,38800000.0,107200000.0,.de,79127551.0,37736000.0,84220184.0,0.12%,9.02,11.97,1.78,47.8,0.98,3.14,1.58,,24.1%,19.9%,7%,4260.0,4230.0,1.8,51200.0,3.0,7.5,1797.0,12.8,,63.9,calendar year,1813.0,1665.0,0.85,14.8,100.0,218000000.0,114860000.0,135000.0,2158000.0,494000.0,883800.0,5129000000.0,726881000.0,"51 00 N, 9 00 E","357,022 sq km","348,672 sq km","8,350 sq km","3,694 km","2,389 km","2,963 m",-3.5 m,31.8%,20.2%,48%,34.1%,0.6%,13.3%,"5,056 sq km",Berlin,"52 31 N, 13 24 E",Republic,18.0,318.0,221.0,23.0,830000.0,39379.0,7300.0,26985.0,2400.0,4479.0,8.0
214,SUDAN,129408.0,16688773.0,.sd,13248000.0,28782.0,49197555.0,2.55%,33.3,6.2,1.6,19.1,1.01,41.4,4.54,60.7%,65.4%,56.1%,35.6%,168.98,24.92,-1.87,3700.0,19.81,35.6,3.48,10.6,-10.6,121.6,calendar year,5.92,9.79,54.0,46.5,61.0,4354000.0,,66900.0,94830.0,8541.0,24340.0,,17.0,"15 00 N, 30 00 E","1,861,484 sq km","1,731,671 sq km","129,813 sq km","6,819 km",853 km,"3,042 m",0 m,0%,0%,100%,15.7%,0.2%,84.2%,"15,666 sq km",Khartoum,"15 36 N, 32 32 E",Republic,17.0,17.0,50.0,7.0,31000.0,7251.0,4068.0,156.0,4070.0,,
65,DHEKELIA,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"34 59 N, 33 45 E",131 sq km,,,108 km,27.5 km,,,,,,,,,,Episkopi Cantonment,"34 40 N, 32 51 E",,,,,,,,,,,,
200,SERBIA,2540276.0,8501519.0,.rs,5589000.0,1730496.0,6693375.0,0.63%,8.9,15.1,0.0,43.7,0.95,4.6,1.46,99.5%,40.5%,39.1%,30.4%,135.53,51.45,7.55,19800.0,11.81,30.4,21.86,0.2,0.2,62.5,,33.73,39.04,99.4,23.2,100.0,8986000.0,39673000.0,15200.0,74350.0,15750.0,18720.0,455787000.0,47.0,"44 00 N, 21 00 E","77,474 sq km","77,474 sq km",0 sq km,"2,322 km",0 km,"2,169 m",35 m,31.6%,10.5%,57.9%,37.7%,3.4%,16.8%,520 sq km,Belgrade,"44 50 N, 20 30 E",Republic,18.0,10.0,16.0,2.0,44248.0,3333.0,587.0,1936.0,413.0,,
45,CENTRAL AFRICAN REPUBLIC,2000.0,1800000.0,.cf,605000.0,499.0,5552228.0,1.77%,32.37,11.51,3.16,20.0,0.99,81.74,3.99,37.4%,49.5%,25.8%,11.8%,4.48,,0.9,800.0,6.57,11.8,418.0,0.9,-0.9,52.9,calendar year,113.7,393.1,554.53,62.0,15.0,38000.0,,0.0,,,2799.0,,285000.0,"7 00 N, 21 00 E","622,984 sq km","622,984 sq km",0 sq km,"5,920 km",0 km,"1,410 m",335 m,36.2%,55.7%,8.1%,2.9%,0.1%,5.1%,10 sq km,Bangui,"4 22 N, 18 35 E",,18.0,1.0,37.0,,24000.0,,2800.0,,,,
