<a href="https://colab.research.google.com/github/AsSakina/Code_Projects/blob/main/MedGen.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **MedGen**

## **Generate Random Data**

In [None]:
# Import libraries

import json
import os
import random
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

### **Generate Date**

In [None]:
# Generate random date within range
def random_date(start, end):
    delta = end - start
    int_delta = (delta.days * 24 * 60 * 60) + delta.seconds
    random_second = random.randrange(int_delta)
    return start + timedelta(seconds=random_second)

In [None]:
#test
random_date(datetime.strptime('1/1/2020 1:30 PM', '%m/%d/%Y %I:%M %p'), datetime.strptime('1/1/2021 4:50 AM', '%m/%d/%Y %I:%M %p'))

datetime.datetime(2020, 6, 24, 4, 57, 14)

In [None]:
# Format date to YYYY-MM-DD
def format_date(date):
    return date.strftime('%Y-%m-%d')

# format_date(date)

### **Generate patient data**

Génère **1000 patients** avec les éléments suivants :

- age, gender, department, treatment, etc.

- admissionDate : date d’admission (aléatoire sur les 90 derniers jours).

- dischargeDate : nulle si le patient est toujours hospitalisé.

- stayDuration : soit la durée réelle, soit la durée depuis l’admission.

- outcome : s'il est sorti, résultat de traitement pondéré (60% rétabli…).

- treatmentCost : calcul basé sur un coût de base (département + traitement).

- insuranceCovered : % aléatoire de couverture (entre 70% et 95% environ).

Le **coût total** du traitement dépend de :

- Le département (coût de base)

- Le type de traitement (ajoute des surcoûts pour chirurgie ou soins intensifs)

- La durée du séjour

In [None]:
# Generate patient data
def generate_patient_data(days=90):
    patients = []
    now = datetime.now()
    departments = ['Cardiology', 'Neurology', 'Oncology', 'Pediatrics', 'Emergency', 'Surgery']
    treatments = ['Medication', 'Surgery', 'Therapy', 'Observation', 'Intensive Care']
    outcomes = ['Recovered', 'Improved', 'Stable', 'Deteriorated', 'Deceased']
    outcome_weights = [0.6, 0.2, 0.1, 0.07, 0.03]  # Probability weights

    for i in range(1000):
        admission_date = random_date(now - timedelta(days=days), now)
        stay_duration = random.randint(1, 30)
        discharge_date = admission_date + timedelta(days=stay_duration)

        # Determine if patient is still admitted
        is_admitted = discharge_date > now

        department = random.choice(departments)
        treatment = random.choice(treatments)

        # Weighted random outcome selection
        if is_admitted:
            outcome = 'In Treatment'
        else:
            outcome = random.choices(outcomes, weights=outcome_weights)[0]

        # Generate costs based on department and treatment
        base_cost = {
            'Cardiology': 1500,
            'Neurology': 1800,
            'Oncology': 2200,
            'Pediatrics': 1000,
            'Emergency': 2000,
            'Surgery': 3000
        }.get(department, 1200)

        if treatment == 'Surgery':
            base_cost += 5000
        if treatment == 'Intensive Care':
            base_cost += 3000

        treatment_cost = base_cost * (0.8 + random.random() * 0.4) * stay_duration

        patients.append({
            'patientId': f'P{1000 + i}',
            'age': random.randint(1, 95),
            'gender': random.choice(['Male', 'Female']),
            'department': department,
            'admissionDate': format_date(admission_date),
            'dischargeDate': None if is_admitted else format_date(discharge_date),
            'stayDuration': (now - admission_date).days if is_admitted else stay_duration,
            'treatment': treatment,
            'outcome': outcome,
            'treatmentCost': round(treatment_cost),
            'insuranceCovered': round(treatment_cost * (0.7 + random.random() * 0.25)),
            'isAdmitted': is_admitted
        })

    return patients

In [None]:
patient_data = generate_patient_data()
pd.DataFrame(patient_data)

Unnamed: 0,patientId,age,gender,department,admissionDate,dischargeDate,stayDuration,treatment,outcome,treatmentCost,insuranceCovered,isAdmitted
0,P1000,24,Male,Cardiology,2025-05-08,2025-05-20,12,Medication,Recovered,16338,14789,False
1,P1001,47,Female,Emergency,2025-07-07,2025-07-13,6,Observation,Improved,11365,9534,False
2,P1002,21,Female,Neurology,2025-04-27,2025-05-11,14,Surgery,Recovered,98573,86644,False
3,P1003,93,Male,Pediatrics,2025-05-31,2025-06-14,14,Observation,Improved,12892,11894,False
4,P1004,70,Male,Pediatrics,2025-06-22,2025-06-26,4,Surgery,Stable,26001,21528,False
...,...,...,...,...,...,...,...,...,...,...,...,...
995,P1995,78,Female,Oncology,2025-06-18,2025-06-23,5,Therapy,Deteriorated,13175,9938,False
996,P1996,53,Male,Cardiology,2025-04-29,2025-05-14,15,Intensive Care,Recovered,77176,61686,False
997,P1997,80,Female,Pediatrics,2025-05-15,2025-05-22,7,Therapy,Recovered,7504,5779,False
998,P1998,18,Male,Emergency,2025-05-06,2025-06-03,28,Intensive Care,Recovered,114590,108261,False


### **Generate Staff Data**

Crée **200 membres** du personnel :

- role : Docteur, infirmier, technicien, etc.

- salary : dépend du rôle, du département, et de l’ancienneté (yearsOfService)

- performanceScore : score entre 0.70 et 1.00.

- patientsHandled : uniquement pour les rôles médicaux (doctor, nurse, etc.).

Le salaire est ajusté selon :

- Un coefficient pour les départements comme Chirurgie ou Cardiologie (x1.2)

- Une augmentation de 2% par année d’ancienneté

In [None]:
# Generate staff data
def generate_staff_data():
    departments = ['Cardiology', 'Neurology', 'Oncology', 'Pediatrics', 'Emergency', 'Surgery', 'Administration']
    roles = ['Doctor', 'Nurse', 'Technician', 'Administrative', 'Support']
    staff = []

    for i in range(200):
        department = random.choice(departments)
        role = random.choice(roles)

        # Base salary by role
        base_salary = {
            'Doctor': 120000,
            'Nurse': 70000,
            'Technician': 60000,
            'Administrative': 50000,
            'Support': 40000
        }.get(role, 45000)

        # Adjust by department
        if department in ['Surgery', 'Cardiology']:
            base_salary *= 1.2

        years_of_service = random.randint(0, 30)
        # Salary increases with years of service
        salary = round(base_salary * (1 + years_of_service * 0.02))

        staff.append({
            'staffId': f'S{1000 + i}',
            'department': department,
            'role': role,
            'yearsOfService': years_of_service,
            'salary': salary,
            'patientsHandled': random.randint(10, 100) if role in ['Doctor', 'Nurse'] else 0,
            'performanceScore': round(random.uniform(70, 100) / 100, 2)
        })

    return staff

In [None]:
staff_data = generate_staff_data()
pd.DataFrame(staff_data)

Unnamed: 0,staffId,department,role,yearsOfService,salary,patientsHandled,performanceScore
0,S1000,Emergency,Administrative,2,52000,0,0.86
1,S1001,Pediatrics,Support,8,46400,0,0.84
2,S1002,Cardiology,Doctor,16,190080,68,0.87
3,S1003,Cardiology,Nurse,1,85680,53,0.81
4,S1004,Surgery,Administrative,18,81600,0,0.98
...,...,...,...,...,...,...,...
195,S1195,Administration,Support,17,53600,0,0.77
196,S1196,Surgery,Administrative,4,64800,0,0.79
197,S1197,Surgery,Nurse,8,97440,17,0.73
198,S1198,Oncology,Technician,30,96000,0,0.86


### **Generate Departement Data**

Pour chaque département (**6 au total**) :

- Patients : combien, durée moyenne de séjour, taux de guérison

- Finances : revenu total (somme des coûts de traitement), salaires totaux, coût d’exploitation

- Ressources : lits disponibles, lits occupés (bed utilization), nombre de staff

- Performance médicale : taux de guérison (patients sortis guéris ou améliorés)

In [None]:
# Generate department performance data
def generate_department_data(patient_data, staff_data):
    departments = ['Cardiology', 'Neurology', 'Oncology', 'Pediatrics', 'Emergency', 'Surgery']
    department_data = []

    for dept in departments:
        dept_patients = [p for p in patient_data if p['department'] == dept]
        dept_staff = [s for s in staff_data if s['department'] == dept]

        total_patients = len(dept_patients)
        if total_patients > 0:
            avg_stay_duration = sum(p['stayDuration'] for p in dept_patients) / total_patients
        else:
            avg_stay_duration = 0

        total_revenue = sum(p['treatmentCost'] for p in dept_patients)
        total_salaries = sum(s['salary'] for s in dept_staff)

        # Calculate recovery rate
        non_admitted = [p for p in dept_patients if not p['isAdmitted']]
        recovered_patients = len([p for p in non_admitted if p['outcome'] in ['Recovered', 'Improved']])
        recovery_rate = recovered_patients / len(non_admitted) if len(non_admitted) > 0 else 0

        # Calculate bed utilization
        beds_available = {
            'Cardiology': 50,
            'Neurology': 40,
            'Oncology': 60,
            'Pediatrics': 45,
            'Emergency': 30,
            'Surgery': 35
        }[dept]

        currently_admitted = len([p for p in dept_patients if p['isAdmitted']])
        bed_utilization = currently_admitted / beds_available

        department_data.append({
            'department': dept,
            'totalPatients': total_patients,
            'avgStayDuration': round(avg_stay_duration, 2),
            'totalRevenue': total_revenue,
            'totalSalaries': total_salaries,
            'operatingCost': total_salaries + random.randint(50000, 200000),  # Add other costs
            'recoveryRate': round(recovery_rate, 2),
            'bedsAvailable': beds_available,
            'currentlyAdmitted': currently_admitted,
            'bedUtilization': round(bed_utilization, 2),
            'staffCount': len(dept_staff),
            'doctorCount': len([s for s in dept_staff if s['role'] == 'Doctor']),
            'nurseCount': len([s for s in dept_staff if s['role'] == 'Nurse'])
        })

    return department_data

In [None]:
department_data = generate_department_data(patient_data, staff_data)
pd.DataFrame(department_data)

Unnamed: 0,department,totalPatients,avgStayDuration,totalRevenue,totalSalaries,operatingCost,recoveryRate,bedsAvailable,currentlyAdmitted,bedUtilization,staffCount,doctorCount,nurseCount
0,Cardiology,178,13.97,8235256,2515440,2572241,0.75,50,27,0.54,23,5,3
1,Neurology,159,13.95,7945112,2749600,2822894,0.81,40,23,0.57,27,8,6
2,Oncology,153,13.27,8931964,2679400,2816209,0.86,60,28,0.47,33,5,7
3,Pediatrics,184,13.76,7166373,2313800,2425415,0.83,45,31,0.69,25,6,6
4,Emergency,157,14.34,9655686,2413800,2526128,0.76,30,22,0.73,27,8,2
5,Surgery,169,13.38,12042518,2849760,2975856,0.8,35,26,0.74,30,4,6


### **Generate Daily Metrics**

Génère des données aléatoires pour 90 jours :

- newAdmissions, discharges, emergencyVisits, surgeries, revenue, expenses

Cela permet d’**analyser l’activité jour par jour** sous forme de **séries temporelles**.



In [None]:
# Generate daily metrics for time series
def generate_daily_metrics(days=90):
    daily_metrics = []
    now = datetime.now()

    for i in range(days):
        date = now - timedelta(days=days-i)

        daily_metrics.append({
            'date': format_date(date),
            'newAdmissions': random.randint(5, 25),
            'discharges': random.randint(5, 20),
            'emergencyVisits': random.randint(20, 60),
            'surgeries': random.randint(3, 15),
            'revenue': random.randint(50000, 150000),
            'expenses': random.randint(40000, 120000)
        })

    return daily_metrics

In [None]:
daily_metrics_data = generate_daily_metrics()
pd.DataFrame(daily_metrics_data)

Unnamed: 0,date,newAdmissions,discharges,emergencyVisits,surgeries,revenue,expenses
0,2025-04-22,12,9,30,13,92518,48396
1,2025-04-23,11,5,22,7,122451,84726
2,2025-04-24,18,6,27,10,110933,40894
3,2025-04-25,16,16,58,13,72155,48917
4,2025-04-26,14,17,37,9,116736,71829
...,...,...,...,...,...,...,...
85,2025-07-16,25,15,46,12,119776,80427
86,2025-07-17,21,6,31,7,97122,48675
87,2025-07-18,7,11,43,9,115615,114655
88,2025-07-19,11,6,60,3,131926,76333


### **Generate all data (Skip it)**

This part will generate all data and save it to JSON files.

- Appelle les 4 fonctions ci-dessus

- Crée un dossier ./data si nécessaire

- Sauvegarde chaque jeu de données dans un fichier .json

  - patients.json

  - staff.json

  - departments.json

  - daily_metrics.json

- Affiche un résumé du nombre d'enregistrements générés

In [None]:
# Generate all data and save to JSON files
def generate_all_data():
    patient_data = generate_patient_data()
    staff_data = generate_staff_data()
    department_data = generate_department_data(patient_data, staff_data)
    daily_metrics = generate_daily_metrics()

    # Create data directory if it doesn't exist
    if not os.path.exists('./data'):
        os.makedirs('./data')

    with open('./data/patients.json', 'w') as f:
        json.dump(patient_data, f, indent=2)

    with open('./data/staff.json', 'w') as f:
        json.dump(staff_data, f, indent=2)

    with open('./data/departments.json', 'w') as f:
        json.dump(department_data, f, indent=2)

    with open('./data/daily_metrics.json', 'w') as f:
        json.dump(daily_metrics, f, indent=2)

    print('Generated fictive health structure data:')
    print(f'- {len(patient_data)} patient records')
    print(f'- {len(staff_data)} staff records')
    print(f'- {len(department_data)} department performance records')
    print(f'- {len(daily_metrics)} days of daily metrics')
    print('Data saved to ./data/ directory')

### **Exécution**

Cela permet d’exécuter le script uniquement lorsqu'il est lancé directement (et non importé comme module).

In [None]:
if __name__ == "__main__":
    generate_all_data()


Generated fictive health structure data:
- 1000 patient records
- 200 staff records
- 6 department performance records
- 90 days of daily metrics
Data saved to ./data/ directory


## **Data Pre-processing**

### **Load Data**

In [None]:
# Load Patients data

patient_data = pd.read_json('./data/patients.json')

In [None]:
# Load Staff data

staff_data = pd.read_json('./data/staff.json')

In [None]:
# Load Departments data

department_data = pd.read_json('./data/departments.json')

In [None]:
# Load Daily metrics data

metrics_data = pd.read_json('./data/daily_metrics.json')

### **Display datasets**

In [None]:
# Patients Data

patient_data.head()

Unnamed: 0,patientId,age,gender,department,admissionDate,dischargeDate,stayDuration,treatment,outcome,treatmentCost,insuranceCovered,isAdmitted
0,P1000,50,Male,Cardiology,2025-05-29,2025-06-20,22,Observation,Recovered,31607,29709,False
1,P1001,20,Male,Emergency,2025-05-03,2025-05-05,2,Therapy,Deceased,3985,3056,False
2,P1002,67,Female,Cardiology,2025-06-20,2025-07-16,26,Intensive Care,Deteriorated,96399,90888,False
3,P1003,37,Male,Neurology,2025-05-28,2025-06-24,27,Intensive Care,Improved,136722,98642,False
4,P1004,47,Female,Surgery,2025-06-04,2025-07-03,29,Intensive Care,Recovered,148673,137675,False


In [None]:
# Staff Data

staff_data.head()

Unnamed: 0,staffId,department,role,yearsOfService,salary,patientsHandled,performanceScore
0,S1000,Emergency,Nurse,17,93800,79,0.82
1,S1001,Surgery,Nurse,29,132720,46,0.81
2,S1002,Surgery,Administrative,27,92400,0,0.91
3,S1003,Surgery,Administrative,18,81600,0,0.88
4,S1004,Surgery,Nurse,30,134400,44,0.96


In [None]:
# Department Data

department_data.head()

Unnamed: 0,department,totalPatients,avgStayDuration,totalRevenue,totalSalaries,operatingCost,recoveryRate,bedsAvailable,currentlyAdmitted,bedUtilization,staffCount,doctorCount,nurseCount
0,Cardiology,161,13.2,7985903,2611440,2798682,0.8,50,28,0.56,27,3,5
1,Neurology,168,14.65,9111593,2556000,2741313,0.84,40,40,1.0,26,7,9
2,Oncology,168,13.49,9909357,3603200,3789201,0.84,60,26,0.43,36,11,10
3,Pediatrics,177,13.01,7541597,3002000,3059114,0.79,45,37,0.82,32,7,8
4,Emergency,170,14.53,9767089,2680200,2739548,0.76,30,27,0.9,28,8,4


In [None]:
# Daily Metrics Data

metrics_data.head()

Unnamed: 0,date,newAdmissions,discharges,emergencyVisits,surgeries,revenue,expenses
0,2025-04-22,22,8,60,13,120038,77681
1,2025-04-23,23,8,34,13,106975,65743
2,2025-04-24,6,9,58,9,134386,68412
3,2025-04-25,17,5,38,12,112336,54216
4,2025-04-26,15,17,29,15,70154,112666


### **Data Explorer**

In [None]:
# Data Explorer

def explore_info(data):
  print("\n--- Infos sur le DataFrame ---")
  print("====================================")
  print("     ")
  print("     ")
  data.info()

def describe_data(data):
  print("\n--- Statistiques descriptives ---")
  print("====================================")
  print("     ")
  print("     ")

  print(data.describe(include='all'))

def unique_values(data):
  print("\n--- Valeurs uniques par colonne ---")
  print("====================================")
  print("     ")
  print("     ")
  for col in data.columns:
    print(f"{col} : {data[col].nunique()} valeurs uniques")
    print("---------------------------------------")

#### **Patient**

In [None]:
# Informations

explore_info(patient_data)


--- Infos sur le DataFrame ---
     
     
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 12 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   patientId         1000 non-null   object
 1   age               1000 non-null   int64 
 2   gender            1000 non-null   object
 3   department        1000 non-null   object
 4   admissionDate     1000 non-null   object
 5   dischargeDate     819 non-null    object
 6   stayDuration      1000 non-null   int64 
 7   treatment         1000 non-null   object
 8   outcome           1000 non-null   object
 9   treatmentCost     1000 non-null   int64 
 10  insuranceCovered  1000 non-null   int64 
 11  isAdmitted        1000 non-null   bool  
dtypes: bool(1), int64(4), object(7)
memory usage: 87.0+ KB


In [None]:
# Descriptive Statistics

describe_data(patient_data)


--- Statistiques descriptives ---
     
     
       patientId          age  gender  department admissionDate dischargeDate  \
count       1000  1000.000000    1000        1000          1000           819   
unique      1000          NaN       2           6            91            88   
top        P1999          NaN  Female  Pediatrics    2025-06-01    2025-06-29   
freq           1          NaN     520         177            19            20   
mean         NaN    48.751000     NaN         NaN           NaN           NaN   
std          NaN    28.073673     NaN         NaN           NaN           NaN   
min          NaN     1.000000     NaN         NaN           NaN           NaN   
25%          NaN    24.000000     NaN         NaN           NaN           NaN   
50%          NaN    49.000000     NaN         NaN           NaN           NaN   
75%          NaN    73.000000     NaN         NaN           NaN           NaN   
max          NaN    95.000000     NaN         NaN           Na

In [None]:
# Explore Unique values

unique_values(patient_data)


--- Valeurs uniques par colonne ---
     
     
patientId : 1000 valeurs uniques
---------------------------------------
age : 95 valeurs uniques
---------------------------------------
gender : 2 valeurs uniques
---------------------------------------
department : 6 valeurs uniques
---------------------------------------
admissionDate : 91 valeurs uniques
---------------------------------------
dischargeDate : 88 valeurs uniques
---------------------------------------
stayDuration : 31 valeurs uniques
---------------------------------------
treatment : 5 valeurs uniques
---------------------------------------
outcome : 6 valeurs uniques
---------------------------------------
treatmentCost : 995 valeurs uniques
---------------------------------------
insuranceCovered : 991 valeurs uniques
---------------------------------------
isAdmitted : 2 valeurs uniques
---------------------------------------


#### **Staff**

In [None]:
# Informations

explore_info(staff_data)


--- Infos sur le DataFrame ---
     
     
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 7 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   staffId           200 non-null    object 
 1   department        200 non-null    object 
 2   role              200 non-null    object 
 3   yearsOfService    200 non-null    int64  
 4   salary            200 non-null    int64  
 5   patientsHandled   200 non-null    int64  
 6   performanceScore  200 non-null    float64
dtypes: float64(1), int64(3), object(3)
memory usage: 11.1+ KB


In [None]:
# Descriptive Statistics

describe_data(staff_data)


--- Statistiques descriptives ---
     
     
       staffId department   role  yearsOfService         salary  \
count      200        200    200      200.000000     200.000000   
unique     200          7      5             NaN            NaN   
top      S1000   Oncology  Nurse             NaN            NaN   
freq         1         36     45             NaN            NaN   
mean       NaN        NaN    NaN       14.855000   95104.400000   
std        NaN        NaN    NaN        8.945619   41197.301329   
min        NaN        NaN    NaN        0.000000   40800.000000   
25%        NaN        NaN    NaN        7.000000   64680.000000   
50%        NaN        NaN    NaN       15.500000   84000.000000   
75%        NaN        NaN    NaN       22.000000  110080.000000   
max        NaN        NaN    NaN       30.000000  195840.000000   

        patientsHandled  performanceScore  
count         200.00000        200.000000  
unique              NaN               NaN  
top             

In [None]:
# Explore unique values

unique_values(staff_data)


--- Valeurs uniques par colonne ---
     
     
staffId : 200 valeurs uniques
---------------------------------------
department : 7 valeurs uniques
---------------------------------------
role : 5 valeurs uniques
---------------------------------------
yearsOfService : 31 valeurs uniques
---------------------------------------
salary : 125 valeurs uniques
---------------------------------------
patientsHandled : 56 valeurs uniques
---------------------------------------
performanceScore : 31 valeurs uniques
---------------------------------------


#### **Departments**

In [None]:
# Informations

explore_info(department_data)


--- Infos sur le DataFrame ---
     
     
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 13 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   department         6 non-null      object 
 1   totalPatients      6 non-null      int64  
 2   avgStayDuration    6 non-null      float64
 3   totalRevenue       6 non-null      int64  
 4   totalSalaries      6 non-null      int64  
 5   operatingCost      6 non-null      int64  
 6   recoveryRate       6 non-null      float64
 7   bedsAvailable      6 non-null      int64  
 8   currentlyAdmitted  6 non-null      int64  
 9   bedUtilization     6 non-null      float64
 10  staffCount         6 non-null      int64  
 11  doctorCount        6 non-null      int64  
 12  nurseCount         6 non-null      int64  
dtypes: float64(3), int64(9), object(1)
memory usage: 756.0+ bytes


In [None]:
# Descriptive Statistics

describe_data(department_data)


--- Statistiques descriptives ---
     
     
        department  totalPatients  avgStayDuration  totalRevenue  \
count            6       6.000000         6.000000  6.000000e+00   
unique           6            NaN              NaN           NaN   
top     Cardiology            NaN              NaN           NaN   
freq             1            NaN              NaN           NaN   
mean           NaN     166.666667        13.671667  9.131530e+06   
std            NaN       7.312090         0.729285  1.153339e+06   
min            NaN     156.000000        13.010000  7.541597e+06   
25%            NaN     162.750000        13.162500  8.267326e+06   
50%            NaN     168.000000        13.345000  9.439341e+06   
75%            NaN     169.500000        14.270000  9.873790e+06   
max            NaN     177.000000        14.650000  1.047364e+07   

        totalSalaries  operatingCost  recoveryRate  bedsAvailable  \
count    6.000000e+00   6.000000e+00      6.000000       6.000000  

In [None]:
# Explore unique values

unique_values(department_data)


--- Valeurs uniques par colonne ---
     
     
department : 6 valeurs uniques
---------------------------------------
totalPatients : 5 valeurs uniques
---------------------------------------
avgStayDuration : 6 valeurs uniques
---------------------------------------
totalRevenue : 6 valeurs uniques
---------------------------------------
totalSalaries : 6 valeurs uniques
---------------------------------------
operatingCost : 6 valeurs uniques
---------------------------------------
recoveryRate : 5 valeurs uniques
---------------------------------------
bedsAvailable : 6 valeurs uniques
---------------------------------------
currentlyAdmitted : 6 valeurs uniques
---------------------------------------
bedUtilization : 6 valeurs uniques
---------------------------------------
staffCount : 5 valeurs uniques
---------------------------------------
doctorCount : 4 valeurs uniques
---------------------------------------
nurseCount : 6 valeurs uniques
-----------------------------------

#### **Daily Metrics**

In [None]:
# Informations

explore_info(metrics_data)


--- Infos sur le DataFrame ---
     
     
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 90 entries, 0 to 89
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   date             90 non-null     datetime64[ns]
 1   newAdmissions    90 non-null     int64         
 2   discharges       90 non-null     int64         
 3   emergencyVisits  90 non-null     int64         
 4   surgeries        90 non-null     int64         
 5   revenue          90 non-null     int64         
 6   expenses         90 non-null     int64         
dtypes: datetime64[ns](1), int64(6)
memory usage: 5.1 KB


In [None]:
# Descriptive Statistics

describe_data(metrics_data)


--- Statistiques descriptives ---
     
     
                      date  newAdmissions  discharges  emergencyVisits  \
count                   90      90.000000   90.000000        90.000000   
mean   2025-06-05 12:00:00      14.955556   12.277778        38.911111   
min    2025-04-22 00:00:00       5.000000    5.000000        21.000000   
25%    2025-05-14 06:00:00      10.000000    9.000000        29.250000   
50%    2025-06-05 12:00:00      15.000000   12.000000        39.000000   
75%    2025-06-27 18:00:00      20.000000   16.000000        46.000000   
max    2025-07-20 00:00:00      25.000000   20.000000        60.000000   
std                    NaN       5.992338    4.527141        10.812648   

       surgeries        revenue       expenses  
count  90.000000      90.000000      90.000000  
mean    8.955556  101363.833333   80222.311111  
min     3.000000   51839.000000   40430.000000  
25%     6.000000   83454.000000   58272.750000  
50%     9.000000   98291.000000   82073.5

In [None]:
# Explore unique values

unique_values(metrics_data)


--- Valeurs uniques par colonne ---
     
     
date : 90 valeurs uniques
---------------------------------------
newAdmissions : 21 valeurs uniques
---------------------------------------
discharges : 16 valeurs uniques
---------------------------------------
emergencyVisits : 39 valeurs uniques
---------------------------------------
surgeries : 13 valeurs uniques
---------------------------------------
revenue : 90 valeurs uniques
---------------------------------------
expenses : 90 valeurs uniques
---------------------------------------


### **Checking and Handling : Missing values**

In [None]:
def missing_values(data):
  print("\n--- Valeurs manquantes par colonne ---")
  print("====================================")
  print("     ")
  print("     ")
  print(data.isnull().sum())

In [None]:
# Patient Data

missing_values(patient_data)


--- Valeurs manquantes par colonne ---
     
     
patientId             0
age                   0
gender                0
department            0
admissionDate         0
dischargeDate       181
stayDuration          0
treatment             0
outcome               0
treatmentCost         0
insuranceCovered      0
isAdmitted            0
dtype: int64


**Reminder** :    

---



dischargeDate : nulle si le patient est toujours hospitalisé.

In [None]:
# Staff Data

missing_values(staff_data)


--- Valeurs manquantes par colonne ---
     
     
staffId             0
department          0
role                0
yearsOfService      0
salary              0
patientsHandled     0
performanceScore    0
dtype: int64


In [None]:
# Departement Data

missing_values(department_data)


--- Valeurs manquantes par colonne ---
     
     
department           0
totalPatients        0
avgStayDuration      0
totalRevenue         0
totalSalaries        0
operatingCost        0
recoveryRate         0
bedsAvailable        0
currentlyAdmitted    0
bedUtilization       0
staffCount           0
doctorCount          0
nurseCount           0
dtype: int64


In [None]:
# Daily Metrics

missing_values(metrics_data)


--- Valeurs manquantes par colonne ---
     
     
date               0
newAdmissions      0
discharges         0
emergencyVisits    0
surgeries          0
revenue            0
expenses           0
dtype: int64


### **Checking and Handling :Outliers Handling**

- Check outliers
- Check Distribution
- Handle Outliers

In [None]:
import plotly.express as px

In [None]:
for col in patient_data.columns:
  if patient_data[col].dtype != "object" :
    if patient_data[col].dtype != "bool":
      print(f"Column {patient_data[col].name} : {patient_data[col].dtype}")

Column age : int64
Column stayDuration : int64
Column treatmentCost : int64
Column insuranceCovered : int64


In [None]:
def check_outliers(data, columns):
  for col in columns:
    if data[col].dtype != "object" :
      if data[col].dtype != "bool":
        Q1 = data[col].quantile(0.25)
        Q3 = data[col].quantile(0.75)
        IQR = Q3 - Q1
        lower_bound = Q1 - 1.5 * IQR
        upper_bound = Q3 + 1.5 * IQR
        outliers = data[(data[col] < lower_bound) | (data[col] > upper_bound)]
        px.box(data, x=col).show()

In [None]:
# Patient data
print("     ")
print("     ")
print("====================================")
print("PATIENT DATA")
print("====================================")
print("     ")
print("     ")
check_outliers(patient_data, patient_data.columns)

     
     
PATIENT DATA
     
     


In [None]:
# Staff data
print("     ")
print("     ")
print("====================================")
print("STAFF DATA")
print("====================================")
print("     ")
print("     ")
check_outliers(staff_data, staff_data.columns)

     
     
STAFF DATA
     
     


In [None]:
# Departments data

print("     ")
print("     ")
print("====================================")
print("DEPARTMENTS DATA")
print("====================================")
print("     ")
print("     ")
check_outliers(department_data, department_data.columns)

     
     
DEPARTMENTS DATA
     
     


In [None]:
# Daily Metrics data

print("     ")
print("     ")
print("====================================")
print("DAILY METRICS DATA")
print("====================================")
print("     ")
print("     ")
check_outliers(metrics_data, metrics_data.columns)

     
     
DAILY METRICS DATA
     
     


##### **Check Distributions of Numeric values**

In [None]:
# Count element per value
def nunique_values(data):
  for col in data.columns:
    if data[col].nunique() < 10 :
      print(f"{col} : {data[col].value_counts()} valeurs uniques")
      print("---------------------------------------")


In [None]:
# Patient Data

nunique_values(patient_data)

gender : gender
Female    520
Male      480
Name: count, dtype: int64 valeurs uniques
---------------------------------------
department : department
Pediatrics    177
Emergency     170
Oncology      168
Neurology     168
Cardiology    161
Surgery       156
Name: count, dtype: int64 valeurs uniques
---------------------------------------
treatment : treatment
Medication        218
Intensive Care    215
Surgery           207
Observation       202
Therapy           158
Name: count, dtype: int64 valeurs uniques
---------------------------------------
outcome : outcome
Recovered       508
In Treatment    181
Improved        153
Stable           73
Deteriorated     59
Deceased         26
Name: count, dtype: int64 valeurs uniques
---------------------------------------
isAdmitted : isAdmitted
False    819
True     181
Name: count, dtype: int64 valeurs uniques
---------------------------------------


In [None]:
# Staff Data

nunique_values(staff_data)

department : department
Oncology          36
Pediatrics        32
Emergency         28
Surgery           27
Cardiology        27
Neurology         26
Administration    24
Name: count, dtype: int64 valeurs uniques
---------------------------------------
role : role
Nurse             45
Doctor            43
Technician        40
Support           38
Administrative    34
Name: count, dtype: int64 valeurs uniques
---------------------------------------


In [None]:
# Department Data

nunique_values(department_data)

department : department
Cardiology    1
Neurology     1
Oncology      1
Pediatrics    1
Emergency     1
Surgery       1
Name: count, dtype: int64 valeurs uniques
---------------------------------------
totalPatients : totalPatients
168    2
161    1
177    1
170    1
156    1
Name: count, dtype: int64 valeurs uniques
---------------------------------------
avgStayDuration : avgStayDuration
13.20    1
14.65    1
13.49    1
13.01    1
14.53    1
13.15    1
Name: count, dtype: int64 valeurs uniques
---------------------------------------
totalRevenue : totalRevenue
7985903     1
9111593     1
9909357     1
7541597     1
9767089     1
10473640    1
Name: count, dtype: int64 valeurs uniques
---------------------------------------
totalSalaries : totalSalaries
2611440    1
2556000    1
3603200    1
3002000    1
2680200    1
2516640    1
Name: count, dtype: int64 valeurs uniques
---------------------------------------
operatingCost : operatingCost
2798682    1
2741313    1
3789201    1
305911

In [None]:
# Daily Metrics Data

nunique_values(metrics_data)

**Note** :

- Le coût du traitement d’un cancer (oncologie) ne sera jamais comparable à celui d’une observation en pédiatrie.

- Un chirurgien senior en cardiologie ne peut pas être comparé à une secrétaire en pédiatrie.


In [None]:
# Treatment Cost by Department

fig = px.box(patient_data, x='department', y='treatmentCost', color='treatment', title= "Treatment Cost by Department")
fig.show()

In [None]:
# Insurance Ratio by Outcome

patient_data['insuranceRatio'] = patient_data['insuranceCovered'] / patient_data['treatmentCost']
fig = px.violin(patient_data, y='insuranceRatio', color='outcome', box=True, title= "Insurance Ratio by Outcome")
fig.show()


In [None]:
# Salary by Role and Department

fig = px.box(staff_data, x='role', y='salary', color='department', title= "Salary by Role and Department")
fig.show()


**Note** :

- Variability is controlled in each sub-group

- There are no local outliers

- The data generation model is consistent and realistic at subgroup level

**Conclusion** :

- There is no need to delete, winsorise or correct these values

- Keep the data as it is for descriptive analysis, visualisation or even certain models.

In [None]:
# Filter important roles

patienthandled_filtered = staff_data[staff_data['role'].isin(['Doctor', 'Nurse'])]

# Doctors and nurses are those who handle patients

patienthandled_filtered

Unnamed: 0,staffId,department,role,yearsOfService,salary,patientsHandled,performanceScore
0,S1000,Emergency,Nurse,17,93800,79,0.82
1,S1001,Surgery,Nurse,29,132720,46,0.81
4,S1004,Surgery,Nurse,30,134400,44,0.96
5,S1005,Oncology,Nurse,15,91000,99,0.91
6,S1006,Pediatrics,Doctor,27,184800,81,0.97
...,...,...,...,...,...,...,...
187,S1187,Emergency,Doctor,20,168000,60,0.98
189,S1189,Administration,Doctor,10,144000,63,0.91
190,S1190,Cardiology,Nurse,29,132720,54,0.73
193,S1193,Neurology,Nurse,11,85400,20,0.70


In [None]:
# metrics_data.merge(patienthandled_filtered, on='staffId', how='inner')

In [None]:
#patienthandled_filtered.groupby('role')['patientsPerYear'].describe()

In [None]:
import plotly.express as px

fig = px.box(patienthandled_filtered, x='role', y='patientsHandled', color='department',
             title="Handled Patients by Role and Service")
fig.show()

##### **Handle Outliers**

In [None]:
outlier = patienthandled_filtered[patienthandled_filtered['patientsHandled'] == 12]
outlier


Unnamed: 0,staffId,department,role,yearsOfService,salary,patientsHandled,performanceScore
27,S1027,Oncology,Doctor,3,127200,12,0.92


In [None]:
#Q1 = patienthandled_filtered['patientsHandled'].quantile(0.25)
#Q3 = patienthandled_filtered['patientsHandled'].quantile(0.75)
#IQR = Q3 - Q1
#upper_bound = Q3 + 1.5 * IQR
#ower_bound = Q1 - 1.5 * IQR

# Flatten the upper outliers

#patienthandled_filtered['patientsHandled'] = np.clip(patienthandled_filtered['patientsHandled'], None, upper_bound)
#patienthandled_filtered['patientsHandled'] = np.clip(patienthandled_filtered['patientsHandled'], None, lower_bound)



In [None]:
#import plotly.express as px

#fig = px.box(patienthandled_filtered, x='role', y='patientsHandled', color='department',
             #title="Handled Patients by Role and Service")
#fig.show()

In [None]:
# Delete the row

staff_data = staff_data[staff_data['staffId'] != 'S1116']

In [None]:
# Verification

staff_data[staff_data['staffId'] == 'S1116']

Unnamed: 0,staffId,department,role,yearsOfService,salary,patientsHandled,performanceScore


In [None]:
# Customised thresholds

patienthandled_filtered['charge'] = pd.cut(patienthandled_filtered['patientsHandled'],
                               bins=[0, 10, 25, 50, 100],
                               labels=['Low', 'Moderate', 'High', 'Intense'])



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:
patienthandled_filtered

Unnamed: 0,staffId,department,role,yearsOfService,salary,patientsHandled,performanceScore,charge
0,S1000,Emergency,Nurse,17,93800,79,0.82,Intense
1,S1001,Surgery,Nurse,29,132720,46,0.81,High
4,S1004,Surgery,Nurse,30,134400,44,0.96,High
5,S1005,Oncology,Nurse,15,91000,99,0.91,Intense
6,S1006,Pediatrics,Doctor,27,184800,81,0.97,Intense
...,...,...,...,...,...,...,...,...
187,S1187,Emergency,Doctor,20,168000,60,0.98,Intense
189,S1189,Administration,Doctor,10,144000,63,0.91,Intense
190,S1190,Cardiology,Nurse,29,132720,54,0.73,Intense
193,S1193,Neurology,Nurse,11,85400,20,0.70,Moderate


In [None]:
# Save the final dataset

staff_data.to_json('./data/staff_cleaned.json', orient='records', indent=2)

## **Data Analysis**

### **Job Analysis/Statistics**

- Moyenne, médiane par groupe

- Comparaison de revenus entre services

- Ratio coût/assurance par traitement

In [None]:
# Average and median treatmentCost by service/

Cost_summary = patient_data.groupby('department')['treatmentCost'].agg(['mean', 'median', 'std']).reset_index().sort_values(by='mean', ascending=False)
#Cost_summary

In [None]:
Cost_summary_ = Cost_summary.style \
    .background_gradient(subset=['mean'], cmap='YlGnBu') \
    .background_gradient(subset=['median'], cmap='YlOrRd') \
    .background_gradient(subset=['std'], cmap='Purples') \
    .format({
        'mean': "{:,.0f} $",
        'median': "{:,.0f} $",
        'std': "{:,.0f} $"
    }) \
    .set_caption("Mean, Median, Std by Department")

Cost_summary_

Unnamed: 0,department,mean,median,std
5,Surgery,"67,139 $","55,644 $","53,378 $"
3,Oncology,"58,984 $","43,672 $","50,052 $"
1,Emergency,"57,453 $","45,996 $","48,987 $"
2,Neurology,"54,236 $","41,068 $","44,345 $"
0,Cardiology,"49,602 $","33,617 $","45,199 $"
4,Pediatrics,"42,608 $","23,133 $","43,271 $"


### **Salary Analysis**

In [None]:
department_colors = {
    'Surgery':       {'bg': '#E3F2FD', 'text': '#0D47A1'},  # Bleu clair + bleu foncé
    'Cardiology':    {'bg': '#FCE4EC', 'text': '#AD1457'},  # Rose pâle + rose foncé
    'Neurology':     {'bg': '#EDE7F6', 'text': '#4527A0'},  # Lavande + violet foncé
    'Pediatrics':    {'bg': '#E8F5E9', 'text': '#1B5E20'},  # Vert menthe + vert foncé
    'Emergency':     {'bg': '#FFF8E1', 'text': '#FF6F00'},  # Jaune doux + orange foncé
    'Oncology':      {'bg': '#FFEBEE', 'text': '#B71C1C'},  # Rouge clair + rouge foncé
    'Administration':{'bg': '#E3F2FD', 'text': '#1565C0'},  # Bleu clair + bleu classique
}

In [None]:
def style_department(row):
    dept = row['department']
    if dept in department_colors:
        bg = department_colors[dept]['bg']
        text = department_colors[dept]['text']
        return [f'background-color: {bg}; color: {text};'] * len(row)
    return [''] * len(row)

In [None]:
def highlight_salary(val):
    if val > 150000:
        return 'background-color: #D1C4E9; color: #311B92; font-weight: bold;'  # Violet doux
    elif val > 100000:
        return 'background-color: #FFF9C4; color: #F57F17;'  # Jaune pastel
    return 'background-color: #FFCDD2; color: #C62828;'      # Rouge clair

In [None]:
styled_df = staff_data[['staffId', 'department', 'role', 'salary']].sort_values('salary', ascending=False)

styled_df.style \
    .applymap(highlight_salary, subset=['salary']) \
    .apply(style_department, axis=1) \
    .bar(subset=['salary'], color='#A5D6A7') \
    .set_caption("Salaires du personnel hospitalier — codés par service & hiérarchie")


Styler.applymap has been deprecated. Use Styler.map instead.



Unnamed: 0,staffId,department,role,salary
106,S1106,Cardiology,Doctor,195840
117,S1117,Neurology,Doctor,192000
182,S1182,Pediatrics,Doctor,192000
23,S1023,Surgery,Doctor,190080
147,S1147,Pediatrics,Doctor,189600
174,S1174,Oncology,Doctor,187200
133,S1133,Cardiology,Doctor,187200
6,S1006,Pediatrics,Doctor,184800
142,S1142,Neurology,Doctor,184800
122,S1122,Oncology,Doctor,182400


In [None]:
# Distribution of costs by department

fig = px.histogram(
    patient_data,
    x='treatmentCost',
    color='department',
    facet_col='department',
    facet_col_wrap=3,
    nbins=30,
    title="Distribution of costs by department",
    color_discrete_sequence=px.colors.sequential.BuPu_r
)
fig.update_layout(showlegend=False)
fig.show()

In [None]:
# Amount covered by insurance

import plotly.express as px

fig = px.histogram(
    patient_data,
    x='insuranceCovered',
    nbins=50,
    color='department',
    barmode='overlay',
    opacity=0.6,
    title="Amount Covered by Insurance (By service)",
    labels={'insuranceCovered': 'Covered amount'},
    color_discrete_sequence=px.colors.sequential.BuPu_r
)

fig.update_layout(
    bargap=0.2,
    xaxis_title='Covered Amount',
    yaxis_title='Number of patients',
    legend_title='Department'
)

fig.show()

In [None]:
# Add OutOfPocket column : Treatment Cost - Insurance Covered

patient_data['outOfPocket'] = patient_data['treatmentCost'] - patient_data['insuranceCovered']

In [None]:
fig = px.strip(
    patient_data,
    x='department',
    y='outOfPocket',
    color='department',
    title="Patients vs. Out-of-pocket expenses per service",
    stripmode='overlay',
    color_discrete_sequence=px.colors.sequential.BuPu_r
)
fig.update_layout(yaxis_title='uncovered amount')
fig.show()

In [None]:
# Average out-of-pocket expenses by department

agg_out_of_pocket = (
    patient_data.groupby('department')['outOfPocket']
    .mean()
    .reset_index()
    .sort_values(by='outOfPocket', ascending=False)
)

fig = px.bar(
    agg_out_of_pocket,
    x='department',
    y='outOfPocket',
    color='department',
    title='Average charges by department',
    labels={'outOfPocket': 'Average amount not covered'},
    color_discrete_sequence=px.colors.sequential.BuPu_r
)
fig.show()


In [None]:
# Distribution of work loads

fig = px.pie(patienthandled_filtered, names='charge', title="Distribution of work loads", color_discrete_sequence=px.colors.sequential.RdBu)
fig.show()


### **Alerts Definition**

**Critiques** :

- Bed Utilization Rate (Elevé if > 90%)
- Taux de retablissement faible (if < 70%)
- Ratio personnel / patient


In [None]:
alertes = []

In [None]:
for it in department_data.iterrows():
  #print(it)
  pass

In [None]:
# Define Alerts

alertes = []

In [None]:
department_data['bedUtilization'].unique()

array([0.56, 1.  , 0.43, 0.82, 0.9 , 0.66])

In [None]:
# High Bed Utilization Rate

# Serious Alerts

for _, dept in department_data.iterrows():
  # Alerte de taux d'occupation élevé
  if dept['bedUtilization'] > 0.9:
    alertes.append({
    'titre': f"Taux d'occupation critique en {dept['department']}",
    'message': f"Le taux d'occupation des lits est de {dept['bedUtilization']*100:.1f}% ({dept['currentlyAdmitted']} patients pour {dept['bedsAvailable']} lits).",
    'departement': dept['department'],
    'type': 'critique'
    })

alertes

[{'titre': "Taux d'occupation critique en Neurology",
  'message': "Le taux d'occupation des lits est de 100.0% (40 patients pour 40 lits).",
  'departement': 'Neurology',
  'type': 'critique'}]

In [None]:
# Low Recovery Rate Alerts

if dept['recoveryRate'] < 0.7:
  alertes.append({
      'titre': f"Taux de rétablissement faible en {dept['department']}",
      'message': f"Le taux de rétablissement est de seulement {dept['recoveryRate']*100:.1f}%, ce qui est inférieur à l'objectif de 70%.",
      'departement': dept['department'],
      'type': 'critique'
      })

alertes

[{'titre': "Taux d'occupation critique en Neurology",
  'message': "Le taux d'occupation des lits est de 100.0% (40 patients pour 40 lits).",
  'departement': 'Neurology',
  'type': 'critique'}]

In [None]:
# Ratio Doctor/Patients Alerts

ratio_medecins = dept['doctorCount'] / max(1, dept['totalPatients'])
if ratio_medecins < 0.1:  # Moins de 1 médecin pour 10 patients
  alertes.append({
      'titre': f"Manque de médecins en {dept['department']}",
      'message': f"Ratio médecin/patients de 1:{int(1/ratio_medecins)}, ce qui est inférieur aux recommandations.",
      'departement': dept['department'],
      'type': 'critique'
      })

max(1, dept['totalPatients'])

156

In [None]:
print(ratio_medecins)

0.019230769230769232


In [None]:
print(alertes)

[{'titre': "Taux d'occupation critique en Neurology", 'message': "Le taux d'occupation des lits est de 100.0% (40 patients pour 40 lits).", 'departement': 'Neurology', 'type': 'critique'}, {'titre': 'Manque de médecins en Surgery', 'message': 'Ratio médecin/patients de 1:52, ce qui est inférieur aux recommandations.', 'departement': 'Surgery', 'type': 'critique'}]


In [None]:
# Ratio Doctor/Patients Alerts

ratio_medecins = dept['doctorCount'] / dept['totalPatients']
if ratio_medecins < 0.1:  # Moins de 1 médecin pour 10 patients
  alertes.append({
      'titre': f"Manque de médecins en {dept['department']}",
      'message': f"Ratio médecin/patients de 1:{int(1/ratio_medecins)}, ce qui est inférieur aux recommandations.",
      'departement': dept['department'],
      'type': 'critique'
      })

alertes
ratio_medecins
# max(1, dept['totalPatients'])

0.019230769230769232

In [None]:
# Display Alertes Dataframe

pd.DataFrame(alertes)

Unnamed: 0,titre,message,departement,type
0,Taux d'occupation critique en Neurology,Le taux d'occupation des lits est de 100.0% (4...,Neurology,critique
1,Manque de médecins en Surgery,"Ratio médecin/patients de 1:52, ce qui est inf...",Surgery,critique
2,Manque de médecins en Surgery,"Ratio médecin/patients de 1:52, ce qui est inf...",Surgery,critique


**Générales**

- Tendance Admission / Sorties
- Alertes financières

In [None]:
# Admission vs. Discharges

metrics_data

Unnamed: 0,date,newAdmissions,discharges,emergencyVisits,surgeries,revenue,expenses
0,2025-04-22,22,8,60,13,120038,77681
1,2025-04-23,23,8,34,13,106975,65743
2,2025-04-24,6,9,58,9,134386,68412
3,2025-04-25,17,5,38,12,112336,54216
4,2025-04-26,15,17,29,15,70154,112666
...,...,...,...,...,...,...,...
85,2025-07-16,25,10,21,3,59076,65621
86,2025-07-17,22,6,36,6,107755,49625
87,2025-07-18,7,12,52,8,117072,51181
88,2025-07-19,11,12,26,7,92570,69067


In [None]:
len(metrics_data)

90

In [None]:
derniere_semaine = metrics_data.sort_values('date').tail(7)

In [None]:
admissions = derniere_semaine['newAdmissions'].sum()

admissions

np.int64(106)

In [None]:
sorties = derniere_semaine['discharges'].sum()

sorties

np.int64(92)

In [None]:
derniere_semaine

Unnamed: 0,date,newAdmissions,discharges,emergencyVisits,surgeries,revenue,expenses
83,2025-07-14,9,19,35,15,78430,118816
84,2025-07-15,20,15,41,13,88196,81826
85,2025-07-16,25,10,21,3,59076,65621
86,2025-07-17,22,6,36,6,107755,49625
87,2025-07-18,7,12,52,8,117072,51181
88,2025-07-19,11,12,26,7,92570,69067
89,2025-07-20,12,18,44,12,95661,82032


In [None]:
# Admissions vs Discharges Trend

if len(metrics_data) >= 7:  # Au moins une semaine de données
    derniere_semaine = metrics_data.sort_values('date').tail(7)
    admissions = derniere_semaine['newAdmissions'].sum()
    sorties = derniere_semaine['discharges'].sum()
    if admissions > sorties * 1.3:  # 30% plus d'admissions que de sorties
      alertes.append({
          'titre': "Déséquilibre admissions/sorties",
          'message': f"Sur les 7 derniers jours, il y a eu {admissions} admissions pour seulement {sorties} sorties, ce qui indique une pression croissante sur les capacités.",
          'departement': 'Tous',
          'type': 'avertissement'
          })

In [None]:
if len(metrics_data) >= 7:  # Au moins une semaine de données
    derniere_semaine = metrics_data.sort_values('date').tail(7)
    admissions = derniere_semaine['newAdmissions'].sum()
    sorties = derniere_semaine['discharges'].sum()
    if admissions > sorties * 1.3:  # 30% plus d'admissions que de sorties
      alertes.append({
          'titre': "Déséquilibre admissions/sorties",
          'message': f"Sur les 7 derniers jours, il y a eu {admissions} admissions pour seulement {sorties} sorties, ce qui indique une pression croissante sur les capacités.",
          'departement': 'Tous',
          'type': 'avertissement'
          })

alertes

[{'titre': "Taux d'occupation critique en Neurology",
  'message': "Le taux d'occupation des lits est de 100.0% (40 patients pour 40 lits).",
  'departement': 'Neurology',
  'type': 'critique'},
 {'titre': 'Manque de médecins en Surgery',
  'message': 'Ratio médecin/patients de 1:52, ce qui est inférieur aux recommandations.',
  'departement': 'Surgery',
  'type': 'critique'},
 {'titre': 'Manque de médecins en Surgery',
  'message': 'Ratio médecin/patients de 1:52, ce qui est inférieur aux recommandations.',
  'departement': 'Surgery',
  'type': 'critique'}]

In [None]:
# Test

if admissions > sorties * 1.3:  # 30% plus d'admissions que de sorties
  alertes.append({
      'titre': "Déséquilibre admissions/sorties",
      'message': f"Sur les 7 derniers jours, il y a eu {admissions} admissions pour seulement {sorties} sorties, ce qui indique une pression croissante sur les capacités.",
      'departement': 'Tous',
      'type': 'avertissement'
      })
  print(alertes)
else:
  print(f"Capacité non pressante : Il y a eu {admissions} admissions pour {sorties} sorties.")


Capacité non pressante : Il y a eu 106 admissions pour 92 sorties.


In [None]:
metrics_data

Unnamed: 0,date,newAdmissions,discharges,emergencyVisits,surgeries,revenue,expenses
0,2025-04-22,22,8,60,13,120038,77681
1,2025-04-23,23,8,34,13,106975,65743
2,2025-04-24,6,9,58,9,134386,68412
3,2025-04-25,17,5,38,12,112336,54216
4,2025-04-26,15,17,29,15,70154,112666
...,...,...,...,...,...,...,...
85,2025-07-16,25,10,21,3,59076,65621
86,2025-07-17,22,6,36,6,107755,49625
87,2025-07-18,7,12,52,8,117072,51181
88,2025-07-19,11,12,26,7,92570,69067


In [None]:
# Financial Alerts
if len(metrics_data) >= 30:  # Au moins un mois de données
  dernier_mois = metrics_data.sort_values('date').tail(30)
  revenus = dernier_mois['revenue'].sum()
  depenses = dernier_mois['expenses'].sum()
  if depenses > revenus:
    alertes.append({
        'titre': "Déficit financier",
        'message': f"Sur les 30 derniers jours, les dépenses ({depenses}) ont dépassé les revenus ({revenus}), créant un déficit de {(depenses-revenus)}.",
        'departement': 'Tous',
        'type': 'avertissement'
            })

In [None]:
# Financial Alerts Test

if len(metrics_data) >= 30:  # Au moins un mois de données
  dernier_mois = metrics_data.sort_values('date').tail(30)
  revenus = dernier_mois['revenue'].sum()
  depenses = dernier_mois['expenses'].sum()
  if depenses > revenus:
    alertes.append({
        'titre': "Déficit financier",
        'message': f"Sur les 30 derniers jours, les dépenses ({depenses}) ont dépassé les revenus ({revenus}), créant un déficit de {(depenses-revenus)}.",
        'departement': 'Tous',
        'type': 'avertissement'
            })
  else :
    print(f"Pas de problème financier : Les revenus ({(revenus)}) sont supérieurs aux dépenses ({(depenses)}), créant un profit de {(revenus-depenses)}.")

Pas de problème financier : Les revenus (2892363) sont supérieurs aux dépenses (2408654), créant un profit de 483709.


In [None]:
# Display Alert

alertes

[{'titre': "Taux d'occupation critique en Neurology",
  'message': "Le taux d'occupation des lits est de 100.0% (40 patients pour 40 lits).",
  'departement': 'Neurology',
  'type': 'critique'},
 {'titre': 'Manque de médecins en Surgery',
  'message': 'Ratio médecin/patients de 1:52, ce qui est inférieur aux recommandations.',
  'departement': 'Surgery',
  'type': 'critique'},
 {'titre': 'Manque de médecins en Surgery',
  'message': 'Ratio médecin/patients de 1:52, ce qui est inférieur aux recommandations.',
  'departement': 'Surgery',
  'type': 'critique'}]

In [None]:
# Display Alert Dataframe

pd.DataFrame(alertes)

Unnamed: 0,titre,message,departement,type
0,Taux d'occupation critique en Neurology,Le taux d'occupation des lits est de 100.0% (4...,Neurology,critique
1,Manque de médecins en Surgery,"Ratio médecin/patients de 1:52, ce qui est inf...",Surgery,critique
2,Manque de médecins en Surgery,"Ratio médecin/patients de 1:52, ce qui est inf...",Surgery,critique


### **Recommendations**

In [None]:
alertes

[{'titre': "Taux d'occupation critique en Neurology",
  'message': "Le taux d'occupation des lits est de 100.0% (40 patients pour 40 lits).",
  'departement': 'Neurology',
  'type': 'critique'},
 {'titre': 'Manque de médecins en Surgery',
  'message': 'Ratio médecin/patients de 1:52, ce qui est inférieur aux recommandations.',
  'departement': 'Surgery',
  'type': 'critique'},
 {'titre': 'Manque de médecins en Surgery',
  'message': 'Ratio médecin/patients de 1:52, ce qui est inférieur aux recommandations.',
  'departement': 'Surgery',
  'type': 'critique'}]

In [None]:
# Recommendations

recommendations = []

In [None]:
for a in alertes :
  if "occupation" in a['titre'].lower():
    print(a)

{'titre': "Taux d'occupation critique en Neurology", 'message': "Le taux d'occupation des lits est de 100.0% (40 patients pour 40 lits).", 'departement': 'Neurology', 'type': 'critique'}


In [None]:
# Recommendations basées sur les alertes

alertes_occupation = [a for a in alertes if "occupation" in a['titre'].lower()]
alertes_occupation
if alertes_occupation:
  depts_concernes = [a['departement'] for a in alertes_occupation]
  recommendations.append({
    'titre': "Optimisation de la capacité d'accueil",
    'message': f"Envisager d'augmenter la capacité en lits dans les départements suivants : {', '.join(depts_concernes)}. Alternativement, mettre en place un système de transfert vers d'autres établissements pour les cas non urgents."
    })
  print(recommendations)

[{'titre': "Optimisation de la capacité d'accueil", 'message': "Envisager d'augmenter la capacité en lits dans les départements suivants : Neurology. Alternativement, mettre en place un système de transfert vers d'autres établissements pour les cas non urgents."}]


In [None]:
alertes_retablissement = [a for a in alertes if "rétablissement" in a['titre'].lower()]

alertes_retablissement

[]

In [None]:
# Recommandations pour l'amélioration des taux de rétablissement

alertes_retablissement = [a for a in alertes if "rétablissement" in a['titre'].lower()]
if alertes_retablissement:
  depts_concernes = [a['departement'] for a in alertes_retablissement]
  recommendations.append({
  'titre': "Amélioration des taux de rétablissement",
  'message': f"Analyser les protocoles de traitement dans les départements suivants : {', '.join(depts_concernes)}. Envisager une révision des protocoles de soins et une formation supplémentaire du personnel."
  })
  # print(recommendations)

In [None]:
# Recommandations pour l'optimisation du personnel

alertes_personnel = [a for a in alertes if "médecins" in a['titre'].lower() or "personnel" in a['titre'].lower()]
if alertes_personnel:
  depts_concernes = [a['departement'] for a in alertes_personnel]
  recommendations.append({
      'titre': "Optimisation des ressources humaines",
      'message': f"Recruter du personnel supplémentaire ou réaffecter temporairement du personnel vers les départements suivants : {', '.join(depts_concernes)}. Envisager également des heures supplémentaires pour le personnel existant."
      })
  # print(recommendations)

In [None]:
# Recommandations financières

alertes_financieres = [a for a in alertes if "financier" in a['titre'].lower() or "déficit" in a['titre'].lower()]
if alertes_financieres:
  recommendations.append({
      'titre': "Optimisation financière",
      'message': "Revoir la structure des coûts et identifier les postes de dépenses à optimiser. Envisager une révision des tarifs pour certains services spécialisés."
      })

In [None]:
# Recommandations générales basées sur les données

# Recommandation sur la durée d'hospitalisation
duree_moyenne = patient_data["stayDuration"].mean()
if duree_moyenne > 10:  # Si la durée moyenne est supérieure à 10 jours
  recommendations.append({
  'titre': "Réduction de la durée d'hospitalisation",
  'message': f"La durée moyenne d'hospitalisation est de {duree_moyenne:.1f} jours. Mettre en place des protocoles de sortie anticipée pour les patients stables et développer les soins ambulatoires."
        })

In [None]:
dept_counts = patient_data['department'].value_counts()


print(f"{dept_counts.idxmax()} : {dept_counts.max()}") # Le departement qui s'affiche le plus
print("-----------------")
print(f"{dept_counts.idxmin()} : {dept_counts.min()} ") # Le departement qui s'affiche le moins
print("-----------------")
print(f"Vérifier si {dept_counts.idxmax()} a 3 fois plus de patients que {dept_counts.idxmin()} : {dept_counts.max() > 3 * dept_counts.min()}")

Pediatrics : 177
-----------------
Surgery : 156 
-----------------
Vérifier si Pediatrics a 3 fois plus de patients que Surgery : False


In [None]:
# Recommandation sur l'équilibre des départements

dept_counts = patient_data['department'].value_counts()
if len(dept_counts) > 1:  # S'assurer qu'il y a au moins deux départements
  max_dept = dept_counts.idxmax()
  min_dept = dept_counts.idxmin()
  if dept_counts.max() > 3 * dept_counts.min():  # Si un département a 3 fois plus de patients qu'un autre
    recommendations.append({
    'titre': "Rééquilibrage des ressources entre départements",
    'message': f"Le département {max_dept} traite significativement plus de patients que le département {min_dept}. Envisager une redistribution des ressources et du personnel."
    })

In [None]:
recommendations

[{'titre': "Optimisation de la capacité d'accueil",
  'message': "Envisager d'augmenter la capacité en lits dans les départements suivants : Neurology. Alternativement, mettre en place un système de transfert vers d'autres établissements pour les cas non urgents."},
 {'titre': 'Optimisation des ressources humaines',
  'message': 'Recruter du personnel supplémentaire ou réaffecter temporairement du personnel vers les départements suivants : Surgery, Surgery. Envisager également des heures supplémentaires pour le personnel existant.'},
 {'titre': "Réduction de la durée d'hospitalisation",
  'message': "La durée moyenne d'hospitalisation est de 13.7 jours. Mettre en place des protocoles de sortie anticipée pour les patients stables et développer les soins ambulatoires."}]

In [None]:
reco = pd.DataFrame(recommendations)
reco

Unnamed: 0,titre,message
0,Optimisation de la capacité d'accueil,Envisager d'augmenter la capacité en lits dans...
1,Optimisation des ressources humaines,Recruter du personnel supplémentaire ou réaffe...
2,Réduction de la durée d'hospitalisation,La durée moyenne d'hospitalisation est de 13.7...


## **Résumé**

In [None]:
# Resume

resume = []

In [None]:
# Statistiques générales

total_patients = len(patient_data)
patients_hospitalises = len(patient_data[patient_data['isAdmitted']])
taux_occupation_moyen = department_data['bedUtilization'].mean()
taux_retablissement_moyen = department_data['recoveryRate'].mean()

resume.append({
    'titre': "Vue d'ensemble",
    'message': f"L'établissement a traité {total_patients} patients, dont {patients_hospitalises} sont actuellement hospitalisés. Le taux d'occupation moyen est de {taux_occupation_moyen*100:.1f}% et le taux de rétablissement moyen est de {taux_retablissement_moyen*100:.1f}%."
    })
# resume

In [None]:
# Département le plus performant

if not department_data.empty:
  dept_performance = department_data.copy()
  dept_performance['score'] = dept_performance['recoveryRate'] * 0.6 + (1 - dept_performance['bedUtilization']) * 0.4
  meilleur_dept = dept_performance.loc[dept_performance['score'].idxmax()]

  resume.append({
      'titre': "Département le plus performant",
      'message': f"Le département de {meilleur_dept['department']} présente les meilleurs indicateurs avec un taux de rétablissement de {meilleur_dept['recoveryRate']*100:.1f}% et un taux d'occupation équilibré de {meilleur_dept['bedUtilization']*100:.1f}%."
      })

- dept_performance['recoveryRate'] * 0.6

→ on donne un poids de 60% au taux de guérison (recoveryRate).

- (1 - dept_performance['bedUtilization']) * 0.4

→ on donne un poids de 40% à l'inverse du taux d’occupation des lits :

- Plus un lit est libre (i.e. faible taux d’occupation), plus le score augmente ici.

In [None]:
dept_performance['score']

Unnamed: 0,score
0,0.656
1,0.504
2,0.732
3,0.546
4,0.496
5,0.634


In [None]:
resume[1]

{'titre': 'Département le plus performant',
 'message': "Le département de Oncology présente les meilleurs indicateurs avec un taux de rétablissement de 84.0% et un taux d'occupation équilibré de 43.0%."}

In [None]:
# Tendances financières

if len(metrics_data) >= 30:
  dernier_mois = metrics_data.sort_values('date').tail(30)
  revenus_total = dernier_mois['revenue'].sum()
  depenses_total = dernier_mois['expenses'].sum()
  profit = revenus_total - depenses_total

  resume.append({
    'titre': "Situation financière",
    'message': f"Sur les 30 derniers jours, l'établissement a généré {revenus_total} de revenus pour {depenses_total} de dépenses, résultant en {'un profit' if profit >= 0 else 'une perte'} de {abs(profit)}."
    })

In [None]:
abs(profit)

np.int64(483709)

In [None]:
resume[2]

{'titre': 'Situation financière',
 'message': "Sur les 30 derniers jours, l'établissement a généré 2892363 de revenus pour 2408654 de dépenses, résultant en un profit de 483709."}

In [None]:
# Résumé des alertes

if alertes:
  alertes_critiques = len([a for a in alertes if a['type'] == 'critique'])
  alertes_avertissement = len([a for a in alertes if a['type'] == 'avertissement'])

  resume.append({
    'titre': "Points d'attention",
    'message': f"Le système a identifié {alertes_critiques} alertes critiques et {alertes_avertissement} avertissements qui nécessitent une attention particulière."
    })

In [None]:
resume[3]

{'titre': "Points d'attention",
 'message': 'Le système a identifié 3 alertes critiques et 0 avertissements qui nécessitent une attention particulière.'}

# **Tableau Compact : Analysis**

In [None]:
if len(metrics_data) > 0:
        fig = px.line(metrics_data, x='date', y=['newAdmissions', 'discharges'],
                     title="Admissions_discharges",
                     labels={'value': "count", 'date': "date", 'variable': "metric"},
                     color_discrete_map={'nouvellesAdmissions': "#3498DB", 'sorties': "#1F618D"})
        fig.update_layout(
            height=250,
            legend_title_text='',
            legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
            plot_bgcolor= "#141E2E",
            paper_bgcolor= "#141E2E",
            font=dict(color="#E0E0E0"),
            #xaxis=dict(gridcolor= "#E0E0E0"),
            #yaxis=dict(gridcolor=current_theme["border_color"]),
            margin=dict(l=10, r=10, t=40, b=10)
        )
        fig.show()

In [None]:
if not department_data.empty:
        fig = px.bar(department_data, x='department', y='bedUtilization',
                    title="Bed_utilization",
                    labels={'bedUtilization': "Bed_utilization_label", 'department': "department_label"},
                    #color='departement  'departement': t("department_label")},
                    color='department',
                    color_discrete_map={dept: color for dept, color in zip(department_data['department'], ["#3498DB", "#2980B9", "#1F618D", "#2C3E50", "#5DADE2", "#85C1E9", "#7F8C8D"])})
        fig.update_layout(
            height=250,
            coloraxis_showscale=False,
            plot_bgcolor="#141E2E",
            paper_bgcolor="#141E2E",
            font=dict(color="#E0E0E0"),
            #xaxis=dict(gridcolor=current_theme["border_color"]),
            #yaxis=dict(gridcolor=current_theme["border_color"]),
            showlegend=False,
            margin=dict(l=10, r=10, t=40, b=10)
        )
        fig.update_yaxes(range=[0, 1], tickformat='.0%')
        fig.show()

In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Two subplots to compare Recovery Rate and BedUtilization

if not department_data.empty:
  fig = make_subplots(rows=1, cols=2,
                      subplot_titles=("Recovery Rate", "Bed Utilization"))

  # Define the grayscale color sequence
  grayscale_colors = ["#E74C3C", "#C0392B", "#D35400", "#E67E22", "#F39C12", "#922B21", "#7F8C8D"]

  # Add Recovery Rate bar chart
  fig.add_trace(
      go.Bar(x=department_data['department'], y=department_data['recoveryRate'], name='Recovery Rate',
             marker_color=grayscale_colors[:len(department_data['department'])]), # Assign colors
      row=1, col=1
  )

  # Add Bed Utilization bar chart
  fig.add_trace(
      go.Bar(x=department_data['department'], y=department_data['bedUtilization'], name='Bed Utilization',
             marker_color=grayscale_colors[:len(department_data['department'])]), # Assign colors
      row=1, col=2
  )

  # Update layout
  fig.update_layout(
      title_text="Department Performance: Recovery Rate vs. Bed Utilization",
      showlegend=True,
      paper_bgcolor='black',
      plot_bgcolor='black',
      font=dict(color='white')
  )

  fig.show()

In [None]:
if not patient_data.empty:
  dept_counts = patient_data['department'].value_counts().reset_index()
  dept_counts.columns = [("department_label"), "count"]

  # Créer une carte de couleurs pour les départements
  dept_color_map = {dept: color for dept, color in zip(dept_counts["department_label"], ["#E74C3C", "#C0392B", "#D35400", "#E67E22", "#F39C12", "#922B21", "#7F8C8D"])}

  fig = px.pie(dept_counts, values="count", names="department_label",
    title="patient_distribution",
    color="department_label",
    color_discrete_map=dept_color_map)
  fig.update_traces(textposition='inside', textinfo='percent+label', textfont=dict(color='#000000'))
  fig.update_layout(
    legend=dict(orientation="h", yanchor="bottom", y=-0.1, xanchor="center", x=0.5),
    font=dict(color="#F0F0F0"),
    paper_bgcolor="#0E0808",
    plot_bgcolor="#0E0808"
    )

  fig.show()

##### **Date Format**

In [None]:
min_date = metrics_data['date'].min().date()
max_date = metrics_data['date'].max().date()
plage_date = (min_date, max_date)

In [None]:
if len(plage_date) == 2:
    date_debut, date_fin = plage_date
    # Convertir en datetime pour une comparaison correcte
    date_debut_dt = pd.Timestamp(date_debut)
    date_fin_dt = pd.Timestamp(date_fin)
    quotidien_filtre = metrics_data[(metrics_data['date'] >= date_debut_dt) & (metrics_data['date'] <= date_fin_dt)]
else:
    quotidien_filtre = metrics_data
    # Par défaut à la plage complète si plage_date n'est pas correctement définie
    date_debut = min_date
    date_fin = max_date


In [None]:
# Display informations

print(f"Date de début : {date_debut}")
print(f"Date de fin : {date_fin}")
print(f"Quantité de données : {len(quotidien_filtre)}")

Date de début : 2025-04-22
Date de fin : 2025-07-20
Quantité de données : 90


In [None]:
# Test Plage date using timedelta between the choosen date and the moment the program is executed

choosen_date = input("Enter the date - yyyy/mm/dd : ")

# Convert choosen date
choosen_date = pd.to_datetime(choosen_date)
print(f"Choosen date : {choosen_date}")
print(f"Moment de l'exécution du programme : {pd.Timestamp.now()}")
print(choosen_date in plage_date)
if choosen_date in plage_date :
  if len(plage_date) == 2:
      date_debut, date_fin = plage_date

  print(plage_date)
  print(f"La différence entre la date de debut et de fin {date_fin - date_debut}")

Enter the date - yyyy/mm/dd : 2025-02-01
Choosen date : 2025-02-01 00:00:00
Moment de l'exécution du programme : 2025-07-21 14:46:03.441006
False


In [None]:
# Test 2025-04-22

# Test Plage date using timedelta between the choosen date and the moment the program is executed

choosen_date = input("Enter the date - yyyy/mm/dd : ")
print("============================")

# Convert choosen date
choosen_date = pd.to_datetime(choosen_date)
current_date = pd.Timestamp.now()
choosen_date, current_date = plage_date

print(f"Choosen date : {choosen_date}")
print(f"Current date : {pd.Timestamp.now()}")

print(f"Verify if choosen_date in Date column : {choosen_date in quotidien_filtre['date']}")
print(current_date - choosen_date)


Enter the date - yyyy/mm/dd : 2025-04-22
Choosen date : 2025-04-22
Current date : 2025-07-21 14:46:13.980661
Verify if choosen_date in Date column : False
89 days, 0:00:00


In [None]:
# Iterate through the date column and calculate the difference between the previous date and the following one in a new column
for index, row in quotidien_filtre.iterrows():
    if index > 0:
        previous_date = quotidien_filtre.loc[index - 1, 'date']
        quotidien_filtre.loc[index, 'date_diff'] = row['date'] - previous_date

quotidien_filtre["date_diff"] = pd.to_timedelta(quotidien_filtre["date_diff"])

In [None]:
# quotidien_filtre

Unnamed: 0,date,newAdmissions,discharges,emergencyVisits,surgeries,revenue,expenses,date_diff
0,2025-04-22,22,8,60,13,120038,77681,NaT
1,2025-04-23,23,8,34,13,106975,65743,1 days
2,2025-04-24,6,9,58,9,134386,68412,1 days
3,2025-04-25,17,5,38,12,112336,54216,1 days
4,2025-04-26,15,17,29,15,70154,112666,1 days
...,...,...,...,...,...,...,...,...
85,2025-07-16,25,10,21,3,59076,65621,1 days
86,2025-07-17,22,6,36,6,107755,49625,1 days
87,2025-07-18,7,12,52,8,117072,51181,1 days
88,2025-07-19,11,12,26,7,92570,69067,1 days


In [None]:
# Revenue vs Charges

fig = px.line(quotidien_filtre, x='date', y=['revenue', 'expenses'],
  title="Revenue & Expenses",
  labels={'value': "amount", 'date': "date", 'variable': "metric"},
  color_discrete_map={'revenus': "#27AE60", 'depenses': "#C0392B"})

# Ajouter la zone de profit

quotidien_filtre['profit'] = quotidien_filtre['revenue'] - quotidien_filtre['expenses']
fig.add_trace(go.Scatter(
x=quotidien_filtre['date'],
y=quotidien_filtre['profit'],
fill='tozeroy',
mode='none',
name="profit",
fillcolor=f'rgba({int("#27AE60"[1:3], 16)}, {int("#C0392B"[3:5], 16)}, {int("#27AE60"[5:7], 16)}, 0.2)'
        ))

fig.update_layout(
            legend_title_text='',
            legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
            plot_bgcolor="#162727",
            paper_bgcolor="#162727",
            font=dict(color="#E0E0E0"),
            xaxis=dict(gridcolor="#1E3535"),
            yaxis=dict(gridcolor="#1E3535")
        )

In [None]:
# Revenus et coûts par département

if not department_data.empty:
  fig = go.Figure()

fig.add_trace(go.Bar(
  x=department_data['department'],
  y=department_data['totalRevenue'],
  name="revenue",
  marker_color="#4CAF50"
  ))

fig.add_trace(go.Bar(
  x=department_data['department'],
  y=department_data['operatingCost'],
  name="operating_cost",
  marker_color="#B71C1C"
  ))

fig.update_layout(
  title="department_revenue_cost",
  barmode='group',
  xaxis_title="department_label",
  yaxis_title="amount",
  legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
  plot_bgcolor="#1E1E1E",
  paper_bgcolor="#1E1E1E",
  font=dict(color="#E0E0E0"),
  xaxis=dict(gridcolor="#2A2A2A"),
  yaxis=dict(gridcolor="#2A2A2A")
            )

In [None]:
chart_colors =  ["#D4AF37", "#C0C0C0", "#9D8221", "#B8860B", "#E6BE8A", "#DAA520", "#A8A8A8"]

# Section du personnel

# Distribution du personnel par rôle
if not staff_data.empty:
  role_counts = staff_data['role'].value_counts().reset_index()
  role_counts.columns = ["role", "count"]

# Créer une carte de couleurs pour les rôles
  role_colors = {
  "doctor": chart_colors[0],
  "nurse": chart_colors[1],
  "technician": chart_colors[2],
  "administrative": chart_colors[3],
  "support": chart_colors[4]
            }

fig = px.bar(role_counts, x="role", y="count",
  title="staff_distribution",
    color="role",
    color_discrete_map=role_colors)

fig.update_layout(
  showlegend=False,
  plot_bgcolor="#1A1010",
  paper_bgcolor="#1A1010",
  font=dict(color="#F0F0F0"),
  xaxis=dict(gridcolor="#2A1515"),
  yaxis=dict(gridcolor="#2A1515")
  )

In [None]:
# Performance du personnel

if not staff_data.empty:
  performance_data = staff_data.groupby('role')['performanceScore'].mean().reset_index()

fig = px.bar(performance_data, x='role', y='performanceScore',
  title="performance_score",
  labels={'scorePerformance': "performance_score", 'role': "role"},
  color='role',
  color_discrete_map=role_colors)

fig.update_layout(
  showlegend=False,
  plot_bgcolor="#1A1010",
  paper_bgcolor="#1A1010",
  font=dict(color="#F0F0F0"),
  xaxis=dict(gridcolor="#2A1515"),
  yaxis=dict(gridcolor="#2A1515")
  )

fig.update_yaxes(range=[0, 1])
fig.show()