In [3]:
import pickle

with open('clinic_metrics.pkl', 'rb') as f:
    clinic_metrics = pickle.load(f)

In [23]:
import pandas as pd
import numpy as np

start_date = pd.to_datetime('2022-01-01')
start_year = start_date.year
start_month = start_date.month


end_date = pd.to_datetime('2023-12-31')
end_year = end_date.year
end_month = end_date.month

num_months = (end_date.year - start_date.year) * 12 + (end_date.month - start_date.month) + 1

In [5]:
start_year

2022

In [6]:
all_clinic = list(clinic_metrics.keys())

In [7]:
all_clinic[0]

'N21701'

In [8]:
# !pip install numpy_financial

In [9]:
import sys
import os

# Manually define the path to the parent directory
project_root = 'C:/Users/Zak/OneDrive/Dokumen/GAIA Dental Studio/Model/Asset Value Calculator/Combined Model\Clinic Value Evaluation'
sys.path.append(project_root)


from model_forecasting import ModelForecastPerformance

In [10]:
def process_multiple_dfs(data_dict):
    # Initialize the result dictionary
    result = {}

    # Iterate over the input dictionary
    for idx, (initiative_name, df) in enumerate(data_dict.items()):
        # Group by Quarter and sum Profit
        profit_by_quarter = df.groupby('Quarter')['Profit'].sum().astype(int)

        # Create the output dictionary for this initiative
        result[idx] = {
            'initiatives': initiative_name,
            **profit_by_quarter.to_dict()
        }

    return result

In [44]:
cleaned_item_code = pd.read_csv('cleaned_item_code.csv')

pool_clinic = {}
pool_clinic_df = {}

for selected_clinic in all_clinic[:1]:

  clinic_dictionary = {}
  company_variables ={'General Expense':-clinic_metrics[selected_clinic]['total_expenses']}
  model = ModelForecastPerformance(company_variables)

  dataframe = model.generate_monthly_cashflow_given_item_code(clinic_metrics[selected_clinic]['updated_clinic_item_code'], cleaned_item_code, num_months, 0.15, start_year, start_month)

  dataframe2 = model.forecast_indirect_cost(num_months, start_year)

  model.add_hourly_period(dataframe)
  model.add_hourly_period(dataframe2)

  dataframe['Period'] = pd.to_datetime(dataframe['Period'])
  dataframe['Year'] = dataframe['Period'].dt.year
  dataframe['Quarter'] = dataframe['Period'].dt.quarter
  dataframe['Adjusted Quarter'] = (dataframe['Year'] - start_year) * 4 + dataframe['Quarter']
  dataframe['Quarter'] = 'Q' + dataframe['Adjusted Quarter'].astype(str)
  dataframe.drop(columns='Adjusted Quarter', inplace=True)

  dataframe2['Profit'] = dataframe2['Revenue'] - dataframe2['Expense']

  dataframe2['Period'] = pd.to_datetime(dataframe2['Period'])
  dataframe2['Year'] = dataframe2['Period'].dt.year
  dataframe2['Quarter'] = dataframe2['Period'].dt.quarter
  dataframe2['Adjusted Quarter'] = (dataframe2['Year'] - start_year) * 4 + dataframe2['Quarter']
  dataframe2['Quarter'] = 'Q' + dataframe2['Adjusted Quarter'].astype(str)
  dataframe2.drop(columns='Adjusted Quarter', inplace=True)

  clinic_dictionary['Gross Profit'] = dataframe
  clinic_dictionary['Indirect Cost'] = dataframe2

  pool_clinic_df[selected_clinic] = clinic_dictionary

  clinic_1 = process_multiple_dfs(clinic_dictionary)

  pool_clinic[selected_clinic] = clinic_1


In [46]:
dataframe_copy = dataframe.copy()

In [57]:
dataframe = dataframe_copy.copy()

In [54]:
import pandas as pd
import numpy as np
from scipy.stats import truncnorm

def assign_patient_ids(df, baseline_revenue, baseline_number_patients, relative_variation_spending):
    # Ensure Period column is datetime
    df['Period'] = pd.to_datetime(df['Period'])
    
    # Initialize an empty DataFrame to store results
    final_df = pd.DataFrame()
    
    # Loop through each year
    for year, year_df in df.groupby('Year'):
        # Compute actual revenue and patient-revenue ratio
        actual_revenue = year_df['Revenue'].sum()
        patient_ratio = baseline_number_patients / baseline_revenue
        
        # Calculate number of unique patients for the year
        number_patients = int(np.ceil(actual_revenue * patient_ratio))
        
        # Create patient pool
        patient_pool = [f'Patient {i + 1}' for i in range(number_patients)]
        
        # Shuffle the DataFrame for the year
        year_df = year_df.sample(frac=1, random_state=42).reset_index(drop=True)
        
        # Compute mean and standard deviation for spending
        mean_revenue = actual_revenue / number_patients
        std_revenue = mean_revenue * relative_variation_spending
        
        # Define bounds for truncation
        lower_bound = mean_revenue - std_revenue
        upper_bound = mean_revenue + std_revenue
        
        # Compute truncation parameters for scipy's truncnorm
        a, b = (lower_bound - mean_revenue) / std_revenue, (upper_bound - mean_revenue) / std_revenue
        
        # Assign patient IDs
        patient_idx = 0
        patient_totals = {patient_id: 0 for patient_id in patient_pool}
        
        for idx, row in year_df.iterrows():
            if patient_idx >= len(patient_pool):
                break
            
            # Current patient ID
            patient_id = patient_pool[patient_idx]
            
            # Generate spending limit if not already set
            if patient_totals[patient_id] == 0:
                patient_totals[patient_id] = truncnorm.rvs(a, b, loc=mean_revenue, scale=std_revenue, random_state=42)
            
            # Assign Patient ID to row
            year_df.at[idx, 'Patient ID'] = patient_id
            
            # Update cumulative spending for the patient
            patient_totals[patient_id] -= row['Revenue']
            
            # Move to next patient if spending limit is reached
            if patient_totals[patient_id] <= 0:
                patient_idx += 1
        
        # Append the processed DataFrame for the year
        final_df = pd.concat([final_df, year_df], ignore_index=True)
    
    # Re-sort by Period
    final_df = final_df.sort_values(by='Period').reset_index(drop=True)
    
    return final_df

In [49]:
def assign_patient_ids_with_cov_tolerance(df, baseline_revenue, baseline_number_patients, relative_variation_spending, cov_tolerance=0.3):
    # Ensure Period column is datetime
    df['Period'] = pd.to_datetime(df['Period'])
    
    # Initialize an empty DataFrame to store results
    final_df = pd.DataFrame()
    
    # Loop through each year
    for year, year_df in df.groupby('Year'):
        # Compute actual revenue and patient-revenue ratio
        actual_revenue = year_df['Revenue'].sum()
        patient_ratio = baseline_number_patients / baseline_revenue
        
        # Calculate number of unique patients for the year
        number_patients = int(np.ceil(actual_revenue * patient_ratio))
        
        # Create patient pool
        patient_pool = [f'Patient {i + 1}' for i in range(number_patients)]
        
        # Compute initial mean and standard deviation for spending
        mean_revenue = actual_revenue / number_patients
        
        # CoV calibration
        calibrated_std = mean_revenue * relative_variation_spending  # Initial std guess
        for _ in range(100):  # Maximum 100 iterations to prevent infinite loop
            simulated_spending = np.random.normal(mean_revenue, calibrated_std, size=1000)  # Simulate a distribution
            simulated_spending = simulated_spending[simulated_spending > 0]  # Ignore negative spending
            simulated_cov = simulated_spending.std() / simulated_spending.mean()  # Calculate CoV
            
            # Check if simulated CoV is within tolerance
            if abs(simulated_cov - relative_variation_spending) <= cov_tolerance * relative_variation_spending:
                break
            
            # Adjust std based on difference
            if simulated_cov > relative_variation_spending:
                calibrated_std *= 0.9  # Decrease std
            else:
                calibrated_std *= 1.1  # Increase std
        
        std_revenue = calibrated_std  # Final calibrated std
        
        # Shuffle the DataFrame for the year
        year_df = year_df.sample(frac=1, random_state=42).reset_index(drop=True)
        
        # Assign patient IDs
        patient_idx = 0
        assigned_patients = []
        patient_totals = {patient_id: 0 for patient_id in patient_pool}
        
        for idx, row in year_df.iterrows():
            if patient_idx >= len(patient_pool):
                break
            
            # Current patient ID
            patient_id = patient_pool[patient_idx]
            
            # Generate spending limit if not already set
            if patient_totals[patient_id] == 0:
                patient_totals[patient_id] = max(0, np.random.normal(mean_revenue, std_revenue))
            
            # Assign Patient ID to row
            year_df.at[idx, 'Patient ID'] = patient_id
            
            # Update cumulative spending for the patient
            patient_totals[patient_id] -= row['Revenue']
            
            # Move to next patient if spending limit is reached
            if patient_totals[patient_id] <= 0:
                patient_idx += 1
        
        # Append the processed DataFrame for the year
        final_df = pd.concat([final_df, year_df], ignore_index=True)
    
    # Re-sort by Period
    final_df = final_df.sort_values(by='Period').reset_index(drop=True)
    
    return final_df

In [58]:
# Parameters
baseline_revenue = 1800000
baseline_number_patients = 1500
relative_variation_spending = 0.10

result = assign_patient_ids(dataframe, baseline_revenue, baseline_number_patients, relative_variation_spending)
result

Unnamed: 0,Period,Code,Revenue,Expense,Profit,Hourly_Period,Year,Quarter,Patient ID
0,2022-01-01,072,29.0,17.744717,11.255283,2022-01-01 08:30:00,2022,Q1,Patient 3330
1,2022-01-01,927,40.0,32.059434,7.940566,2022-01-01 09:00:00,2022,Q1,Patient 3242
2,2022-01-01,811,151.0,25.514717,125.485283,2022-01-01 15:00:00,2022,Q1,Patient 2716
3,2022-01-01,416,95.0,26.284717,68.715283,2022-01-01 16:30:00,2022,Q1,Patient 3857
4,2022-01-01,322,199.0,47.779434,151.220566,2022-01-01 15:00:00,2022,Q1,Patient 532
...,...,...,...,...,...,...,...,...,...
158027,2023-12-31,022,20.0,15.814717,4.185283,2023-12-31 15:00:00,2023,Q8,Patient 3786
158028,2023-12-31,022,20.0,15.814717,4.185283,2023-12-31 13:30:00,2023,Q8,Patient 4850
158029,2023-12-31,022,20.0,15.814717,4.185283,2023-12-31 16:00:00,2023,Q8,Patient 4753
158030,2023-12-31,022,20.0,15.814717,4.185283,2023-12-31 16:00:00,2023,Q8,Patient 1736


In [60]:
# group by year and patient ID and show the sum revenue for each patient
group_year = result.groupby(['Year', 'Patient ID'])['Revenue'].sum()

# Convert the Series to a DataFrame for easier grouping
group_year_df = group_year.reset_index(name='Revenue')

# Group by Year and calculate the CoV
cov_by_year = (
    group_year_df
    .groupby('Year')['Revenue']
    .agg(['mean', 'std'])  # Calculate mean and standard deviation
    .assign(CoV=lambda x: x['std'] / x['mean'])  # Compute CoV
)

cov_by_year


Unnamed: 0_level_0,mean,std,CoV
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022,1430.091152,316.590304,0.221378
2023,1218.626482,57.481945,0.047169


In [15]:
dataframe

Unnamed: 0,Period,Code,Revenue,Expense,Profit,Hourly_Period,Year,Quarter
0,2022-01-01,114,83.0,36.359434,46.640566,2022-01-01 11:30:00,2022,Q1
1,2022-01-01,811,151.0,25.514717,125.485283,2022-01-01 11:00:00,2022,Q1
2,2022-01-01,419,43.0,18.034717,24.965283,2022-01-01 16:30:00,2022,Q1
3,2022-01-01,222,44.0,9.518239,34.481761,2022-01-01 15:30:00,2022,Q1
4,2022-01-01,314,143.0,64.254151,78.745849,2022-01-01 11:00:00,2022,Q1
...,...,...,...,...,...,...,...,...
9872,2022-12-31,022,20.0,15.814717,4.185283,2022-12-31 16:00:00,2022,Q4
9873,2022-12-31,022,20.0,15.814717,4.185283,2022-12-31 09:00:00,2022,Q4
9874,2022-12-31,114,83.0,36.359434,46.640566,2022-12-31 15:30:00,2022,Q4
9875,2022-12-31,121,29.0,8.008239,20.991761,2022-12-31 09:00:00,2022,Q4


In [None]:
dataframe2

Unnamed: 0,Period,Revenue,Expense,Hourly_Period,Profit,Year,Quarter
0,2022-01-31,0,23063.0,2022-01-31 14:30:00,-23063.0,2022,Q1
1,2022-02-28,0,23063.0,2022-02-28 09:00:00,-23063.0,2022,Q1
2,2022-03-31,0,23063.0,2022-03-31 16:00:00,-23063.0,2022,Q1
3,2022-04-30,0,23063.0,2022-04-30 15:00:00,-23063.0,2022,Q2
4,2022-05-31,0,23063.0,2022-05-31 09:30:00,-23063.0,2022,Q2
5,2022-06-30,0,23063.0,2022-06-30 11:00:00,-23063.0,2022,Q2
6,2022-07-31,0,23063.0,2022-07-31 15:00:00,-23063.0,2022,Q3
7,2022-08-31,0,23063.0,2022-08-31 08:00:00,-23063.0,2022,Q3
8,2022-09-30,0,23063.0,2022-09-30 10:30:00,-23063.0,2022,Q3
9,2022-10-31,0,23063.0,2022-10-31 10:00:00,-23063.0,2022,Q4


In [None]:
clinic_dictionary['Gross Profit'] = dataframe
clinic_dictionary['Indirect Cost'] = dataframe2

In [None]:
clinic_1 = process_multiple_dfs(clinic_dictionary)

In [None]:
pool_clinic = {}
all_clinic[0]

'N21701'

In [None]:
pool_clinic['N21701'] = clinic_1

In [None]:
pool_clinic

{'N21701': {0: {'initiatives': 'Gross Profit',
   'Q1': 546738,
   'Q10': 391524,
   'Q11': 323145,
   'Q12': -73155,
   'Q2': 498467,
   'Q3': 515743,
   'Q4': 518180,
   'Q5': 550732,
   'Q6': 532116,
   'Q7': 561977,
   'Q8': 487462,
   'Q9': 429474},
  1: {'initiatives': 'Indirect Cost',
   'Q1': -69189,
   'Q10': -69189,
   'Q11': -69189,
   'Q12': -69189,
   'Q2': -69189,
   'Q3': -69189,
   'Q4': -69189,
   'Q5': -69189,
   'Q6': -69189,
   'Q7': -69189,
   'Q8': -69189,
   'Q9': -69189}},
 'N21322': {0: {'initiatives': 'Gross Profit',
   'Q1': 134701,
   'Q10': 95800,
   'Q11': 95530,
   'Q12': 6409,
   'Q2': 112223,
   'Q3': 123431,
   'Q4': 149236,
   'Q5': 120802,
   'Q6': 123667,
   'Q7': 121241,
   'Q8': 124220,
   'Q9': 121639},
  1: {'initiatives': 'Indirect Cost',
   'Q1': -51786,
   'Q10': -51786,
   'Q11': -51786,
   'Q12': -51786,
   'Q2': -51786,
   'Q3': -51786,
   'Q4': -51786,
   'Q5': -51786,
   'Q6': -51786,
   'Q7': -51786,
   'Q8': -51786,
   'Q9': -51786}}}

In [None]:
# save pool clinic in json file
import json

with open('pool_clinic.json', 'w') as f:
    json.dump(pool_clinic, f)

In [None]:
len(pool_clinic_df)

50

In [None]:
# save pool_clinic_df to pickle
import pickle

with open('pool_clinic_df.pkl', 'wb') as f:
    pickle.dump(pool_clinic_df, f)