In [1]:
import pandas as pd
import numpy as np


class ModelRecommendation:
    """Simulate follow-up transactions produced by treatment recommendations.

    Every source transaction may "convert" into an additional transaction for
    the item that ``recommendation_pairs`` recommends for its treatment.  The
    converted transaction gets its revenue and expense rebuilt from
    ``item_code_details`` and a salary rate.

    Attributes:
        transaction_data: Source transactions.  The columns ``Treatment`` and
            ``Period`` are read; every other column is carried over unchanged.
        converted_transaction_data: All converted transactions generated so
            far (an empty DataFrame until the first conversion).
        item_code_details: Item table with the columns ``item_number``,
            ``price AUD``, ``cost_material AUD``, ``duration`` and
            ``medical_officer_new``.
        recommendation_pairs: Table with the columns ``item_number`` and
            ``recommended_item``.
        OHT_salary: Rate used when ``medical_officer_new`` equals ``'OHT'``.
        specialist_salary: Rate used for every other ``medical_officer_new``.

    Note:
        The salary cost is ``duration * rate / 60``; presumably ``duration``
        is in minutes and the rates are per hour -- confirm against the data.
    """

    # Columns returned by group_by_period(); also used for its empty result.
    _MONTHLY_COLUMNS = ['Period', 'Revenue', 'Expense']

    def __init__(self, transaction_data, item_code_details, recommendation_pairs, OHT_salary = 35, specialist_salary=125):
        self.transaction_data = transaction_data
        self.converted_transaction_data = pd.DataFrame()
        self.item_code_details = item_code_details
        self.recommendation_pairs = recommendation_pairs
        self.OHT_salary = OHT_salary
        self.specialist_salary = specialist_salary

    @staticmethod
    def _first_match_position(table, key_column, key, description):
        """Return the position of the first row where ``key_column == key``.

        Raises:
            IndexError: if no row matches.  ``description`` names the kind of
                record that is missing in the error message.
        """
        # Missing values in the comparison count as "no match".
        mask = (table[key_column] == key).to_numpy(dtype=bool, na_value=False)
        positions = np.flatnonzero(mask)
        if positions.size == 0:
            raise IndexError(f"No {description} found for item_number {key!r}")
        return int(positions[0])

    def generate_converted_transaction_data(self, conversion_rate):
        """Randomly convert source transactions into recommended follow-ups.

        For each row of ``transaction_data`` one uniform random number is
        drawn; the row is skipped when that number exceeds ``conversion_rate``.
        A converted row copies the source row and then replaces:

        * ``Treatment`` with the recommended item,
        * ``Revenue`` with the item's ``price AUD``,
        * ``Expense`` with ``cost_material AUD`` plus the salary cost,
        * ``Period`` with the parsed source date (``NaT`` if unparseable)
          shifted by a random 7 to 13 days (``randint`` excludes 14).

        Random numbers come from the global ``np.random`` state, so seed it
        beforehand for reproducible results.  New rows are appended to the
        rows produced by earlier calls on the same instance.

        Args:
            conversion_rate: Probability threshold in ``[0, 1]``.

        Returns:
            ``self.converted_transaction_data`` including the new rows.

        Raises:
            IndexError: if a treatment has no recommendation pair, or the
                recommended item is missing from ``item_code_details``.
        """
        details = self.item_code_details
        pairs = self.recommendation_pairs
        new_rows = []

        for _, row in self.transaction_data.iterrows():
            # Draw order (rand, then randint for converted rows) is kept
            # stable so seeded runs stay reproducible.
            if np.random.rand() > conversion_rate:
                continue

            pair_pos = self._first_match_position(
                pairs, 'item_number', row['Treatment'], 'recommendation pair')
            recommended_item = pairs['recommended_item'].iat[pair_pos]

            # Locate the recommended item once and read all fields from it.
            item_pos = self._first_match_position(
                details, 'item_number', recommended_item, 'item_code_details entry')
            COGS_material = details['cost_material AUD'].iat[item_pos]
            duration = details['duration'].iat[item_pos]
            if details['medical_officer_new'].iat[item_pos] == 'OHT':
                salary_rate = self.OHT_salary
            else:
                salary_rate = self.specialist_salary
            COGS_salary = duration * salary_rate / 60

            converted_row = row.to_dict()
            converted_row['Treatment'] = recommended_item
            converted_row['Revenue'] = details['price AUD'].iat[item_pos]
            converted_row['Expense'] = COGS_material + COGS_salary
            period = pd.to_datetime(converted_row['Period'], errors='coerce')
            converted_row['Period'] = period + pd.Timedelta(days=np.random.randint(7, 14))

            new_rows.append(converted_row)

        if new_rows:
            # Build the frame once instead of concatenating row by row.
            generated = pd.DataFrame(new_rows)
            if self.converted_transaction_data.empty:
                self.converted_transaction_data = generated
            else:
                self.converted_transaction_data = pd.concat(
                    [self.converted_transaction_data, generated], ignore_index=True)

        return self.converted_transaction_data

    def group_by_period(self):
        """Sum ``Revenue`` and ``Expense`` of converted rows per calendar month.

        Returns:
            DataFrame with the columns ``Period`` (monthly period), ``Revenue``
            and ``Expense``.  When no transaction has been converted yet, an
            empty DataFrame with those columns is returned.
        """
        converted = self.converted_transaction_data
        if converted.empty or 'Period' not in converted.columns:
            return pd.DataFrame(columns=self._MONTHLY_COLUMNS)

        month = converted['Period'].dt.to_period("M")
        return converted.groupby(month).agg({'Revenue': 'sum', 'Expense': 'sum'}).reset_index()


In [6]:

# Read the three input tables; only the first ten transactions are kept.
transaction_data = pd.read_csv('forecast_df_treatment.csv').head(10)
item_code_details = pd.read_csv('cleaned_item_code.csv')
recommendation_pairs = pd.read_csv('recommendation_pair.csv')

# Display the transaction sample.
transaction_data


Unnamed: 0,Period,Treatment,Revenue,Expense,Customer ID
0,2025-01-01,331,179.0,40.64,Patient 428
1,2025-01-01,531,87.0,23.59,Patient 526
2,2025-01-01,656,129.0,39.08,Patient 571
3,2025-01-01,437,93.0,41.75,Patient 259
4,2025-01-01,118,56.0,7.3,Patient 75
5,2025-01-02,438,71.0,26.98,Patient 524
6,2025-01-02,419,43.0,11.64,Patient 38
7,2025-01-02,945,26.0,4.956667,Patient 204
8,2025-01-03,851,261.0,154.0,Patient 491
9,2025-01-03,595,202.0,30.62,Patient 138


In [8]:
# Build the model with the default salary rates and simulate conversions.
# NOTE: conversion relies on np.random and no seed is set in this notebook,
# so the rows displayed below differ from run to run.
model_recommendation = ModelRecommendation(
    transaction_data=transaction_data,
    item_code_details=item_code_details,
    recommendation_pairs=recommendation_pairs,
)

converted_transaction_data = model_recommendation.generate_converted_transaction_data(0.4)
converted_transaction_data

Unnamed: 0,Period,Treatment,Revenue,Expense,Customer ID
0,2025-01-13,522,133.0,47.25,Patient 259
1,2025-01-14,394,361.0,103.01,Patient 75
2,2025-01-12,942,376.0,69.36,Patient 491


In [9]:
# Monthly Revenue and Expense totals of the converted transactions.
model_recommendation.group_by_period()

Unnamed: 0,Period,Revenue,Expense
0,2025-01,870.0,219.62
