In [140]:
import pandas as pd
from typing import Tuple, Dict, List
from collections import namedtuple
import numpy as np

In [143]:
filename = "all.xlsx"
df = pd.read_excel(io=filename)

# Update header
df.columns = df.iloc[3]
holding_entity = Entity=df.iloc[0,0]

# Drop first 3 rows as it contains file meta
df = df.drop(index=[0,1,2,3])
df["Client name"] = ""

# Create entity column
# df = df.assign(Entity=holding_entity)


In [145]:
df.head(2)

3,Date,Source,Contact,Description,Reference,Currency,Debit (Source),Credit (Source),Debit (SGD),Credit (SGD),Account,Client name
4,2023-01-03 00:00:00,Receivable Invoice,STELLAR AI HOLDINGS PTE LTD,STELLAR AI HOLDINGS PTE LTD - Subscription fee...,083-15-R2,SGD,0,4400.0,0,4400.0,Sales - Subscription Fees,
5,2023-01-06 00:00:00,Receivable Invoice,Mitou Hong Kong Limited,Mitou Hong Kong Limited - Usage-based charges ...,173-1,USD,0,1432.73,0,1929.31,Sales - Usage Fees,


In [141]:
class profit_and_loss():
    def __init__(self) -> None:
        self.client_file: str = "client.xlsx"
        self.profit_loss_file: str = "all.xlsx"
        self.create_client_mapping()
        self.pl_df = self.get_profit_loss_df()
        
    def get_profit_loss_df(self) -> pd.DataFrame:
        df = pd.read_excel(io=filename)

        # Update header
        df.columns = df.iloc[3]
        holding_entity = Entity=df.iloc[0,0]

        # Drop first 3 rows as it contains file meta
        df = df.drop(index=[0,1,2,3])
        df["Client name"] = ""
        df = df.assign(Entity=holding_entity)
        return df
    
    def create_client_mapping(self) -> None:
        """Creates variables as helper for mapping
        """
        # Read excel file
        df = pd.read_excel(io=self.client_file,header=0)
        # Cleam file
        df["Client Code"] = pd.to_numeric(df["Client Code"], errors='coerce').fillna(0).astype(int)
        df["Xero Entity Name"] = df["Xero Entity Name"].fillna("TBC")

        # create auxilary lists
        churn_df = df.loc[(df["Xero Entity Name"] == "CHURNED" )].drop(columns=["Xero Entity Name"])
        tbc_df = df.loc[(df["Xero Entity Name"] == "TBC" )].drop(columns=["Xero Entity Name"])
        self.churn_dict: Dict[str,dict] = churn_df.set_index("Client Name").T.to_dict()
        self.tbc_dict: Dict[str,dict] = tbc_df.set_index("Client Name").T.to_dict()

        # create clean dict
        clean_df = df.loc[(df["Xero Entity Name"] != "TBC" ) & (df["Xero Entity Name"] != "CHURNED" )]
        self.entity_key_dict: Dict[str,List] = clean_df.set_index("Xero Entity Name").T.to_dict('list')
        self.code_key_dict: Dict[str,List]= clean_df.set_index("Client Code").T.to_dict('list')
    
    def find_client_name_by_entity_name(self, entity_name:str) -> str|None:
        """Returns client name by searching using entity name

        Args:
            entity_name (str): _description_

        Returns:
            str: _description_
        """
        record: str = self.entity_key_dict.get(entity_name,None)
        if record == None:
            return None
        return record[0]
            
    def find_client_name_by_code(self, customer_code:str) -> str|None:
        """Returns client name by searching using customer code

        Args:
            customer_code (str): _description_

        Returns:
            str: _description_
        """
        record: str = self.code_key_dict.get(customer_code,None)
        if record == None:
            return None
        return record[0]

    def force_find_client_name(self, description_field:str) -> str:
        """Brute force search client name in description field.
        Try each index in clean list, then CHURNED

        Returns:
            str: _description_
        """
        # Test each entity name in the dict
        for key in self.entity_key_dict:
            if key in description_field:
                # key subset of description
                # return client name from list
                return self.entity_key_dict[0]
        # Test all churned item
        for key in self.churn_dict:
            if key in description_field:
                # key subset of description
                return key
        return ""

    def populate_client_name(self) -> pd.DataFrame:
        # for each row in dataframe
        for index, row in self.pl_df.iterrows():
            found:bool = False
            # if there exist a contact
            if not pd.isnull(row["Contact"]):
                client_name = self.find_client_name_by_entity_name(row["Contact"])
                if client_name:
                    row["Client name"] = client_name
                    continue

            # if there exist a reference number
            if not pd.isnull(row["Reference"]):
                # get the first number
                customer_code:str = row["Reference"].split("-")[0]
                # check number against the client list
                client_name = self.find_client_name_by_code(customer_code)
                if client_name:
                    row["Client name"] = client_name
                    continue
                
            # if all fails
            # then for each valid xero entity name
            # check if it exists within the description
            # if true, then update the client field as such
            row["Client name"] = self.force_find_client_name(row["Description"])
        

    def save_to_csv(self, filename) -> None:
        np.savetxt("compiled.csv", 
           self.pl_df,
           delimiter =",", 
           fmt ='%s')
    
    def start(self) -> None:
        # update dataframe
        self.populate_client_name()
        self.save_to_csv("compiled.csv")
        

In [118]:
filename = "client.xlsx"

# Read excel file
df = pd.read_excel(io=filename,header=0)
# Cleam file
df["Client Code"] = pd.to_numeric(df["Client Code"], errors='coerce').fillna(0).astype(int)
df["Xero Entity Name"] = df["Xero Entity Name"].fillna("TBC")

# create auxilary lists
churn_df = df.loc[(df["Xero Entity Name"] == "CHURNED" )].drop(columns=["Xero Entity Name"])
tbc_df = df.loc[(df["Xero Entity Name"] == "TBC" )].drop(columns=["Xero Entity Name"])
churn_list = churn_df.values.tolist()
tbc_list = tbc_df.values.tolist()

# create clean dict
clean_df = df.loc[(df["Xero Entity Name"] != "TBC" ) & (df["Xero Entity Name"] != "CHURNED" )]
entity_key_dict = clean_df.set_index("Xero Entity Name").T.to_dict('list')
code_key_dict = clean_df.set_index("Client Code").T.to_dict('list')

  entity_key_dict = clean_df.set_index("Xero Entity Name").T.to_dict('list')
  code_key_dict = clean_df.set_index("Client Code").T.to_dict('list')


In [142]:
t = profit_and_loss()
t.start()

  self.entity_key_dict: Dict[str,List] = clean_df.set_index("Xero Entity Name").T.to_dict('list')
  self.code_key_dict: Dict[str,List]= clean_df.set_index("Client Code").T.to_dict('list')


KeyError: 'Contact'

In [129]:
d = churn_df.set_index("Client Name").T.to_dict('list')
print(d)

{'Carro SG': [7], 'Koinworks': [11], 'Grosiraja': [12], 'Speeddoc': [13], 'Medify Air': [14], 'Double Dragon': [16], 'Ms. Maricela Corpuz': [17], 'Manulife (Aspire Alliance)': [21], 'Wynnes Financial Advisors': [22], 'Redtone': [23], 'Mr Bayani Quilala V': [24], 'Grace Cornejo (Sunlife)': [27], 'Klickair': [28], 'Lifepal': [29], 'Mr. MARTI GIMENEZ (Prudential PH)': [31], 'Ms. Maria Racimo': [32], 'Ms. Hazren Vargas (AXA)': [33], 'Ms. Ginelle Sequitin (Sunlife)': [34], 'Mr. Collin Krishia Templo': [36], 'Mr. Ernest Villela': [37], 'Kudotek (Tekcash)': [39], 'Edelyn Yu (Sunlife)': [40], 'Akseleran': [41], 'Nelson Sequitin': [42], 'Rochelle Visda (Traiblazer)': [43], 'Leslie Abrenica (Maxicare)': [44], 'Jhoanna Marie E. Vorstenbosch (Sunlife)': [45], 'Nina Manipon (Manulife)': [46], 'Mr. Jeffrey Abergos (Manulife)': [47], 'Ms. Leony Escosia (Prudential)': [48], 'Pondo Peso': [49], 'Mr. Carlos Cervantes': [50], 'Timothy Francis Lim': [51], 'Rhea Adlay (Pacific Cross)': [52], 'Jenny Acob': 