In [97]:
import os
from dotenv import load_dotenv
from pydantic import BaseModel, Field
load_dotenv()


class Narration_Type(BaseModel):
    transaction_type: str = Field(description="The type of transaction from the list 'food', 'rent', 'family', 'shopping', 'self-care', 'transport', 'other', 'unknown'")
    mine: float = Field(description="The amount of money spent by the me")
    lent: float = Field(description="The amount of money lent by me. If no money lent, then 0")
    lent_by: str = Field(description="The person to whom the money is lent from the list 'pallavi', 'prateek', 'aws', 'arshad', 'none'")
    reasoning: str = Field(description="The reasoning behind the selecting transaction_type, the amount of money spent by me and lent")


def get_model(model: str = 'deepseek-r1:7b', provider: str = 'local'):
    if (provider == 'local'):
        from langchain_ollama import ChatOllama
        llm = ChatOllama(model=model, temperature=0)
        structured_llm = llm.with_structured_output(Narration_Type,
                                                    method="json_schema")
        return structured_llm
    elif (provider == 'aws'):
        from langchain_aws import ChatBedrockConverse
        import boto3
        access_key = os.getenv('ACCESS_KEY')
        secret_key = os.getenv('SECRET_KEY')
        bedrock_client = boto3.client('bedrock-runtime',
                                      region_name='us-east-1',
                                      aws_access_key_id=access_key,
                                      aws_secret_access_key=secret_key)
        llm = ChatBedrockConverse(client=bedrock_client,
                                  model=model,
                                  temperature=0)
        structured_llm = llm.with_structured_output(Narration_Type,
                                                    method="json_schema")
        return structured_llm

def get_embeddings(model:str='deepseek-r1:7b', provider:str='local'):
    if(provider == 'local'):
        from langchain_ollama import OllamaEmbeddings
        embeddings=OllamaEmbeddings(model=model)
        return embeddings
    elif(provider == 'aws'):
        from langchain_aws import BedrockEmbeddings
        import boto3
        access_key=os.getenv('ACCESS_KEY')
        secret_key=os.getenv('SECRET_KEY')
        bedrock_client=boto3.client('bedrock-runtime', region_name='us-east-1', aws_access_key_id=access_key, aws_secret_access_key=secret_key)
        embeddings=BedrockEmbeddings(bedrock_client, model=model)
        return embeddings


In [None]:
from langchain_core.prompts import PromptTemplate
def classify_narration(narration:str, categories:list, users:list, amount:float):
    # llm=get_model()
    llm=get_model(model='anthropic.claude-3-sonnet-20240229-v1:0', provider='aws')
    category_list=', '.join(categories)
    users_list=', '.join(users)
    prompt=PromptTemplate(
        template=
        """
        'System: You will help me classify my bank transaction into one of the following categories: {categories}.
        Also determine if entire money is spent by me or split between me and someone else. 
        If split, it has to be between following users: {users}'
        Here are some details related to various categories:
        - rent: Includes payments made for rent, which includes my house rent, online subscriptions like netflix, Amazon Prime, jiohotstar. 
        Washing machine rent, Maid rent.
        - food: Includes payment related to food items like cake, pizza, burger. Also purchases from supermarket like Ratnadeep, Tata Star and DMart. Any amount less than 100 is safe to be considered as food if you
        are not able to determine the transaction category. This category will comprise the maximum number of transactions.
        - family: The user will specifically mention family in the transaction. It includes payments made to family members like my father, mother, brother, sister, wife, husband, son, daughter.
        - transport: It includes payments made for fuel, and online transport services like ola and uber.
        - self-care: It includes payments made for personal care like skin-care, hair-cut, gym. Also includes payments for fun activities like movies, comedy shows and concerts.
        - shopping: It includes payments made for shopping like clothes, shoes, bags, accessories, electronics, gadgets, furniture, home decor. The transactions include brands like addidas, nike, zara, h&m, apple, samsung, oneplus, mi, ikea, pepperfry.
        - unknown: If you are unsure of where to put the transaction. Select this category.

        Here are some details related to the users who split money with me:
        - Only if the user says 'by 2' then only split the amount otherwise entire amount is spent by me.
        - All the split transactions will have 'by x' or 'By x' in the narration. This means the money will be equally split by x people.
        - If you are certain that money is lent by me and I have not mentioned any name in the transaction then select 'pallavi'
        - pallavi : My flatmate, will be used in most of the transactions related to rent, food, transport, self-care, shopping.
        - prateek : My other flatmate, rarely used in transactions.
        - aws: My company, which involves transactions related to reimbursements.
        - arshad: My colleague, involes transaction related to food.
        'User': '{narration}. The total amount spent is {amount}.'
        """,
        input_variables=['narration', 'categories', 'users', 'amount']
    )
    chain=prompt|llm
    try:
        response=chain.invoke({'narration':narration, 'categories':category_list, 'users':users_list, 'amount':amount})
        return response
    except Exception as e:  
        print("Error:",e)
    
    

In [99]:
# categories=['food', 'rent', 'family', 'shopping', 'self-care', 'transport', 'other', 'unknown']
# users=['pallavi', 'prateek', 'aws', 'arshad']
# response=classify_narration('UPI-JAMALPUR  VIJAY-Q786072912@YBL-YESB0YBLUPI-100832556878-CHOCLATE ROOM by 2 ',categories, users, 1000.00)

In [100]:
# print(response.mine)

In [101]:
import pandas as pd
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)
filename='march6'
filepath='./data/'+filename+'.DELIMITED'
columns=['Date','Narration','Value_Date', 'Debit_Amount', 'Credit_Amount','Chq_Ref_Number', 'Closing_Balance']
df=pd.read_csv(filepath, names=columns)
df = df.drop(index=0)
df = df.drop(columns=['Chq_Ref_Number', 'Value_Date'])

# Reset the index if needed
df = df.reset_index(drop=True)
df.head(2)

Unnamed: 0,Date,Narration,Debit_Amount,Credit_Amount,Closing_Balance
0,28/02/25,UPI-PRATEEK RANJAN DUTTA-DADA.PRATEEK.00358@OKHDFCBANK-HDFC0004809-100743026595-RENT MAID PLUS 15K,0.0,35000.0,1118856.67
1,01/03/25,UPI-WWW EXCITEL COM-EXCITEL.PAYU@HDFCBANK-HDFC0000499-100079521366-UPI TRANSACTION,824.82,0.0,1118031.85


In [102]:
import pandas as pd
import time
initial_balance=1083856.67
bank_balance=1083856.67
current_balance=1083856.67
input_amount=0
lent_amount=0

categories=['food', 'rent', 'family', 'shopping', 'self-care', 'transport', 'other', 'unknown']
users=['pallavi', 'prateek', 'aws', 'arshad']
target_df = pd.DataFrame(columns=['amount', 'mine', 'lent', 'balance'])


for index,row in df.iterrows():
    print(row['Narration'], row['Debit_Amount'], row['Credit_Amount'])
    print("-"*80)
    if float(row['Credit_Amount'].strip()) > 0.0:
        # Create a new row for the target DataFrame
        amount=float(row['Credit_Amount'].strip())
        current_balance=current_balance+amount
        bank_balance=bank_balance+amount
        new_row = {
            'narration':row['Narration'].lower(),
            'amount': row['Credit_Amount'].strip(),
            'type': 'input_amount',
            'mine': 0,  # Set appropriate value if needed
            'lent': 0,  # Set appropriate value if needed
            'lent_by':'None',
            'balance': current_balance, # Set appropriate value if needed
            'closing_balance': row['Closing_Balance'].strip(),
            'credit':'Y',
            'reasoning':'None'
            
        }
        # Append the new row to the target DataFrame
        target_df = pd.concat([target_df, pd.DataFrame([new_row])], ignore_index=True)
    else:
        amount=float(row['Debit_Amount'].strip())
        current_balance=current_balance-amount
        bank_balance=bank_balance-amount

        response=classify_narration(narration=row['Narration'].lower(),categories=categories, users=users, amount=row['Debit_Amount'])
        transaction_type=response.transaction_type.lower()
        mine=response.mine
        lent=response.lent
        lent_by=response.lent_by
        reasoning=response.reasoning



        new_row = {
            'narration':row['Narration'].lower(),
            'amount': row['Debit_Amount'].strip(),
            'type': transaction_type,
            'mine': mine,  # Set appropriate value if needed
            'lent': lent,  # Set appropriate value if needed
            'lent_by':lent_by,
            'balance': current_balance, # Set appropriate value if needed
            'closing_balance': row['Closing_Balance'].strip(),
            'credit':'N',
            'reasoning':reasoning
        }
        # Append the new row to the target DataFrame
        target_df = pd.concat([target_df, pd.DataFrame([new_row])], ignore_index=True)
    
    print("-"*80)

UPI-PRATEEK RANJAN DUTTA-DADA.PRATEEK.00358@OKHDFCBANK-HDFC0004809-100743026595-RENT MAID PLUS 15K                                  0.00            35000.00     
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
UPI-WWW EXCITEL COM-EXCITEL.PAYU@HDFCBANK-HDFC0000499-100079521366-UPI TRANSACTION                                                824.82                0.00     
--------------------------------------------------------------------------------


  target_df = pd.concat([target_df, pd.DataFrame([new_row])], ignore_index=True)


--------------------------------------------------------------------------------
UPI-JIOHOTSTAR-HOTSTARONLINE@YBL-YESB0YBLUPI-506055997561-COLLECT REQUEST FR                                                       79.00                0.00     
--------------------------------------------------------------------------------


KeyboardInterrupt: 

In [95]:
target_df.head(10)

Unnamed: 0,amount,mine,lent,balance,narration,type,lent_by,closing_balance,credit,reasoning
0,35000.0,0.0,0.0,1118856.67,upi-prateek ranjan dutta-dada.prateek.00358@okhdfcbank-hdfc0004809-100743026595-rent maid plus 15k,input_amount,,1118856.67,Y,
1,824.82,824.82,0.0,1118031.85,upi-www excitel com-excitel.payu@hdfcbank-hdfc0000499-100079521366-upi transaction,rent,,1118031.85,N,"The user's transaction details indicate that the amount spent is for a service provided by UPI, which falls under the category of 'rent' as it includes online subscriptions like Excitel and Payu."
2,79.0,79.0,0.0,1117952.85,upi-jiohotstar-hotstaronline@ybl-yesb0yblupi-506055997561-collect request fr,unknown,,1117952.85,N,"The transaction details do not specify any category such as food, rent, family, shopping, self-care, transport, or unknown. The amount is less than the threshold for certain categories like food (if it's a small purchase) but without more context, it's classified as 'unknown'. Additionally, there are no mentions of splitting between users, so the entire amount remains with the user."
3,500.0,500.0,0.0,1117452.85,upi-sandhya fuel station-paytm-65718951@ptys-yesb0ptmupi-100786433017-upi,transport,,1117452.85,N,"The transaction mentions 'fuel station', which falls under transport category as it includes payments for fuel and online transport services. The amount is entirely spent by the user."
4,10.0,10.0,0.0,1117442.85,upi-adigoppula shravan k-paytmqr5yzxcj@ptys-yesb0ptmupi-100787095809-upi,rent,,1117442.85,N,"The user mentioned 'upi-adigoppula shravan k-paytmqr5yzxcj@ptys-yesb0ptmupi-100787095809-upi' which is a transaction ID. The amount spent is 10.00, and the category is rent as it includes house rent or similar."
5,3000.0,3000.0,0.0,1114442.85,nwd-435584xxxxxx5455-id043212-hyderabad,rent,,1114442.85,N,"The transaction amount of 3000 falls under the category of rent as it includes payments for online subscriptions like Netflix, Amazon Prime, Jio-Hotstar. The entire amount is spent by me."
6,90454.0,90454.0,0.0,1023988.85,upi-md arshad ahmed-itzzarshad19@okaxis-utib0000376-100795816337-half amount,unknown,,1023988.85,N,"The transaction details do not specify any category such as food, rent, family, shopping, self-care, transport, or other. The user mentioned 'upi-md arshad ahmed-itzzarshad19@okaxis-utib0000376-100795816337-half amount' and the total spent is 90454.00. There's no indication that this amount was split between any of the specified users (pallavi, prateek, aws, arshad) or if it was entirely spent by the user."
7,1000.0,500.0,500.0,1022988.85,upi-jamalpur vijay-q786072912@ybl-yesb0yblupi-100832556878-choclate room by 2,food,2.0,1022988.85,N,"The transaction mentions 'vijay-q78607 and includes 'choclate room' which is a food-related place. The amount is split by 2, so each person contributes 500."
