In [1]:
# CHAT DEMO

In [2]:
from flask import Flask, request, jsonify
from flask_cors import CORS
import os
import pdfplumber
import pandas as pd

from google import genai
from google.genai import types
from google.genai.types import Tool

import json
from glom import glom, T
from collections import defaultdict


In [3]:
app = Flask(__name__)
CORS(app)

@app.route('/hello')
def hello():
    return jsonify({"message": "Success"})


In [4]:
# TOOLS

In [5]:
update_json_tool =  types.FunctionDeclaration(
    name="update_json",
    description="Before proceeding to the next question, once the required information is extracted from the user for the current question, this function will be called to update the json object with the new information. The updated json schema will be used to fill the ITR form.",
    parameters={ 
        "type": "object",
        "properties": {
            "data": {
                "type": "object",
                "description": "Extracted data from the user, in json format where key is the ITR section and value is the corresponding information to be filled in that section.",
            }
        },
        "required": ["data"],
    },
)

In [6]:
get_schema_tool =  types.FunctionDeclaration(
    name="get_schema_properties",
    description="Get the schema properties for a specific target key.",
    parameters={
        "type": "object",
        "properties": {
            "target_key": {
                "type": "string",
                "description": "The target key to retrieve schema properties for."
            }
        },
        "required": ["target_key"],
    },
)

In [7]:
get_ais_summary_tool =  types.FunctionDeclaration(
    name="get_ais_summary",
    description="This function returns the AIS summary of the user for the given financial year. Which can be used to answer the the user queries related to his AIS.",
    parameters={

    },
)

In [8]:
get_question_tool =  types.FunctionDeclaration(
    name="get_question",
    description="Get next question to ask the user to gather more information about income sources not captured in AIS.",
    parameters={

    },
)

In [9]:
grounding_tool = types.Tool(
    google_search=types.GoogleSearch()
)

In [10]:
prompt = f"""
        You are a highly experienced Financial Expert with specialized expertise in Indian Income Tax Return (ITR) filing and comprehensive knowledge of all possible income sources. Your role is to help taxpayers identify and gather information about income sources that may not be captured in their Annual Information Statement (AIS).

        Your primary task is to systematically identify additional income sources beyond what's mentioned in the AIS document, gather complete details for each confirmed income source, and ensure comprehensive tax filing.

        **Your Expertise Includes:**
        - Deep understanding of all income categories under Income Tax Act
        - Knowledge of income sources often missed in AIS statements
        - Experience in structured data collection for ITR filing
        - Understanding of ITR schema requirements and field dependencies
        - Ability to guide taxpayers through comprehensive income disclosure

        **Available Tools & Workflow:**

        1. **get_question()** - Use this tool to retrieve possible income sources
        - Returns a dictionary value with section as key and possible income sources as value
        - Continue using until tool returns "No questions left"
        - Use this to systematically cover all potential income categories

        2. **get_schema_properties(key)** - use when user confirms having income from a source
        - Input: The key returned from get_question for confirmed income source
        - Returns: Schema structure for that particular property in ITR document
        - Use this to understand what details need to be collected

        3. **update_json(details)** - use when you have gathered all required details
        - Input: Complete details formatted according to the schema requirements
        - Use this to save the collected information for ITR filling
        
        4. **get_ais_summary()** - use this tool to get the AIS summary of the user
        - Input: None
        - Returns: AIS summary of the user for the given financial year
        - Use this answer the user queries related to his AIS

        **Systematic Workflow:**
        1. **Initial Assessment**: Review the provided AIS statement to understand existing income sources
        2. **Question Generation**: Use get_question() to identify potential additional income sources
        3. **User Interaction**: Ask users about each potential income source in a conversational manner
        4. **Schema Retrieval**: For confirmed income sources, use get_schema_properties() to understand requirements
        5. **Detail Collection**: Gather all necessary information according to the schema
        6. **Data Update**: Use update_json() to save complete details for each income source


        **Conversation Guidelines:**
        - Be friendly and professional in your communication
        - Ask clear, specific questions about potential income sources
        - Explain why certain income sources might not appear in AIS
        - Help users understand the importance of complete disclosure
        - Break down complex financial terms into simple language
        - Be patient and thorough in collecting details

        **Instructions:**
        - Maintain a conversational and helpful tone throughout
        - While Gathering details make sure you only ask one or two details at a time and be friendly.
        - Ensure complete data collection before using update_json()
        - Continue until all potential income sources have been explored
        - Provide clear explanations for why additional information is needed

        **Critical Success Factors:**
        - Complete coverage of all potential income sources
        - Accurate schema-compliant data collection
        - User-friendly interaction and clear communication
        - Systematic workflow execution using provided tools
        - Comprehensive tax compliance assistance

        Use the External Tool calls to get the next question whenever you don't have any questions to ask the user.

        """


In [11]:


        # **Important Considerations:**
        # - Some income sources may not be reported by third parties in AIS
        # - Agricultural income, rental income, business income may need manual disclosure
        # - Foreign income and assets have specific reporting requirements
        # - Previous year adjustments and brought forward losses need attention
        # - Exempt income also needs to be disclosed in some cases


In [12]:
client = genai.Client(api_key="AIzaSyAZLY9vmRdQCpODSnpzF_BA3iXTmbJ2y6A")

config = types.GenerateContentConfig(
    system_instruction=prompt,
    tools=[
        types.Tool(
            function_declarations=[get_question_tool,get_schema_tool,update_json_tool,get_ais_summary_tool],
        ),
    ]
)


In [13]:
summary = {}
ais_data = {}
additional = {}
history = []

In [14]:
additional = {
  'ITR1_IncomeDeductions': 'Salary Income, Income from One House Property (if applicable), and other general deductions not in AIS',
  'Schedule80C': 'Deductions under Section 80C (e.g., PPF, LIC, ELSS)',
  'ScheduleUs24B': 'Deduction for Interest on Housing Loan (Section 24B)',
  'TaxPayments': 'Advance Tax and Self-Assessment Tax paid',
  'Schedule80D': 'Deductions under Section 80D (Medical Insurance Premium)',
}

In [15]:
def get_schema_properties(target_key):
    with open(r"D:\Akaike_Training\LegalLens.ai\Backend\resolved_schema_ITR_1.json") as f:
        data = json.load(f)

    values = glom(data, (f'**.{target_key}',))
    field_schema = {target_key: values[0]['properties']}
    print(field_schema)
    return field_schema

In [16]:
def update_json(data) :
    global summary , ais_data
    for key, value in data.items() :
        if key in summary.keys() :
            summary[key].append(value)
        else :
            summary[key] = [value]
        if key in ais_data.keys() :
            ais_data[key].append(value)
        else :
            ais_data[key] = [value]

In [17]:
def get_question():
    global additional
    if additional == {} :
        return "No more questions left" 
    return additional.popitem()

In [18]:
def get_summary():
    return summary

In [19]:
get_summary()

{}

In [20]:
def get_ais_summary(pdf_path, password=None):
    global summary, ais_data
    with pdfplumber.open(pdf_path, password=password) as pdf:
        second_rows = []
        temp_ais_data = defaultdict(list)
        
        columns = None

        for page in pdf.pages:
            for table in page.extract_tables():
                if len(table) >= 3 and len(table[1]) >= 2:
                    key = table[1][1]   
                    data_rows = table[2:]  
                    headers = table[2]     
 
                    df = pd.DataFrame(data_rows)

                    json_str = df.to_json(orient="records")
                    temp_ais_data[key].append(json.loads(json_str))
                if table and len(table) > 2:
                    if columns is None:
                        columns = table[0]
                    second_rows.append(table[1]) 

        df_second_rows = pd.DataFrame(second_rows)
        summary = df_second_rows.to_json(orient="records")

        ais_data = dict(temp_ais_data)

        return [summary, ais_data]


In [21]:
history = list([
        # types.Content(
        #     role="model",
        #     parts=[
        #         types.Part.from_text(text=prompt),
        #     ],
        # ),
        types.Content(
            role="model",
            parts=[
                types.Part.from_text(text="Hello! I'm here to help you identify and gather information about all your income sources for accurate tax filing. I'll ask you some questions to ensure we cover everything comprehensively."),
            ],
        ),
    ])

In [22]:
@app.route("/upload", methods=["POST"])
def upload_pdf():
    global summary, ais_data
    if "pdf" not in request.files:
        return jsonify({"message": "No file uploaded"}), 400

    pdf_file = request.files["pdf"]
    password = request.form.get("password")
    print(f"Received password: {password}")
    # Save the file (optional)
    # save_path = os.path.join("uploads", pdf_file.filename)
    # os.makedirs("uploads", exist_ok=True)
    # pdf_file.save(save_path)

    with pdfplumber.open(pdf_file, password=password) as pdf:
        second_rows = []
        temp_ais_data = defaultdict(list)
        
        columns = None

        for page in pdf.pages:
            for table in page.extract_tables():
                if len(table) >= 3 and len(table[1]) >= 2:
                    key = table[1][1]   
                    data_rows = table[2:]  
                    headers = table[2]     
 
                    df = pd.DataFrame(data_rows)

                    json_str = df.to_json(orient="records")
                    temp_ais_data[key].append(json.loads(json_str))
                if table and len(table) > 2:
                    if columns is None:
                        columns = table[0]
                    second_rows.append(table[1]) 

        df_second_rows = pd.DataFrame(second_rows)
        summary = df_second_rows.to_json(orient="records")

        ais_data = dict(temp_ais_data)

        # return [summary, ais_data]

    return jsonify({"message": "File uploaded successfully", "file_path": "save_path","summary": summary,"ais_data": ais_data}), 200


In [23]:
@app.route('/get_details', methods=['POST'])
def get_details_chat() :
    global history
    data = request.get_json()
    user_message = data.get("user_query")
    is_new_chat = data.get("is_new_chat")
    if is_new_chat :
        history = list([
            types.Content(
                role="model",
                parts=[
                    types.Part.from_text(text="Hello! I'm here to help you identify and gather information about all your income sources for accurate tax filing. I'll ask you some questions to ensure we cover everything comprehensively."),
                ],
            ),
        ])
    history.append(
        types.Content(
            role="user",
            parts=[
                types.Part.from_text(text=user_message),
            ],
        )
    )

    response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents = history ,
    config=config
    )
    if response.function_calls :
        for call in response.function_calls :
            print(call.name)
            print(call.args)
            if call.name == "get_question" :
                question = get_question()
                print(question)
                history.append(response.candidates[0].content)
                function_response_contents  = types.Part.from_function_response(
                    name=call.name,
                    response={"contents": question}
                )
                history.append(
                    function_response_contents
                )
            
            if call.name == "get_schema_properties" :
                schema = get_schema_properties(call.args["target_key"])
                print(schema)
                history.append(response.candidates[0].content)
                function_response_contents  = types.Part.from_function_response(
                    name=call.name,
                    response={"contents": schema}
                )
                history.append(
                    function_response_contents
                )
            
            
            if call.name == "get_ais_summary" :
                schema = get_summary()
                print(schema)
                history.append(response.candidates[0].content)
                function_response_contents  = types.Part.from_function_response(
                    name=call.name,
                    response={"contents": schema}
                )
                history.append(
                    function_response_contents
                )
            
            if call.name == "update_json" :
                schema = update_json(call.args["data"])
                print(schema)
                history.append(response.candidates[0].content)
                function_response_contents  = types.Part.from_function_response(
                    name=call.name,
                    response={"contents": schema}
                )
                history.append(
                    function_response_contents
                )
        response = client.models.generate_content(
        model="gemini-2.5-flash",
        contents = history,
        config=config ,
        )

    history.append(
        types.Content(
            role="model",
            parts=[
                types.Part.from_text(text=response.text),
            ],
        )
    )
    
    return jsonify({"response": response.text})
    



In [24]:
if __name__ == '__main__':
    app.run(debug=False,port=3500)

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:3500
Press CTRL+C to quit


In [None]:
history

[Content(
   parts=[
     Part(
       text="Hello! I'm here to help you identify and gather information about all your income sources for accurate tax filing. I'll ask you some questions to ensure we cover everything comprehensively."
     ),
   ],
   role='model'
 ),
 Content(
   parts=[
     Part(
       text='Yeahh '
     ),
   ],
   role='user'
 ),
 Content(
   parts=[
     Part(
       function_call=FunctionCall(
         args={},
         name='get_question'
       ),
       thought_signature=b'\n\xaa\x02\x01\xd1\xed\x8ao\xe5\xd6\xe9%i\x9f\x86\x92\x87\x8f\x80\xe4\xf2\x19{v9\x96\x9a&j\x15\x9eJ\xa1\xae\xa0\xdb\x9c\xce\xc8\x9e*\x94\xb9\x10\x93\x8c\xbb\xa0J\xe5-l\x8dD\xb8\xbb\xfe\xe8\xe4\x92\xc9\x942t\x87\xa6\xaf\xe3\x0c\xe1C\xeaWb\xf7\xee\x0c\x8e\x03\x8b"\x9f\x0b\xdd\x9b\xbbK*\xa2s\xf3eI\xc5I\xb9 ...'
     ),
   ],
   role='model'
 ),
 Part(
   function_response=FunctionResponse(
     name='get_question',
     response={
       'contents': (
         'Schedule80D',
         'Deduct

In [None]:
for i in history :
    if hasattr(i,'role') :
        print(f"=> {i.role} : {i.parts[0].text}")

=> model : Hello! I'm here to help you identify and gather information about all your income sources for accurate tax filing. I'll ask you some questions to ensure we cover everything comprehensively.
=> user : Hello
=> model : None
=> model : To start, let's talk about deductions. Do you have any medical insurance premiums that you've paid for yourself, your family, or your parents? These might qualify for deductions under Section 80D.
=> user : Before that can I ask a question ??
=> model : Of course! Please feel free to ask any questions you have. I'm here to help clarify anything that's unclear.
=> user : What was my total dividends last Year ??
=> model : None
=> model : Based on your AIS summary, your total dividend income from MOIL LIMITED for the last year was ₹32. This includes both the dividend where TDS was deducted under Section 194 and the dividend reported under SFT-015.
=> user : Great
=> model : Now, coming back to our discussion on deductions under Section 80D, do you 

In [None]:
for i in history :
    if hasattr(i,'role') :
        print(f"=> {i.role} : {i.parts[0].text}")

=> model : Hello! I'm here to help you identify and gather information about all your income sources for accurate tax filing. I'll ask you some questions to ensure we cover everything comprehensively.
=> user : Yeahh 
=> model : None
=> model : The first item on our checklist is "Deductions under Section 80D (Medical Insurance Premium)". This section relates to deductions you can claim for health insurance premiums paid for yourself, your family, or your parents.

Do you pay any medical insurance premiums? If so, for whom (yourself, spouse, children, parents) and what is the amount?
=> user : I dont have any Insurance  
=> model : No problem at all. We've noted that you don't have any medical insurance premiums to claim under Section 80D.

Let's move on to the next potential income source to ensure we cover all bases.

=> model : Next up is "Advance Tax and Self-Assessment Tax paid." This refers to any tax you might have paid during the financial year, either in advance (if you anticip