In [2]:
import os
import time

import faiss
import numpy as np
import torch
from dotenv import load_dotenv

# --- FAISS and Local Model Imports ---
from sentence_transformers import SentenceTransformer
from langchain_community.llms import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
# --- LangChain Imports for RAG ---
from langchain_community.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# Load settings from a local .env file, if there is one, via python-dotenv.
# NOTE(review): none of the visible cells reads an environment variable, so this
# is presumably for library-level settings (e.g. Hugging Face tokens) - confirm.
load_dotenv()

True

In [3]:
# Run on the GPU when torch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f"Using device: {device}")

# Sentence-embedding model: turns guide chunks and queries into vectors.
embedding_model = SentenceTransformer('all-MiniLM-L6-v2', device=device)

# Text-to-text LLM that writes the report. The "base" Flan-T5 checkpoint is
# used so the notebook stays runnable on consumer hardware.
llm_model_id = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(llm_model_id)
model = AutoModelForSeq2SeqLM.from_pretrained(llm_model_id)
model = model.to(device)

# Wrap model + tokenizer in a generation pipeline. The pipeline takes a device
# index rather than a name: 0 selects the first GPU, -1 selects the CPU.
pipe = pipeline(
    task="text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=1024,  # generous output budget for the detailed final analysis
    device=(0 if device == 'cuda' else -1),
)

# Expose the pipeline as a LangChain LLM so it can be used inside a chain.
llm = HuggingFacePipeline(pipeline=pipe)

print("Models loaded successfully! ✅")

Using device: cuda


Device set to use cuda:0


Models loaded successfully! ✅


  llm = HuggingFacePipeline(pipeline=pipe)


In [None]:
# This is the knowledge base for your bot
doc_directory = './documents'  # not read by this cell; the guide is the single file below
guide_path = "./manageMoney.txt"

# Log the file that is really opened. The message previously named
# doc_directory, which the loader never touches, so the log was misleading.
print(f"Loading financial guide from '{guide_path}'...")
loader = TextLoader(guide_path, encoding='utf8')
documents = loader.load()

# Split the guide into overlapping chunks so each embedding covers a short passage.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
all_chunks = text_splitter.split_documents(documents)
print(f"Split guide into {len(all_chunks)} text chunks.")
if not all_chunks:
    # Fail here with a clear message instead of an IndexError on embeddings.shape below.
    raise ValueError(f"No text chunks were produced from '{guide_path}'; is the file empty?")

print("Creating embeddings and building FAISS index...")
chunk_texts = [chunk.page_content for chunk in all_chunks]
embeddings = embedding_model.encode(chunk_texts, show_progress_bar=True)
# FAISS operates on float32 matrices; make the dtype and memory layout explicit.
embeddings = np.ascontiguousarray(embeddings, dtype=np.float32)

# Exact (brute-force) L2 index; row i of the index corresponds to all_chunks[i].
d = embeddings.shape[1]
index = faiss.IndexFlatL2(d)
index.add(embeddings)

print("FAISS index (knowledge base) is ready! ✅")

Loading financial guide from './documents'...
Split guide into 112 text chunks.
Creating embeddings and building FAISS index...


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

FAISS index with <faiss.swigfaiss_avx2.IndexFlatL2; proxy of <Swig Object of type 'faiss::IndexFlatL2 *' at 0x00000239AF987750> > vectors created.
FAISS index (knowledge base) is ready! ✅


In [11]:
# Short "constitution" for the model: the persona, three core rules (analyse
# rather than advise, rely only on the retrieved context, always end with the
# disclaimer) and the task statement. It is passed as the `constitution` input
# when the analysis chain is invoked.
FIN_FRIEND_CONSTITUTION_SHORT = """
**Persona:** You are Fin-Friend, an expert and empathetic financial guide in India. Your tone is supportive and simple.

**Core Rules:**
1.  **Analyze, Don't Advise:** Your goal is a financial health check-up. NEVER give direct financial advice. Frame outputs as "educational information" or "examples."
2.  **Use Context:** Base all strategies ONLY on the provided context from the financial guide.
3.  **Disclaimer:** Every analysis MUST end with: "This is for informational purposes only and is not financial advice. Please consult with a qualified financial advisor before making any decisions."

**Your Task:**
Analyze the user's data using the retrieved context and provide a structured financial health report with actionable educational points. Start your internal reasoning with "Let's think step by step".
"""

def format_user_data_for_llm(data):
    """
    Render the collected answers as a Markdown-style text block for the LLM.

    Each top-level key becomes a bold heading (underscores turned into spaces,
    words title-cased). A plain value is written on the line under its heading;
    a nested dict (e.g. the expenses breakdown) is written as one "- Label: value"
    bullet per entry. Every section is followed by a blank line.

    Args:
        data: Mapping of snake_case field names to either a value or a dict of
            snake_case sub-field names to values.

    Returns:
        The formatted report as a single newline-joined string ("" for empty input).
    """
    def _label(raw_key):
        # "rent_or_emi" -> "Rent Or Emi"
        return raw_key.replace('_', ' ').title()

    sections = []
    for field, answer in data.items():
        heading = f"**{_label(field)}:**"

        if not isinstance(answer, dict):
            sections.append(f"{heading}\n{answer}")
        else:
            # Nested mapping: heading first, then one bullet per entry.
            sections.append(heading)
            sections.extend(
                f"- {_label(item)}: {amount}" for item, amount in answer.items()
            )

        sections.append("")  # blank separator line after every section

    return "\n".join(sections)

def get_user_input(prompt_text):
    """Show `prompt_text`, then a "> " marker on the next line, and return what the user types."""
    full_prompt = f"{prompt_text}\n> "
    reply = input(full_prompt)
    return reply

In [13]:
# --- Phase 1: Structured Data Gathering ---
# Answers are stored as the raw strings the user types, in the order asked.
user_data = {}
expenses = {}

print("Hello! I'm Fin-Friend... Let's get started!\n")

# Gather income
for income_key, income_prompt in (
    ('income_salary', "First, what is your fixed monthly take-home salary?"),
    ('income_other', "Do you have any other sources of income (like freelance work or bonuses)?"),
):
    user_data[income_key] = get_user_input(income_prompt)

# Gather expenses one by one for clean data
print("\nGreat. Now let's break down your monthly expenses. Just enter the amount for each.")
for expense_key, expense_prompt in (
    ('rent_or_emi', "- Rent or Home Loan EMI:"),
    ('utilities', "- Electricity, Water, Gas:"),
    ('internet_and_phone', "- Internet & Phone Bills:"),
    ('groceries', "- Groceries:"),
    ('eating_out', "- Eating Out/Ordering In:"),
    ('transport', "- Fuel/Public Transport:"),
    ('shopping', "- Shopping (Clothes, etc.):"),
    ('entertainment', "- Entertainment & Subscriptions:"),
):
    expenses[expense_key] = get_user_input(expense_prompt)
# The whole breakdown is stored as one nested dict under a single key.
user_data['expenses_structured'] = expenses

# Gather goals, investments, and debts
for profile_key, profile_prompt in (
    ('financial_goals', "\nWhat are your major financial goals (e.g., vacation, car, retirement)?"),
    ('current_investments', "\nBriefly, what investments do you currently have (e.g., Mutual Funds, PPF, Stocks)?"),
    ('outstanding_debts', "\nBriefly, what outstanding debts do you have (e.g., Credit Card, Personal Loan)?"),
):
    user_data[profile_key] = get_user_input(profile_prompt)

print("\nThank you for the information. Generating your personalized financial health report...")

# --- Phase 2: RAG-Powered Analysis ---

# Turn the answers into the readable text block that is handed to the LLM
formatted_user_data = format_user_data_for_llm(user_data)


Hello! I'm Fin-Friend... Let's get started!


Great. Now let's break down your monthly expenses. Just enter the amount for each.

Thank you for the information. Generating your personalized financial health report...


In [None]:
# The query used to find relevant information from your financial guide
analysis_query = "Provide a comprehensive financial health check-up, including analysis of cash flow, savings rate, and debt. Offer educational strategies on budgeting, debt management, and investing based on this user's data."

# Retrieve relevant context from the FAISS knowledge base
query_embedding = embedding_model.encode([analysis_query])
k = 3 # Retrieve top 3 most relevant chunks to save tokens
distances, indices = index.search(query_embedding, k)

# indices[0] holds the neighbour ids for the single query. When the index has
# fewer than k vectors FAISS pads the result with -1; those must be skipped,
# because all_chunks[-1] would silently insert the LAST chunk as "context".
retrieved_context = "\n\n".join(
    all_chunks[i].page_content for i in indices[0] if i >= 0
)





--- Your Financial Health Report ---

**Your Task:** Analyze, Don't Advise:** Your goal is a financial health check-up. NEVER give direct financial advice. Frame outputs as "educational information" or "examples." 3. **Your Task:** Analyze, Don't Advise:** Your goal is a financial health check-up. NEVER give direct financial advice. Frame outputs as "educational information" or "examples." 4. **Your Task:** Analyze, Don't Advise:** Your goal is a financial health check-up. NEVER give direct financial advice. Frame outputs as "educational information" or "examples." 5. **Your Task:** Analyze, Don't Advise:** Your goal is a financial health check-up. NEVER give direct financial advice. Frame outputs as "educational information" or "examples." 6. **Your Task:** Analyze, Don't Advise:** Your goal is a financial health check-up. NEVER give direct financial advice. Frame outputs as "educational information" or "examples." 7. **Your Task:** Analyze, Don't Advise:** Your goal is a financial 

In [15]:
# Define the final prompt template.
# It has three placeholders filled in when the chain is invoked:
#   {constitution}      - the persona/rules text
#   {retrieved_context} - guide passages returned by the FAISS search
#   {user_data}         - the formatted answers collected from the user
# The sections are separated by '---' lines and the template ends with the task
# instruction.
final_prompt_template = """
{constitution}

---
**RETRIEVED CONTEXT FROM THE FINANCIAL GUIDE:**
{retrieved_context}
---
**USER'S FINANCIAL DATA:**
{user_data}
---
**YOUR TASK:**
Based on your constitution and the retrieved context, perform a complete financial health analysis for the user and provide actionable educational information.
"""


In [16]:
# The LLM here is a HuggingFacePipeline, i.e. a plain text-in/text-out model,
# so build a string prompt. A ChatPromptTemplate would be flattened to text
# with a "Human: " role prefix in front of the constitution, which is noise
# for Flan-T5.
final_prompt = PromptTemplate.from_template(final_prompt_template)

# The analysis chain is created and invoked in the following cells.


In [17]:
# Compose the chain with the `|` operator: the filled-in prompt is passed to the
# local LLM, and the LLM's output is passed through StrOutputParser.
analysis_chain = final_prompt | llm | StrOutputParser()



In [18]:
# Supply a value for each of the template's three placeholders, run the chain
# once, and print the generated report under a banner.
chain_inputs = dict(
    constitution=FIN_FRIEND_CONSTITUTION_SHORT,
    retrieved_context=retrieved_context,
    user_data=formatted_user_data,
)
final_report = analysis_chain.invoke(chain_inputs)

print("\n\n--- Your Financial Health Report ---\n")
print(final_report)



--- Your Financial Health Report ---

**Your Task:** Analyze, Don't Advise:** Your goal is a financial health check-up. NEVER give direct financial advice. Frame outputs as "educational information" or "examples." 3. **Your Task:** Analyze, Don't Advise:** Your goal is a financial health check-up. NEVER give direct financial advice. Frame outputs as "educational information" or "examples." 4. **Your Task:** Analyze, Don't Advise:** Your goal is a financial health check-up. NEVER give direct financial advice. Frame outputs as "educational information" or "examples." 5. **Your Task:** Analyze, Don't Advise:** Your goal is a financial health check-up. NEVER give direct financial advice. Frame outputs as "educational information" or "examples." 6. **Your Task:** Analyze, Don't Advise:** Your goal is a financial health check-up. NEVER give direct financial advice. Frame outputs as "educational information" or "examples." 7. **Your Task:** Analyze, Don't Advise:** Your goal is a financial 