In [None]:
## Personal UPI Usage and Financial Analyzer using LLMs ##

In [33]:
!pip install pdfplumber
import pdfplumber
import re
from datetime import datetime
import pandas as pd



In [34]:
import logging
# Raise the "pdfminer" logger's threshold to ERROR so that lower-severity
# records (DEBUG/INFO/WARNING) from that logger are not emitted.
# NOTE(review): presumably this quiets warnings produced while pdfplumber
# parses the PDF in the next cell — confirm that is the intent.
logging.getLogger("pdfminer").setLevel(logging.ERROR)

In [35]:
import pdfplumber
import re
import pandas as pd

# Extract Google Pay transactions from the exported activity PDF into a
# DataFrame (`df`) and persist them as CSV.
#
# Inputs : the PDF at PDF_PATH.
# Outputs: `raw_text`, `transactions`, `structured_data`, `df` in the notebook
#          namespace, plus the CSV file at CSV_PATH.

# NOTE(review): absolute, machine-specific path kept from the original cell;
# consider moving it to a config cell / relative data directory.
PDF_PATH = "C:/Users/Mers_Johnson/Downloads/My Activity History.pdf"
CSV_PATH = "structured_gpay_data.csv"

# Column order of the resulting DataFrame. Passing it explicitly guarantees the
# columns exist even when no transaction could be parsed, so later cells fail
# on "no rows" rather than on a confusing KeyError.
TRANSACTION_COLUMNS = ["Date", "Time", "Amount", "Receiver", "Description"]

# Patterns are compiled once instead of on every loop iteration.
PAYMENT_RE = re.compile(r'Paid ₹([\d,.]+) to (.*?) using')
# Any 4-digit year is accepted (the original only matched 2025 and silently
# dropped every other transaction).
DATETIME_RE = re.compile(r'(\d{1,2} \w+ \d{4}), (\d{2}:\d{2}:\d{2})')
# `.` does not match newlines here, so the description is the remainder of the
# line that contains the "Paid ... using ..." sentence.
DESCRIPTION_RE = re.compile(r'(Paid ₹[\d,.]+ to .*?using.*)')

# Step 1: Read PDF text
with pdfplumber.open(PDF_PATH) as pdf:
    # extract_text() may yield None/empty for pages without a text layer;
    # fall back to "" so concatenation cannot raise TypeError.
    raw_text = "".join((page.extract_text() or "") + "\n" for page in pdf.pages)

# Step 2: Split into chunks for each transaction
# Everything before the first "Google Pay Paid" marker is header text -> skipped.
transactions = re.split(r'Google Pay\s*Paid', raw_text)[1:]

structured_data = []

for txn in transactions:
    txn = 'Paid' + txn  # add back the token consumed by the split

    match_payment = PAYMENT_RE.search(txn)
    match_datetime = DATETIME_RE.search(txn)

    # A chunk is only usable when both the payment sentence and timestamp parse.
    if not (match_payment and match_datetime):
        continue

    match_description = DESCRIPTION_RE.search(txn)

    structured_data.append({
        "Date": match_datetime.group(1),
        "Time": match_datetime.group(2),
        # Amounts may carry thousands separators, e.g. "1,250.00".
        "Amount": float(match_payment.group(1).replace(',', '')),
        "Receiver": match_payment.group(2).strip(),
        "Description": match_description.group(1).strip() if match_description else "",
    })

# Step 3: Convert to DataFrame
df = pd.DataFrame(structured_data, columns=TRANSACTION_COLUMNS)

# Step 4: Save to CSV
df.to_csv(CSV_PATH, index=False)
print("✅ Extracted and saved transactions to structured_gpay_data.csv")


✅ Extracted and saved transactions to structured_gpay_data.csv


In [36]:
# Canonicalise the free-text description: lowercase first, then trim the
# surrounding whitespace.
description_lowercased = df['Description'].str.lower()
df['Description'] = description_lowercased.str.strip()

# Coerce every amount to float; any value still carrying thousands separators
# has its commas dropped before conversion.
df['Amount'] = df['Amount'].map(
    lambda raw_amount: float(str(raw_amount).replace(',', ''))
)


In [37]:
# Ordered (category, keywords) rules. Order matters: the first rule containing
# a matching keyword wins, exactly like the original if/elif chain.
_CATEGORY_RULES = (
    ("Groceries", ("zepto", "big bazaar", "dmart", "grocery")),
    ("Food Delivery", ("zomato", "swiggy", "food", "eat", "delivery")),
    ("Entertainment", ("netflix", "hotstar", "spotify", "entertainment")),
    ("Shopping", ("amazon", "flipkart", "myntra", "shopping")),
    ("Utilities", ("electricity", "gas", "bill", "water")),
)


def categorize_transaction(description):
    """Map a transaction description to a spending category.

    Matching is a case-insensitive substring search against the keyword lists
    in ``_CATEGORY_RULES``; the first matching rule decides the category.

    Args:
        description: Free-text transaction description. Non-string values
            (``None``, NaN, numbers) are accepted and treated as having no
            keywords instead of raising ``TypeError``.

    Returns:
        One of "Groceries", "Food Delivery", "Entertainment", "Shopping",
        "Utilities", or "Others" when nothing matches.

    Note:
        Because matching is by substring, short keywords can over-match
        (e.g. "eat" is contained in "theatre"). This behaviour is unchanged
        from the original implementation.
    """
    # Missing / non-text descriptions cannot contain any keyword.
    if not isinstance(description, str):
        return "Others"

    # Lower-case here so callers need not pre-normalise the text.
    text = description.lower()
    for category, keywords in _CATEGORY_RULES:
        if any(keyword in text for keyword in keywords):
            return category
    return "Others"

# Label every transaction by running its description through the categoriser
df['Category'] = df['Description'].map(categorize_transaction)


In [38]:
# Total amount spent per category
category_spending = (
    df.groupby('Category')['Amount']
    .agg('sum')
    .reset_index()
)

# Total amount spent per calendar month: parse the "D Mon YYYY" date strings
# and bucket them into monthly periods
df['Month'] = (
    pd.to_datetime(df['Date'], format='%d %b %Y')
    .dt.to_period('M')
)
monthly_spending = (
    df.groupby('Month')['Amount']
    .agg('sum')
    .reset_index()
)

# Show each summary under its heading (heading and table on separate lines)
print("Category Spending Summary:", category_spending, sep="\n")

print("Monthly Spending Summary:", monthly_spending, sep="\n")


Category Spending Summary:
        Category    Amount
0  Entertainment    509.00
1  Food Delivery   4723.11
2      Groceries   5558.79
3         Others  47378.29
4       Shopping   1428.00
Monthly Spending Summary:
     Month    Amount
0  2025-01  12474.87
1  2025-02   8443.73
2  2025-03  23674.68
3  2025-04  10192.91
4  2025-05   4811.00


In [39]:
# Derive the hour of day from each "HH:MM:SS" time string
df['Hour'] = (
    pd.to_datetime(df['Time'], format='%H:%M:%S')
    .dt.hour
)
# Total amount spent within each hour of the day
hourly_spending = (
    df.groupby('Hour')['Amount']
    .agg('sum')
    .reset_index()
)

print("Hourly Spending Summary:", hourly_spending, sep="\n")


Hourly Spending Summary:
    Hour    Amount
0      0    438.23
1      8    561.00
2      9    407.00
3     10   3514.24
4     11   1579.98
5     12   3613.29
6     13   3214.99
7     14   4474.25
8     15   7675.85
9     16  10699.88
10    17   1448.43
11    18   1894.93
12    19   5569.00
13    20   3261.00
14    21   7729.82
15    22   1960.30
16    23   1555.00


In [40]:
# Build `summary_data`, the plain-Python digest of the spending analysis that
# is later interpolated into the LLM prompts.

# Purchases strictly below this amount (₹) are reported as small / "wasteful".
SMALL_PURCHASE_LIMIT = 100
# How many of the highest-spend categories to highlight.
TOP_CATEGORY_COUNT = 3

# Summarizing total spending by category
category_spending = df.groupby('Category')['Amount'].sum().reset_index()

# Identifying top spending categories
top_spending = category_spending.nlargest(TOP_CATEGORY_COUNT, 'Amount')

# Identifying wasteful spending (below ₹100)
wasteful_spending = df[df['Amount'] < SMALL_PURCHASE_LIMIT]

# Monthly trends (recomputed here so this cell does not depend on the earlier
# summary cell having been run)
df['Month'] = pd.to_datetime(df['Date'], format='%d %b %Y').dt.to_period('M')
monthly_spending = df.groupby('Month')['Amount'].sum().reset_index()

# Generate summary data
# The 'Month' column holds pandas Period objects; convert them to strings
# ("2025-01") for the summary only, so the prompt shows readable months rather
# than "Period('2025-01', 'M')" and the dict stays JSON-serialisable.
# `monthly_spending` itself keeps its Period dtype.
summary_data = {
    "total_spending_by_category": category_spending.to_dict(orient='records'),
    "top_spending_categories": top_spending.to_dict(orient='records'),
    "wasteful_spending": wasteful_spending[['Date', 'Receiver', 'Amount']].to_dict(orient='records'),
    "monthly_spending": monthly_spending.astype({'Month': str}).to_dict(orient='records'),
}


In [41]:
# Construct a personalized financial-advice prompt.
# The f-string interpolates four entries of `summary_data` (category totals,
# top categories, sub-₹100 purchases, monthly totals) as their Python list
# reprs, followed by the instructions for the assistant.
# NOTE(review): in the cells visible here this variable is only printed; the
# Gemini request further down builds its own `prompt` — confirm which one is
# meant to be sent.

financial_advice_prompt = f"""
You are a financial assistant. Based on the user's transaction data, please generate personalized financial advice.

### Spending Overview:
- **Total Spending by Category:**
  {summary_data['total_spending_by_category']}

- **Top Spending Categories:**
  {summary_data['top_spending_categories']}

- **Wasteful Spending (Small purchases below ₹100):**
  {summary_data['wasteful_spending']}

- **Monthly Spending Trends:**
  {summary_data['monthly_spending']}

### Financial Advice:
- Suggest a **budget** based on the user's spending trends.
- Recommend **ways to reduce wasteful spending** (e.g., small, frequent purchases).
- Suggest ways to **save more money**.
- Provide **actionable tips** to **cut down on discretionary spending** (e.g., entertainment, food delivery).
"""

# Print the rendered prompt for inspection (the output includes the
# interpolated transaction records).
print(financial_advice_prompt)



You are a financial assistant. Based on the user's transaction data, please generate personalized financial advice.

### Spending Overview:
- **Total Spending by Category:**
  [{'Category': 'Entertainment', 'Amount': 509.0}, {'Category': 'Food Delivery', 'Amount': 4723.11}, {'Category': 'Groceries', 'Amount': 5558.79}, {'Category': 'Others', 'Amount': 47378.29}, {'Category': 'Shopping', 'Amount': 1428.0}]

- **Top Spending Categories:**
  [{'Category': 'Others', 'Amount': 47378.29}, {'Category': 'Groceries', 'Amount': 5558.79}, {'Category': 'Food Delivery', 'Amount': 4723.11}]

- **Wasteful Spending (Small purchases below ₹100):**
  [{'Date': '10 May 2025', 'Receiver': 'Airtel Prepaid', 'Amount': 22.0}, {'Date': '5 May 2025', 'Receiver': 'Airtel Prepaid', 'Amount': 22.0}, {'Date': '24 Apr 2025', 'Receiver': 'SWIGGY INSTAMART', 'Amount': 89.0}, {'Date': '15 Apr 2025', 'Receiver': 'PON ARUNA STORE', 'Amount': 40.0}, {'Date': '13 Apr 2025', 'Receiver': 'SUNIL KUMAR K', 'Amount': 60.0}, {

In [43]:
pip install google-generativeai

Note: you may need to restart the kernel to use updated packages.


In [44]:
import google.generativeai as genai

In [45]:
# Send the spending summary to Gemini and print the generated advice.
import os

import google.generativeai as genai

# Step 1: Configure your API key
# The key is read from the GOOGLE_API_KEY environment variable so that no
# credential is stored in the notebook. A key that was ever committed in a
# notebook should be treated as leaked and rotated.
api_key = os.environ.get("GOOGLE_API_KEY")
if not api_key:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set. Export it in the environment that starts "
        "the Jupyter kernel before running this cell."
    )
genai.configure(api_key=api_key)

# Step 2: Use the fully qualified model name
model = genai.GenerativeModel(model_name="models/gemini-1.5-flash")

# Step 3: Define the prompt
# `summary_data` is the dict built by the summary cell; each entry is
# interpolated as its Python list repr.
prompt = f"""
You are a financial assistant. Analyze the following user's spending data and give personalized financial advice:

 Total Spending by Category:
{summary_data['total_spending_by_category']}

 Top Spending Categories:
{summary_data['top_spending_categories']}

 Wasteful Spending (under ₹100):
{summary_data['wasteful_spending']}

 Monthly Spending Trend:
{summary_data['monthly_spending']}

 Your Task:
- Suggest a realistic monthly budget.
- Recommend ways to reduce wasteful spending.
- Highlight areas where spending can be optimized.
- Offer tips for better savings and financial discipline.
"""

# Step 4: Generate content (network call to the Gemini API)
response = model.generate_content(prompt)

# Step 5: Display response
print("Personalized Financial Advice:\n")
print(response.text)


Personalized Financial Advice:

## Financial Analysis and Recommendations

Based on your provided spending data, here's a personalized financial plan:

**1. Current Financial Picture:**

Your spending shows significant variability across months.  While January and March were high-spending months, April and May show considerably lower spending.  The "Others" category represents a major concern, accounting for a disproportionately large share of your expenses (47378.29).  Without knowing what this category encompasses, it’s difficult to give specific advice, but it needs urgent attention.  Food delivery and groceries also constitute substantial portions of your spending, suggesting potential for optimization.

**2. Realistic Monthly Budget:**

To create a realistic budget, we need to understand your income.  However, based on your spending patterns, let's aim for an average monthly expenditure and identify areas for reduction:


| Category             | Current Avg. Spending | Proposed B