**STEP 2. PROTOTYPE DEVELOPMENT**

In [None]:
# Load Datasets
import pandas as pd

# Load budget data
budget_df = pd.read_csv('/content/Budget.csv')
print("Budget Data:")
print(budget_df.head())

# Load personal transactions data
transactions_df = pd.read_csv('/content/personal_transactions.csv')

# Attempt to infer the date format automatically
transactions_df['Date'] = pd.to_datetime(transactions_df['Date'], infer_datetime_format=True, errors='coerce')

# Check for any dates that couldn't be parsed
print(transactions_df[transactions_df['Date'].isna()])

print("\nPersonal Transactions Data:")
print(transactions_df.head())

Budget Data:
                 Category  Budget
0          Alcohol & Bars      50
1          Auto Insurance      75
2            Coffee Shops      15
3  Electronics & Software       0
4           Entertainment      25
Empty DataFrame
Columns: [Date, Description, Amount, Transaction Type, Category, Account Name]
Index: []

Personal Transactions Data:
        Date          Description   Amount Transaction Type  \
0 2018-01-01               Amazon    11.11            debit   
1 2018-01-02     Mortgage Payment  1247.44            debit   
2 2018-01-02      Thai Restaurant    24.22            debit   
3 2018-01-03  Credit Card Payment  2298.09           credit   
4 2018-01-04              Netflix    11.76            debit   

              Category   Account Name  
0             Shopping  Platinum Card  
1      Mortgage & Rent       Checking  
2          Restaurants    Silver Card  
3  Credit Card Payment  Platinum Card  
4        Movies & DVDs  Platinum Card  


  transactions_df['Date'] = pd.to_datetime(transactions_df['Date'], infer_datetime_format=True, errors='coerce')


In [None]:
# Spend Categorization using NLP
import spacy

# Load spaCy model
nlp = spacy.load('en_core_web_sm')

# Define a function to categorize transactions using NLP
def categorize_transaction(description):
    doc = nlp(description)
    # Define some simple keyword matching for categories
    if any(token.lemma_ in ['coffee'] for token in doc):
        return 'Coffee Shops'
    elif any(token.lemma_ in ['gas', 'fuel'] for token in doc):
        return 'Gas & Fuel'
    elif any(token.lemma_ in ['grocery'] for token in doc):
        return 'Groceries'
    elif any(token.lemma_ in ['restaurant'] for token in doc):
        return 'Restaurants'
    elif any(token.lemma_ in ['internet'] for token in doc):
        return 'Internet'
    else:
        return 'Other'

# Apply the function to categorize the transactions if not already categorized
transactions_df['NLP_Category'] = transactions_df.apply(
    lambda row: row['Category'] if pd.notna(row['Category']) else categorize_transaction(row['Description']),
    axis=1
)
print("\nTransactions with NLP Categories:")
print(transactions_df.head())



Transactions with NLP Categories:
        Date          Description   Amount Transaction Type  \
0 2018-01-01               Amazon    11.11            debit   
1 2018-01-02     Mortgage Payment  1247.44            debit   
2 2018-01-02      Thai Restaurant    24.22            debit   
3 2018-01-03  Credit Card Payment  2298.09           credit   
4 2018-01-04              Netflix    11.76            debit   

              Category   Account Name         NLP_Category  
0             Shopping  Platinum Card             Shopping  
1      Mortgage & Rent       Checking      Mortgage & Rent  
2          Restaurants    Silver Card          Restaurants  
3  Credit Card Payment  Platinum Card  Credit Card Payment  
4        Movies & DVDs  Platinum Card        Movies & DVDs  


In [None]:
# Comparison with Budget
# Merge transactions with the budget based on the NLP-derived category
merged_df = pd.merge(transactions_df, budget_df, left_on='NLP_Category', right_on='Category', how='left', suffixes=('', '_Budget'))

# Calculate total spending per category
spending_per_category = merged_df.groupby('NLP_Category')['Amount'].sum().reset_index()

# Merge spending with budget data
comparison_df = pd.merge(spending_per_category, budget_df, left_on='NLP_Category', right_on='Category', how='left')

# Provide advice based on comparison
comparison_df['Advice'] = comparison_df.apply(
    lambda row: 'On track' if -row['Amount'] <= row['Budget'] else 'Exceeding budget', axis=1
)

print("\nComparison of Spending with Budget and Advice:")
print(comparison_df[['NLP_Category', 'Amount', 'Budget', 'Advice']])



Comparison of Spending with Budget and Advice:
              NLP_Category    Amount  Budget            Advice
0           Alcohol & Bars    539.13    50.0          On track
1           Auto Insurance   1350.00    75.0          On track
2             Coffee Shops    115.54    15.0          On track
3      Credit Card Payment  63561.12     NaN  Exceeding budget
4   Electronics & Software    719.00     0.0          On track
5            Entertainment      9.62    25.0          On track
6                Fast Food    330.63    15.0          On track
7            Food & Dining     77.75     NaN  Exceeding budget
8               Gas & Fuel   1715.17    75.0          On track
9                Groceries   2795.21   150.0          On track
10                 Haircut    378.00    30.0          On track
11        Home Improvement  19092.87   250.0          On track
12                Internet   1570.88    75.0          On track
13            Mobile Phone   1680.40    65.0          On track
14     