In [None]:
import pandas as pd
from datetime import datetime, timedelta
import random

def generate_transaction_data(start_date, end_date, income_per_month, average_transactions_per_day, seed=None):
    """Generate a synthetic, chronologically ordered bank statement.

    One salary payment of ``income_per_month`` is credited per calendar month,
    on the first day of that month that falls inside the requested range.
    Every day additionally receives a random number of expenses (uniformly
    between 0 and ``2 * average_transactions_per_day``), each with its own
    random time, amount (uniform in [1, 1000]) and fictional shop name.
    Within a day, transactions are ordered by time so the running balance
    reads like a real statement.

    Args:
        start_date: First day to generate (a ``datetime``/``date``-like object
            exposing ``.year`` and ``.month`` and supporting ``+ timedelta``).
        end_date: Last day to generate (inclusive).
        income_per_month: Amount credited once per calendar month.
        average_transactions_per_day: Mean number of expenses per day.
        seed: Optional seed for a private random generator. When ``None`` the
            module-level ``random`` functions are used, so a caller's earlier
            ``random.seed(...)`` is still honoured.

    Returns:
        pandas.DataFrame with columns ``Date``, ``Time`` (``"HH:MM:SS"``),
        ``Amount`` (negative for expenses), ``Balance`` (running total after
        the transaction) and ``ThirdPartyName``. The columns are present even
        when the date range is empty.
    """
    # A private generator makes seeded runs reproducible without disturbing
    # the global random state; the module itself exposes the same methods.
    rng = random.Random(seed) if seed is not None else random

    columns = ['Date', 'Time', 'Amount', 'Balance', 'ThirdPartyName']

    # List of fictional London shop names
    london_shops = {
        'Coffee Shop': ['Bean Haven', 'Caffeine Fix', 'Espresso Junction', 'Mocha Haven'],
        'Restaurant': ['Savor Bistro', 'Taste of London', 'Gastronomy Central', 'Urban Palate'],
        'Supermarket': ['Metro Mart', 'City Grocers', 'London Fresh Market', 'Central Superstore'],
        'Gym': ['FitFusion', 'PowerHouse Gym', 'London Fitness Hub', 'Urban Wellness'],
        'Streaming Service': ['StreamEase', 'London Flicks', 'MediaHub', 'CinemaCity'],
        'Clothing Shop': ['Fashion Fusion', 'London Threads', 'Style Haven', 'Chic Boutique'],
        'Pub': ['The London Alehouse', 'Pub Central', 'Brewery Bliss', 'City Tavern']
    }
    # Hoisted out of the loop: the category list never changes.
    categories = list(london_shops)

    def random_time():
        """Return a random time of day as a zero-padded ``HH:MM:SS`` string."""
        return f"{rng.randint(0, 23):02d}:{rng.randint(0, 59):02d}:{rng.randint(0, 59):02d}"

    max_daily_transactions = int(2 * average_transactions_per_day)

    current_balance = 0
    data = []
    last_paid_month = None  # (year, month) of the most recent salary payment

    current_date = start_date
    while current_date <= end_date:
        # Collect the day's transactions first; they are ordered by time below.
        day_entries = []  # tuples of (time, signed amount, third party)

        # Salary: once per calendar month, on the first in-range day of that month.
        month_key = (current_date.year, current_date.month)
        if month_key != last_paid_month:
            day_entries.append((random_time(), income_per_month, 'Employer'))
            last_paid_month = month_key

        # Random expenses, each with its own time of day.
        for _ in range(rng.randint(0, max_daily_transactions)):
            expense_amount = rng.uniform(1, 1000)
            shop_name = rng.choice(london_shops[rng.choice(categories)])
            day_entries.append((random_time(), -expense_amount, shop_name))

        # Zero-padded HH:MM:SS strings sort lexicographically in clock order.
        day_entries.sort(key=lambda entry: entry[0])

        for time_of_day, amount, third_party in day_entries:
            current_balance += amount
            data.append({
                'Date': current_date,
                'Time': time_of_day,
                'Amount': amount,
                'Balance': current_balance,
                'ThirdPartyName': third_party
            })

        # Move to the next day
        current_date += timedelta(days=1)

    # Passing the columns explicitly keeps the schema stable for an empty range.
    return pd.DataFrame(data, columns=columns)

# Set start and end dates (both inclusive)
start_date = datetime(2023, 1, 1)
end_date = datetime(2023, 12, 31)

# Set income per month
income_per_month = 2500

# Set average transactions per day
average_transactions_per_day = 10

# Seed the global RNG so that Restart Kernel -> Run All reproduces the same
# synthetic data; the generator draws from the `random` module.
random.seed(42)

# Generate synthetic transaction data
synthetic_data = generate_transaction_data(start_date, end_date, income_per_month, average_transactions_per_day)

# Preview the first rows instead of printing the whole frame
synthetic_data.head()


In [None]:
!pip install pandas

In [None]:
!pip install numpy

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Location of the input CSV, relative to the notebook's working directory.
USER_DATA_PATH = "../user_data.csv"

# Load the user data into a DataFrame.
user_data = pd.read_csv(USER_DATA_PATH)

In [None]:
# Preview the first five rows of the loaded data.
user_data.head(5)

In [None]:
# Spending category -> third-party names that belong to it.
# Names are matched exactly (case-sensitive) against `third_party_name`, and
# each name must appear under ONE category only: the mapping is inverted into
# a {name: category} lookup, where a repeated name would silently keep just
# its last category.
company_categories = {
    'Eating out': ['Tiger Tiger', 'Nandos', 'Pizza Express', 'The Royal Oak', 'Honest Burgers', 'Wetherspoon', 'Dominos'],
    'Coffee': ['Nero', 'Costa Coffee'],
    'Healthcare and Wellbeing': ['Pure gym', 'Boots', 'Lloyds pharmacy', 'Tony and Guy'],
    'Travel': ['TFL'],
    # 'Waitrose' used to be listed here as well as under 'Supermarkets'; the
    # later 'Supermarkets' entry always won, so the shadowed duplicate is gone.
    # NOTE(review): 'The Griffin' and 'The Newman Arms' look like pub names -
    # confirm they belong under 'Retail shops' rather than 'Eating out'.
    'Retail shops': ['Zara', 'Next', 'Amazon', 'Disney shop', 'John Lewis', 'New look', 'River Island', 'Asos', 'RayBan', 'HnM', 'B&Q', 'The Griffin', 'The Newman Arms', 'Trotters'],
    'Supermarkets': ['Tesco', 'Waitrose'],
    'Insurance and finance': ['Halifax', 'Churchill', 'Lloyds Bank'],
    'Bills': ['Virgin Media', 'Amazon prime', 'Netflix', 'Octopus energy', 'O2'],
    # Account numbers are kept as strings so they match text read from the CSV.
    'Transfer to accounts': ['11234567', '111345678', '11234678']
}

In [None]:
# Invert {category: [companies]} into a flat {company: category} lookup.
company_to_category = {
    company: category
    for category, companies in company_categories.items()
    for company in companies
}

# Exact-match lookup; third parties that are not listed become NaN.
user_data['category'] = user_data['third_party_name'].map(company_to_category)

# Rows with a NaN category are dropped by a later groupby("category"), so
# report unmatched third parties here instead of losing them silently.
unmapped_third_parties = (
    user_data.loc[user_data['category'].isna(), 'third_party_name'].dropna().unique()
)
if len(unmapped_third_parties):
    print(f"Third parties without a category: {sorted(map(str, unmapped_third_parties))}")

user_data.head()

In [None]:
# Persist the frame including the new `category` column. index=False keeps
# the row index out of the file; otherwise it is written as an unnamed first
# column and comes back as "Unnamed: 0" when the CSV is read again.
# NOTE(review): this writes to ./user_data.csv while the data was read from
# ../user_data.csv - confirm the different directory is intended.
user_data.to_csv("./user_data.csv", index=False)

In [None]:
# Show the first five rows of the frame again.
user_data.head(5)

In [None]:
# Total `amount` per category, sorted ascending and then sign-flipped.
# Presumably expenses are stored as negative amounts, so the flip yields
# positive spend figures, largest first - verify against the data.
category_totals = user_data.groupby("category")["amount"].sum()
monthly_spending = category_totals.sort_values().mul(-1)

In [None]:
# Display the per-category totals (summed `amount`, sign-flipped) computed above.
monthly_spending

In [None]:
# Spending limit per category, keyed by the same category names used in
# `company_categories`. The report loop walks this dict in insertion order.
spend_limits = {
    'Eating out': 300,
    'Coffee': 50,
    'Healthcare and Wellbeing': 150,
    'Travel': 100,
    'Retail shops': 250,
    'Supermarkets': 400,
    # 'Insurance and finance' has no active limit (a 1000 entry is disabled):
    # 'Insurance and finance': 1000,
    'Bills': 200,
    'Transfer to accounts': 500,
}

In [None]:
# Compare each category's limit with what was actually spent.
for category, spend_limit in spend_limits.items():
    # Categories with no recorded spending count as 0 spent.
    # Round to 2 decimals before branching so the branch taken always agrees
    # with the 2-decimal figure printed (no "0.00 left to spend" or
    # "overspent by 0.00" caused by float noise).
    difference = round(spend_limit - monthly_spending.get(category, 0), 2)

    if difference < 0:
        print(f"For {category}, you have overspent by {abs(difference):.2f}.")
    elif difference > 0:
        print(f"For {category}, you have {difference:.2f} left to spend.")
    else:
        print(f"For {category}, you have exactly spent your monthly limit.")