Generating Fake Data For Analyzing Personal Expenses.

In [2]:
from faker import Faker
import pandas as pd
import random
from datetime import datetime, timedelta

In [3]:
# Shared Faker instance; generate_expenses_for_month() uses it to draw random dates.
fake = Faker()

In [4]:
# Lookup table: expense category -> candidate description strings.
# A record's description is drawn at random from the list of its category.
category_descriptions = {
    'Food': [
        "Dinner at a restaurant",
        "Lunch with friends",
        "Snacks for cravings",
        "Takeout from a local diner",
        "Coffee and pastries at a café",
    ],
    'Transportation': [
        "Gas for the car",
        "Uber ride to the airport",
        "Public transport monthly pass",
        "Taxi fare to a meeting",
        "Parking fees",
    ],
    'Bills': [
        "Electricity bill payment",
        "Water bill payment",
        "Internet service charge",
        "Mobile phone bill",
        "Insurance premium payment",
    ],
    'Groceries': [
        "Weekly grocery shopping",
        "Buying fresh produce",
        "Snacks and drinks for the week",
        "Household supplies",
        "Buying meat and dairy products",
    ],
    'Entertainment': [
        "Movie tickets for the weekend",
        "Concert tickets",
        "Streaming service subscription",
        "Video game purchase",
        "Dining out with friends",
    ],
    'Travel': [
        "Flight tickets to a destination",
        "Hotel booking for vacation",
        "Car rental for the trip",
        "Travel insurance",
        "Sightseeing tour tickets",
    ],
    'Subscriptions': [
        "Monthly magazine subscription",
        "Online course fee",
        "Gym membership",
        "Software subscription",
        "Streaming service subscription",
    ],
    'Personal': [
        "Clothing purchase",
        "Personal care products",
        "Books and stationery",
        "Hobby supplies",
    ],
    'Investment': [
        "Stock purchase",
        "Bond investment",
        "Real estate investment",
        "Mutual fund contribution",
        "Retirement account contribution",
        "Cryptocurrency purchase",
        "Investment in startup",
        "Dividend reinvestment",
    ],
    'Gifts': [
        "Birthday gift for a friend",
        "Anniversary gift",
        "Holiday gift",
        "Gift for a colleague",
        "Charity donation",
    ],
}

In [5]:

def generate_expenses_for_month(month, num_records, year=2024):
    """Generate random expense records dated within one calendar month.

    Args:
        month: Month number (1-12).
        num_records: Number of records to generate.
        year: Calendar year the month belongs to. Defaults to 2024, the
            value that used to be hard-coded.

    Returns:
        A list with ``num_records`` rows. Each row is a list of
        ``[date, category, payment_mode, description, amount_paid, cashback]``
        where ``date`` is whatever ``fake.date_between`` returns,
        ``amount_paid`` is a uniform draw from 5 to 1000 rounded to two
        decimals, and ``cashback`` is 0.0 for the 'Investment' category and
        otherwise a uniform draw from 0 to 10% of ``amount_paid`` rounded to
        two decimals.

    Raises:
        ValueError: If ``month`` is outside 1-12 (raised by ``datetime``).
    """
    payment_modes = ['Cash', 'UPI', 'Debit Card', 'Credit Card', 'Net Banking']
    # The category list is the same for every record, so build it once.
    categories = list(category_descriptions.keys())

    # Month bounds: the first day of the month, and the day before the first
    # day of the following month (December rolls over into the next year).
    start_date = datetime(year, month, 1)
    if month == 12:
        next_month_start = datetime(year + 1, 1, 1)
    else:
        next_month_start = datetime(year, month + 1, 1)
    last_day = next_month_start - timedelta(days=1)

    expenses = []
    for _ in range(num_records):
        # The order of the random draws below is kept stable so that seeded
        # runs keep producing the same rows.
        expense_date = fake.date_between(start_date, last_day)
        category = random.choice(categories)
        payment_mode = random.choice(payment_modes)
        description = random.choice(category_descriptions[category])
        amount_paid = round(random.uniform(5, 1000), 2)

        # Investments earn no cashback; everything else gets up to 10%.
        if category == 'Investment':
            cashback = 0.0
        else:
            cashback = round(random.uniform(0, amount_paid * 0.1), 2)

        expenses.append(
            [expense_date, category, payment_mode, description, amount_paid, cashback]
        )

    return expenses

In [6]:
def save_expenses_to_csv():
    """Write one CSV of generated expenses per month.

    For each month 1-12, generates 100 records with
    generate_expenses_for_month(), writes them (without the index column) to
    ``expenses_<month>.csv`` and prints a confirmation line.
    """
    header = ['Random_Date', 'Category', 'Payment_Mode', 'Description', 'Amount_Paid', 'Cashback']
    for month in range(1, 13):
        rows = generate_expenses_for_month(month, 100)
        pd.DataFrame(rows, columns=header).to_csv(f'expenses_{month}.csv', index=False)
        print(f"Expenses for month {month} saved to expenses_{month}.csv")

# Generate the data and write all twelve monthly files
# (expenses_1.csv ... expenses_12.csv); paths are relative, so they are
# resolved against the current working directory.
save_expenses_to_csv()

Expenses for month 1 saved to expenses_1.csv
Expenses for month 2 saved to expenses_2.csv
Expenses for month 3 saved to expenses_3.csv
Expenses for month 4 saved to expenses_4.csv
Expenses for month 5 saved to expenses_5.csv
Expenses for month 6 saved to expenses_6.csv
Expenses for month 7 saved to expenses_7.csv
Expenses for month 8 saved to expenses_8.csv
Expenses for month 9 saved to expenses_9.csv
Expenses for month 10 saved to expenses_10.csv
Expenses for month 11 saved to expenses_11.csv
Expenses for month 12 saved to expenses_12.csv
