In [1]:
import pandas as pd
from faker import Faker
import random
import numpy as np

# Build a synthetic customer-finance dataset and export it to CSV.
#
# Every row is one fake customer: identity fields come from Faker, numeric
# fields from the stdlib `random` module, and four derived metric columns
# are computed with pandas before the frame is written to OUTPUT_CSV.

# Tunable parameters for this cell.
SEED = 42
N_CUSTOMERS = 2000
OUTPUT_CSV = 'finance_fake_data.csv'
INVESTMENT_TYPES = ('Stocks', 'Bonds', 'Mutual Funds', 'ETFs', 'Real Estate')
FEEDBACK_LEVELS = ['Excellent', 'Good', 'Average', 'Poor']

# Seed both random sources so Restart & Run All reproduces the same dataset.
# Faker draws from its own generator (uuid4, name, random_element), so
# seeding the stdlib `random` module alone would not be enough.
random.seed(SEED)
Faker.seed(SEED)

# Initialize Faker
fake = Faker()

# Generate fake data: one dict per customer, keyed by column name, so a value
# can never end up under the wrong header.
data = []
for _ in range(N_CUSTOMERS):
    data.append({
        'Customer ID': fake.uuid4(),
        'Customer Name': fake.name(),
        'Age': random.randint(18, 80),
        'Income': round(random.uniform(20000, 200000), 2),
        'Investment Type': fake.random_element(elements=INVESTMENT_TYPES),
        'Inflow': round(random.uniform(1000, 5000), 2),
        'Outflow': round(random.uniform(500, 3000), 2),
        'Feedback': random.choice(FEEDBACK_LEVELS),
        'Churn': random.choice([True, False]),
        'Acquisition Cost': round(random.uniform(50, 500), 2),
    })

# Create DataFrame; `columns` pins the column order of the exported file.
columns = ['Customer ID', 'Customer Name', 'Age', 'Income', 'Investment Type', 'Inflow', 'Outflow', 'Feedback', 'Churn', 'Acquisition Cost']
df = pd.DataFrame(data, columns=columns)

# Calculate additional metrics
# Inflow is drawn from [1000, 5000], so the division below never hits zero.
df['Net Profit Margin'] = (df['Inflow'] - df['Outflow']) / df['Inflow']
df['Assets Under Management'] = df['Income'] - df['Outflow']
df['Customer Churn'] = df['Churn'].astype(int)  # Convert boolean to integer
# NOTE(review): 'Churn' is boolean, so np.maximum(df['Churn'], 1) is 1 on every
# row and this column currently equals 'Acquisition Cost'. Kept as-is because
# the intended formula is not evident from this cell -- confirm with the author.
df['Cost Per Acquisition'] = df['Acquisition Cost'] / np.maximum(df['Churn'], 1)

# Export data to CSV
df.to_csv(OUTPUT_CSV, index=False)

# Output
print(f"Data exported to '{OUTPUT_CSV}'")


Data exported to 'finance_fake_data.csv'


In [2]:
import pandas as pd
from faker import Faker
import random
import numpy as np
from datetime import datetime, timedelta

# Build a synthetic customer-finance dataset with a per-customer date and a
# New/Old customer flag, then export it to CSV.
#
# Every row is one fake customer: identity fields and the date come from
# Faker, numeric fields from the stdlib `random` module, and four derived
# metric columns are computed with pandas before the frame is written to
# OUTPUT_CSV.

# Tunable parameters for this cell.
SEED = 42
N_CUSTOMERS = 2000
OUTPUT_CSV = 'finance_data_v2.csv'
INVESTMENT_TYPES = ('Stocks', 'Bonds', 'Mutual Funds', 'ETFs', 'Real Estate')
FEEDBACK_LEVELS = ['Excellent', 'Good', 'Average', 'Poor']

# Seed both random sources so Restart & Run All reproduces the same draws.
# Faker draws from its own generator (uuid4, name, random_element,
# date_between), so seeding the stdlib `random` module alone is not enough.
# The date window below is still relative to the moment the cell runs, so
# 'Date' and 'customer type' shift with the run date.
random.seed(SEED)
Faker.seed(SEED)

# Initialize Faker
fake = Faker()

# Take a single "now" so the window bounds and the year used for the New/Old
# split all refer to the same instant.
now = datetime.now()
start_date = now - timedelta(days=3*365)
end_date = now
current_year = now.year

# Generate fake data: one dict per customer, keyed by column name, so a value
# can never end up under the wrong header.
data = []
for _ in range(N_CUSTOMERS):
    row = {
        'Customer ID': fake.uuid4(),
        'Customer Name': fake.name(),
        'Age': random.randint(18, 80),
        'Income': round(random.uniform(20000, 200000), 2),
        'Investment Type': fake.random_element(elements=INVESTMENT_TYPES),
        'Inflow': round(random.uniform(1000, 5000), 2),
        'Outflow': round(random.uniform(500, 3000), 2),
        'Feedback': random.choice(FEEDBACK_LEVELS),
        'Churn': random.choice([True, False]),
        'Acquisition Cost': round(random.uniform(50, 500), 2),
        'Date': fake.date_between(start_date=start_date, end_date=end_date),
    }
    # A customer whose date falls in the current calendar year is 'New'.
    row['customer type'] = 'New' if row['Date'].year == current_year else 'Old'
    data.append(row)

# Create DataFrame; `columns` pins the column order of the exported file.
columns = ['Customer ID', 'Customer Name', 'Age', 'Income', 'Investment Type', 'Inflow', 'Outflow', 'Feedback', 'Churn', 'Acquisition Cost', 'Date','customer type']
df = pd.DataFrame(data, columns=columns)

# Calculate additional metrics
# Inflow is drawn from [1000, 5000], so the division below never hits zero.
df['Net Profit Margin'] = (df['Inflow'] - df['Outflow']) / df['Inflow']
df['Assets Under Management'] = df['Income'] - df['Outflow']
df['Customer Churn'] = df['Churn'].astype(int)  # Convert boolean to integer
# NOTE(review): 'Churn' is boolean, so np.maximum(df['Churn'], 1) is 1 on every
# row and this column currently equals 'Acquisition Cost'. Kept as-is because
# the intended formula is not evident from this cell -- confirm with the author.
df['Cost Per Acquisition'] = df['Acquisition Cost'] / np.maximum(df['Churn'], 1)

# Export data to CSV
df.to_csv(OUTPUT_CSV, index=False)

# Output
# Report the path that was actually written; the previous message named a
# different file ('finance_fake_data_with_date.csv') than the one exported.
print(f"Data exported to '{OUTPUT_CSV}'")

Data exported to 'finance_fake_data_with_date.csv'
