In [1]:
!pip install pandas numpy faker

Collecting faker
  Downloading faker-37.1.0-py3-none-any.whl.metadata (15 kB)
Downloading faker-37.1.0-py3-none-any.whl (1.9 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.9/1.9 MB 14.1 MB/s eta 0:00:00
Installing collected packages: faker
Successfully installed faker-37.1.0


In [2]:
import pandas as pd
import random
from faker import Faker

# Generate synthetic US donation records for 2024 and write them to
# us_donations_2024.csv. The rows are deliberately imperfect (missing emails,
# outlier amounts, lower-cased donation types, polluted project names and
# duplicated rows).

# Seed every random source so a fresh "Restart & Run All" rebuilds the same
# file. Faker keeps its own random generator, so it is seeded separately from
# the stdlib `random` module.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)
Faker.seed(RANDOM_SEED)
fake = Faker()

# Tunable generation parameters
US_ROW_COUNT = 2381          # rows generated before duplicates are appended
MISSING_EMAIL_RATE = 0.1     # share of rows whose email is blanked out
OUTLIER_RATE = 0.05          # share of rows whose amount is multiplied by 10
LOWERCASE_TYPE_RATE = 0.05   # share of rows whose donation type is lower-cased
DIRTY_PROJECT_RATE = 0.05    # share of rows with a random word appended to the project
DUPLICATE_FRACTION = 0.05    # fraction of rows re-appended as exact duplicates

# Data options
us_payment_methods = ['Card', 'Bank Transfer', 'PayPal', 'Apple Pay']
donation_types = ['Zakat', 'Sadaqah', 'Qurbani', 'Fidya', 'Zakat al Fitr']
projects = ['Orphan Children', 'Feed the Hungry', 'Homeless Shelter', 'Medical Aid', 'Back to School Kits']
us_states = ['California', 'New York', 'Texas', 'Florida', 'Illinois', 'Georgia', 'Ohio', 'North Carolina']

# The date window is the same for every row, so compute it once outside the loop.
date_start = pd.to_datetime("2024-01-01").date()
date_end = pd.to_datetime("2024-12-31").date()

data = []

for _ in range(US_ROW_COUNT):
    name = fake.name()
    email = fake.email()
    phone = fake.phone_number()
    amount = round(random.uniform(10, 5000), 2)
    dtype = random.choice(donation_types)
    proj = random.choice(projects)
    date = fake.date_between_dates(date_start=date_start, date_end=date_end)
    # The state is picked independently of the fake city.
    location = f"{fake.city()}, {random.choice(us_states)}"
    payment = random.choice(us_payment_methods)
    notes = fake.sentence(nb_words=6)

    # Imperfections: each one is applied independently of the others.
    if random.random() < MISSING_EMAIL_RATE:
        email = None
    if random.random() < OUTLIER_RATE:
        # Round again after scaling: multiplying an already-rounded float by 10
        # can reintroduce binary noise (e.g. 0.07 * 10 == 0.7000000000000001).
        amount = round(amount * 10, 2)  # outlier
    if random.random() < LOWERCASE_TYPE_RATE:
        dtype = dtype.lower()
    if random.random() < DIRTY_PROJECT_RATE:
        proj += " " + fake.word()

    data.append([
        name, email, phone, amount, dtype,
        proj, date, location, payment, notes
    ])

# DataFrame
columns = [
    'Donor Name', 'Email', 'Phone', 'Donation Amount (USD)', 'Donation Type',
    'Project', 'Date', 'Location', 'Payment Method', 'Notes'
]
df = pd.DataFrame(data, columns=columns)

# Add some duplicates: a seeded sample of existing rows is appended at the end.
df = pd.concat(
    [df, df.sample(frac=DUPLICATE_FRACTION, random_state=RANDOM_SEED)],
    ignore_index=True,
)

# Save
df.to_csv("us_donations_2024.csv", index=False)
print("✅ us_donations_2024.csv has been saved!")


✅ us_donations_2024.csv has been saved!


In [3]:
import pandas as pd
import random
from faker import Faker

# Generate synthetic Bangladeshi donation records for 2024 and write them to
# bangladesh_donations_2024.csv. The rows are deliberately imperfect (missing
# emails, outlier amounts, lower-cased donation types, polluted project names
# and duplicated rows).

# Setup: seed every random source so a fresh "Restart & Run All" rebuilds the
# same file. Faker keeps its own random generator, so it is seeded separately
# from the stdlib `random` module.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)
Faker.seed(RANDOM_SEED)
fake = Faker()

# Tunable generation parameters
BD_ROW_COUNT = 927           # rows generated before duplicates are appended
MISSING_EMAIL_RATE = 0.1     # share of rows whose email is blanked out
OUTLIER_RATE = 0.05          # share of rows whose amount is multiplied by 10
LOWERCASE_TYPE_RATE = 0.05   # share of rows whose donation type is lower-cased
DIRTY_PROJECT_RATE = 0.05    # share of rows with a random word appended to the project
DUPLICATE_FRACTION = 0.05    # fraction of rows re-appended as exact duplicates

# Bangladeshi names (sample lists)
bd_first_names = ['Rahim', 'Karim', 'Hasan', 'Anika', 'Fatima', 'Rafi', 'Nusrat', 'Sadia', 'Tanvir', 'Mizan']
bd_last_names = ['Hossain', 'Khan', 'Ahmed', 'Rahman', 'Chowdhury', 'Begum', 'Islam', 'Kabir', 'Sarker', 'Ali']

# Other data options
bd_locations = ['Dhaka', 'Chattogram', 'Khulna', 'Rajshahi', 'Sylhet', 'Barisal', 'Rangpur', 'Mymensingh']
payment_methods = ['bKash', 'Nagad', 'Bank Transfer', 'Card']
donation_types = ['Zakat', 'Sadaqah', 'Qurbani', 'Fidya', 'Zakat al Fitr']
projects = ['Orphan Children', 'Feed the Hungry', 'Rohingya Refugee Support', 'Medical Care', 'Eid Gifts For Children']
bd_email_domains = ['gmail.com']
bd_mobile_prefixes = ['13', '14', '15', '16', '17', '18', '19']

# The date window is the same for every row, so compute it once outside the loop.
date_start = pd.to_datetime("2024-01-01").date()
date_end = pd.to_datetime("2024-12-31").date()

# Generate data
data = []
for _ in range(BD_ROW_COUNT):
    first = random.choice(bd_first_names)
    last = random.choice(bd_last_names)
    donor_name = f"{first} {last}"

    # Phone number: "+880", a two-digit prefix, then eight random digits.
    phone = f"+880{random.choice(bd_mobile_prefixes)}{random.randint(10000000, 99999999)}"
    # Email is derived from the donor's name plus a numeric suffix (1-999).
    email = f"{first.lower()}.{last.lower()}{random.randint(1, 999)}@{random.choice(bd_email_domains)}"

    # Other fields
    amount = round(random.uniform(100, 10000), 2)
    dtype = random.choice(donation_types)
    proj = random.choice(projects)
    date = fake.date_between_dates(date_start=date_start, date_end=date_end)
    location = random.choice(bd_locations)
    payment = random.choice(payment_methods)
    notes = fake.sentence(nb_words=6)

    # Imperfections: each one is applied independently of the others.
    if random.random() < MISSING_EMAIL_RATE:
        email = None
    if random.random() < OUTLIER_RATE:
        # Round again after scaling: multiplying an already-rounded float by 10
        # can reintroduce binary noise (e.g. 0.07 * 10 == 0.7000000000000001).
        amount = round(amount * 10, 2)  # outlier
    if random.random() < LOWERCASE_TYPE_RATE:
        dtype = dtype.lower()
    if random.random() < DIRTY_PROJECT_RATE:
        proj += " " + fake.word()

    data.append([
        donor_name, email, phone, amount, dtype,
        proj, date, location, payment, notes
    ])

# Create DataFrame
columns = [
    'Donor Name', 'Email', 'Phone', 'Donation Amount (BDT)', 'Donation Type',
    'Project', 'Date', 'Location', 'Payment Method', 'Notes'
]
df = pd.DataFrame(data, columns=columns)

# Add some duplicates: a seeded sample of existing rows is appended at the end.
df = pd.concat(
    [df, df.sample(frac=DUPLICATE_FRACTION, random_state=RANDOM_SEED)],
    ignore_index=True,
)

# Save
df.to_csv("bangladesh_donations_2024.csv", index=False)
print("✅ bangladesh_donations_2024.csv has been saved!")


✅ bangladesh_donations_2024.csv has been saved!
