In [1]:
import pandas as pd
from faker import Faker
import random
import numpy as np

# Reproducibility settings: a fixed seed and an explicit row count so that
# Restart & Run All regenerates exactly the same synthetic dataset.
SEED = 42
NUM_CUSTOMERS = 2000

# Initialize Faker and seed this instance's generator
fake = Faker()
fake.seed_instance(SEED)

# Dedicated RNG for the numeric/categorical fields; using a private
# random.Random keeps the global `random` module state untouched.
rng = random.Random(SEED)

# Generate fake data: one row per synthetic customer
data = []
for _ in range(NUM_CUSTOMERS):
    customer_id = fake.uuid4()
    name = fake.name()
    email = fake.email()
    address = fake.address()
    city = fake.city()
    country = fake.country()
    device_type = rng.choice(['Desktop', 'Mobile', 'Tablet'])
    time_spent_on_site = round(rng.uniform(1, 60), 2)  # in minutes
    products_viewed = rng.randint(1, 20)
    # Upper bound is products_viewed, so purchases never exceed views
    products_purchased = rng.randint(0, products_viewed)
    # NOTE(review): drawn independently of products_purchased, so rows with
    # zero purchases still carry a non-zero amount -- confirm this is intended.
    total_amount_spent = round(rng.uniform(10, 500), 2)  # in dollars
    # Abandoned when at least one viewed product was not purchased
    cart_abandoned = products_viewed > products_purchased

    data.append([customer_id, name, email, address, city, country, device_type,
                 time_spent_on_site, products_viewed, products_purchased,
                 total_amount_spent, cart_abandoned])

# Create DataFrame (column order must match the row layout appended above)
columns = ['Customer ID', 'Name', 'Email', 'Address', 'City', 'Country', 'Device Type',
           'Time Spent on Site (min)', 'Products Viewed', 'Products Purchased',
           'Total Amount Spent ($)', 'Cart Abandoned']
df = pd.DataFrame(data, columns=columns)
# Export the generated columns to CSV without the positional index
df.to_csv('e_commerce_fake_data.csv', index=False)

# Calculate additional metrics
# Fraction of viewed products that were purchased
df['Conversion Rate'] = df['Products Purchased'] / df['Products Viewed']
# Clamp the divisor to 1 so rows with zero purchases do not divide by zero
df['Average Order Value ($)'] = df['Total Amount Spent ($)'] / np.maximum(df['Products Purchased'], 1)
# A conversion rate of 0 would make 1 / rate infinite and push inf into the
# lifetime-value column; mask those rows to NaN so aggregates stay finite.
conversion_rate_nonzero = df['Conversion Rate'].where(df['Conversion Rate'] > 0)
df['Customer Lifetime Value ($)'] = df['Total Amount Spent ($)'] * (1 / conversion_rate_nonzero)

# Charts data: aggregate the customer frame into the series used for reporting
by_device = df.groupby('Device Type')
by_city = df.groupby('City')

# Total spend per device type
sales_by_device_type = by_device['Total Amount Spent ($)'].sum()
# Mean session length per city
time_spent_on_site = by_city['Time Spent on Site (min)'].mean()
# NOTE(review): this tallies how many rows share each 'Products Purchased'
# value (a quantity, not a product identifier) and keeps the 10 most frequent.
purchase_count_frequency = df['Products Purchased'].value_counts()
top_selling_products = purchase_count_frequency.head(10)
# Number of customers per country
customer_geographic_distribution = df['Country'].value_counts()

# Output: each section is a heading line followed by the printed object
report_sections = [
    ("E-commerce Customer Analytics Data:", df.head()),
    ("\nSales by Device Type:", sales_by_device_type),
    ("\nAverage Time Spent on Site by City:", time_spent_on_site),
    ("\nTop Selling Products:", top_selling_products),
    ("\nCustomer Geographic Distribution:", customer_geographic_distribution),
]
for heading, section in report_sections:
    print(heading, section, sep="\n")


E-commerce Customer Analytics Data:
                            Customer ID            Name  \
0  daf69cd5-a28d-482a-9c8b-71cc9a1c46c8  Michael Sawyer   
1  87adc209-17dc-4991-a2dd-2b698aa320ca      Adam Yoder   
2  55e6d0bb-2457-4be4-8717-dade34530824   Meghan Gentry   
3  9273ce14-d59d-4e15-8e10-3f0db889319e    Sarah Harvey   
4  1ae528e1-dfac-4a4d-a725-e591e0244292      Lisa Le MD   

                         Email  \
0     joshuamathis@example.org   
1  petersonwilliam@example.com   
2       heberterin@example.org   
3            kreed@example.org   
4          swalton@example.com   

                                             Address               City  \
0              138 Johnny Burgs\nDunnburgh, PW 42770    Christopherstad   
1  87736 Richard Mission Suite 964\nNorth Tammy, ...       Bakerborough   
2          1269 Morris Forks\nHubbardburgh, UT 15818   Port Williamtown   
3               9043 Tina Pike\nOrtegaberg, CO 11242      East Margaret   
4  115 Hayes Highway Apt. 181

In [1]:
import pandas as pd
from faker import Faker
import random
import numpy as np

# Reproducibility settings: a fixed seed and an explicit row count so that
# Restart & Run All regenerates the same random draws.
SEED = 42
NUM_CUSTOMERS = 2000

# Initialize Faker and seed this instance's generator
fake = Faker()
fake.seed_instance(SEED)

# Dedicated RNG for the numeric fields; leaves the global `random` state untouched
rng = random.Random(SEED)

# Generate fake data: one row per synthetic customer
data = []
for _ in range(NUM_CUSTOMERS):
    customer_id = fake.uuid4()
    device_type = fake.random_element(elements=('Desktop', 'Mobile', 'Tablet'))
    time_spent_on_site = round(rng.uniform(5, 60), 2)
    products_viewed = rng.randint(1, 10)
    # Upper bound is products_viewed, so purchases never exceed views
    products_purchased = rng.randint(0, products_viewed)
    order_value = round(rng.uniform(10, 500), 2)
    country = fake.country()
    # NOTE(review): the '-5y' / '-1y' / '-1d' offsets appear to be resolved by
    # Faker relative to the run date, so the date columns may still shift from
    # one day to the next even with a fixed seed -- confirm if exact dates matter.
    joining_date = fake.date_between(start_date='-5y', end_date='-1y')
    # Roughly 80% of customers get a leaving date on or after their joining
    # date; the rest keep None
    if rng.random() < 0.8:
        leaving_date = fake.date_between_dates(date_start=joining_date, date_end='-1d')
    else:
        leaving_date = None

    data.append([customer_id, device_type, time_spent_on_site,
                 products_viewed, products_purchased, order_value,
                 country, joining_date, leaving_date])

# Create DataFrame (column order must match the row layout appended above)
columns = ['Customer ID', 'Device Type', 'Time Spent on Site',
           'Products Viewed', 'Products Purchased', 'Order Value',
           'Country', 'Joining Date', 'Leaving Date']
df = pd.DataFrame(data, columns=columns)


# Export data to CSV without the positional index
df.to_csv('ecommerce_fake_data.csv', index=False)

# Output
print("Data exported to 'ecommerce_fake_data.csv'")

Data exported to 'ecommerce_fake_data.csv'


In [2]:
import pandas as pd
from faker import Faker
import random

# Reproducibility settings: a fixed seed and an explicit row count so that
# Restart & Run All regenerates the same random draws.
SEED = 42
NUM_CUSTOMERS = 2000

# Initialize Faker and seed this instance's generator
fake = Faker()
fake.seed_instance(SEED)

# Dedicated RNG for the numeric fields and product pick; leaves the global
# `random` state untouched
rng = random.Random(SEED)

# List of example products
products = ['Laptop', 'Smartphone', 'Tablet', 'Headphones', 'Smartwatch', 'Camera', 'Printer', 'Monitor', 'Keyboard', 'Mouse']

# Generate fake data: one row per synthetic customer
data = []
for _ in range(NUM_CUSTOMERS):
    customer_id = fake.uuid4()
    device_type = fake.random_element(elements=('Desktop', 'Mobile', 'Tablet'))
    time_spent_on_site = round(rng.uniform(5, 60), 2)
    products_viewed = rng.randint(1, 10)
    # Upper bound is products_viewed, so purchases never exceed views
    products_purchased = rng.randint(0, products_viewed)
    order_value = round(rng.uniform(10, 500), 2)
    country = fake.country()
    # NOTE(review): the '-5y' / '-1y' / '-1d' offsets appear to be resolved by
    # Faker relative to the run date, so the date columns may still shift from
    # one day to the next even with a fixed seed -- confirm if exact dates matter.
    joining_date = fake.date_between(start_date='-5y', end_date='-1y')
    # Roughly 80% of customers get a leaving date on or after their joining
    # date; the rest keep None
    if rng.random() < 0.8:
        leaving_date = fake.date_between_dates(date_start=joining_date, date_end='-1d')
    else:
        leaving_date = None
    # A single product name is attached to each row, chosen uniformly
    product_name = rng.choice(products)

    data.append([customer_id, device_type, time_spent_on_site,
                 products_viewed, products_purchased, order_value,
                 country, joining_date, leaving_date, product_name])

# Create DataFrame (column order must match the row layout appended above)
columns = ['Customer ID', 'Device Type', 'Time Spent on Site',
           'Products Viewed', 'Products Purchased', 'Order Value',
           'Country', 'Joining Date', 'Leaving Date', 'Product Name']
df = pd.DataFrame(data, columns=columns)

# Export data to CSV without the positional index.
# NOTE: this is the same path the preceding cell writes, so that file is replaced.
df.to_csv('ecommerce_fake_data.csv', index=False)

# Output
print("Data exported to 'ecommerce_fake_data.csv'")

Data exported to 'ecommerce_fake_data.csv'
