# Dataset 1

In [1]:
import pandas as pd
import numpy as np

# Synthetic monthly retail KPI dataset: one row per month-end from 2020 through 2022,
# written to 'Retail_Dashboard_Data.csv'.

# Seed NumPy's global RNG so every np.random draw below is reproducible
np.random.seed(42)

# Generate a date range for the monthly data (3 years -> 36 month-end dates).
# An explicit MonthEnd offset is used instead of the 'M' string alias: 'M' is
# deprecated since pandas 2.2 in favour of 'ME', while the offset object is
# accepted by both older and newer pandas versions and yields the same dates.
date_range = pd.date_range(start='2020-01-01', end='2022-12-31', freq=pd.offsets.MonthEnd())

# Number of rows (one for each month)
num_rows = len(date_range)

# Generate random data for each KPI metric.
# NOTE: the order of the np.random calls determines the generated values;
# keep it stable so the seeded output does not change.
data = {
    'Date': date_range,
    'Monthly Sales': np.random.randint(100000, 200000, size=num_rows),
    'Average Transaction Value': np.random.randint(50, 200, size=num_rows),
    'Customer Retention Rate': np.random.uniform(0.5, 0.9, size=num_rows),
    'New Customers Acquired': np.random.randint(100, 1000, size=num_rows),
}

# Create a DataFrame from the data
df = pd.DataFrame(data)

# Generate random data for product category sales (one '<category> Sales' column each)
product_categories = ['Electronics', 'Furniture', 'Groceries', 'Clothing', 'Accessories']
for category in product_categories:
    df[f'{category} Sales'] = np.random.randint(10000, 50000, size=num_rows)

# Generate random data for customer demographics (independent draws per column;
# the age and gender columns are not constrained to add up to any total)
demographics = {
    'Age 18-25': np.random.randint(50, 200, size=num_rows),
    'Age 26-35': np.random.randint(100, 300, size=num_rows),
    'Age 36-45': np.random.randint(80, 250, size=num_rows),
    'Age 46-60': np.random.randint(60, 200, size=num_rows),
    'Age 60+': np.random.randint(30, 100, size=num_rows),
    'Male': np.random.randint(100, 500, size=num_rows),
    'Female': np.random.randint(100, 500, size=num_rows),
    'Other': np.random.randint(10, 50, size=num_rows),
}
for demo, values in demographics.items():
    df[demo] = values

# Generate random data for peak shopping hours
peak_hours = ['Morning (6-12)', 'Afternoon (12-18)', 'Evening (18-24)', 'Night (0-6)']
for hour in peak_hours:
    df[hour] = np.random.randint(100, 1000, size=num_rows)

# Save the DataFrame to a CSV file
df.to_csv('Retail_Dashboard_Data.csv', index=False)

# Display the first few rows of the DataFrame.
# This must be the last expression in the cell; a bare expression in the
# middle of a cell is evaluated but never rendered.
df.head()


# Dataset 2

In [6]:
import random
from faker import Faker
import pandas as pd

# Synthetic e-commerce dataset: 1000 independent rows written to 'fake_ecommerce_data.csv'.

# Faker instance plus seeds for both random sources used below
# (Faker for dates/words/countries, the stdlib `random` module for the numeric metrics)
fake = Faker()
Faker.seed(12345)
random.seed(12345)

# Device categories a row can be attributed to
device_types = ['Mobile', 'Desktop', 'Tablet']

# Column name -> list of generated values; columns are filled row by row below
data = {
    'Date': [],
    'Device Type': [],
    'Conversion Rate': [],
    'Cart Abandonment Rate': [],
    'Average Order Value': [],
    'Customer Lifetime Value': [],
    'Time Spent on Site': [],
    'Top Selling Product': [],
    'Customer Geographic Distribution': [],
}

# Generate 1000 rows of fake data.
# The draws happen in a fixed order per row so the seeded sequence is stable.
for _ in range(1000):
    # Date between three years ago and today (relative to when the cell runs)
    data['Date'].append(fake.date_between(start_date='-3y', end_date='today'))
    data['Device Type'].append(random.choice(device_types))
    data['Conversion Rate'].append(random.uniform(0.01, 0.2))
    data['Cart Abandonment Rate'].append(random.uniform(0.1, 0.9))
    data['Average Order Value'].append(random.uniform(20.0, 200.0))
    data['Customer Lifetime Value'].append(random.uniform(100.0, 1000.0))
    data['Time Spent on Site'].append(random.uniform(1.0, 1000.0))
    # Product name is just a random word from Faker
    data['Top Selling Product'].append(fake.word())
    data['Customer Geographic Distribution'].append(fake.country())

# Create a DataFrame from the generated columns
df = pd.DataFrame(data)

# Save the DataFrame to a CSV file
df.to_csv('fake_ecommerce_data.csv', index=False)

In [3]:
# Synthetic healthcare dashboard dataset, written to 'Healthcare_Fake_Data.csv'.
# Relies on `fake`, `random` and `num_rows` already being defined by earlier cells.

# One inner list per record; the values are drawn in the same order as the
# column names passed to the DataFrame below.
healthcare_data = [
    [
        fake.date_between(start_date='-2y', end_date='today'),  # date between two years ago and today
        fake.uuid4(),                    # patient identifier
        random.randint(0, 100),          # age in years
        random.uniform(0, 100),          # satisfaction score in percentage
        random.randint(5, 300),          # wait time in minutes
        random.choice([True, False]),    # whether the patient was readmitted
        random.choice([True, False]),    # whether telehealth was used
        fake.word(),                     # treatment offered (a random Faker word)
    ]
    for _ in range(num_rows)
]

# Build the DataFrame from the generated records
healthcare_columns = [
    'Date',
    'PatientID',
    'Age',
    'PatientSatisfaction',
    'WaitTime',
    'Readmission',
    'TelehealthUtilization',
    'TreatmentOffered',
]
healthcare_df = pd.DataFrame(healthcare_data, columns=healthcare_columns)

# Persist the DataFrame as CSV (without the index column)
csv_file_path_healthcare = 'Healthcare_Fake_Data.csv'
healthcare_df.to_csv(csv_file_path_healthcare, index=False)

# Cell output: preview of the first rows together with the written file path
healthcare_df.head(), csv_file_path_healthcare


(         Date                             PatientID  Age  PatientSatisfaction  \
 0  2022-08-17  385ce350-3690-4524-ad48-dfd8a597683f   21            43.957494   
 1  2021-09-19  b79ea253-5103-4db4-a201-76f54cecebad   59            57.481323   
 2  2022-10-27  91219a44-889f-4853-8ca9-6a5cc8e0571f   24            36.083629   
 3  2021-12-23  aa6fe60f-27be-475b-a97b-61c5f0ef6a6a   36            62.081074   
 4  2022-12-17  ee89f8de-0bd8-49c3-949a-ecb4ed4b9915   76            84.498073   
 
    WaitTime  Readmission  TelehealthUtilization TreatmentOffered  
 0       158        False                   True             ball  
 1        49         True                   True               or  
 2       161        False                  False           wonder  
 3       109         True                  False             draw  
 4        57        False                   True             mind  ,
 'Healthcare_Fake_Data.csv')

In [4]:
# Synthetic finance dashboard dataset, written to 'Finance_Fake_Data.csv'.
# Relies on `fake`, `random` and `num_rows` already being defined by earlier cells.

# One inner list per customer record; the values are drawn in the same order
# as the column names passed to the DataFrame below.
finance_data = [
    [
        fake.date_between(start_date='-2y', end_date='today'),  # date between two years ago and today
        fake.uuid4(),                       # customer identifier
        random.randint(18, 80),             # age in years
        random.uniform(10000, 5000000),     # assets under management in USD
        random.uniform(0, 100),             # net profit margin in percentage
        random.choice([True, False]),       # whether the customer has churned
        random.uniform(100, 10000),         # cost per acquisition in USD
        random.choice(['Stocks', 'Bonds', 'Mutual Funds', 'Real Estate', 'Cryptocurrency']),  # investment type
        random.uniform(1000, 50000),        # monthly inflow in USD
        random.uniform(1000, 50000),        # monthly outflow in USD
        fake.sentence(),                    # free-text customer feedback
    ]
    for _ in range(num_rows)
]

# Build the DataFrame from the generated records
finance_columns = [
    'Date',
    'CustomerID',
    'Age',
    'AssetsUnderManagement',
    'NetProfitMargin',
    'CustomerChurn',
    'CostPerAcquisition',
    'InvestmentType',
    'MonthlyInflow',
    'MonthlyOutflow',
    'CustomerFeedback',
]
finance_df = pd.DataFrame(finance_data, columns=finance_columns)

# Persist the DataFrame as CSV (without the index column)
csv_file_path_finance = 'Finance_Fake_Data.csv'
finance_df.to_csv(csv_file_path_finance, index=False)

# Cell output: preview of the first rows together with the written file path
finance_df.head(), csv_file_path_finance


(         Date                            CustomerID  Age  \
 0  2021-09-28  29256bdc-ad88-4a3e-a021-fe053c231d78   58   
 1  2022-08-07  3af60340-4eff-4218-b8e4-c57faf3d724f   80   
 2  2022-09-11  d3f36fda-9a22-4039-a371-116e017bad77   37   
 3  2023-02-19  dd4db761-22b7-4ecb-900a-d00df8a7587c   76   
 4  2023-04-02  1008b874-4638-4970-b749-122741cebb36   67   
 
    AssetsUnderManagement  NetProfitMargin  CustomerChurn  CostPerAcquisition  \
 0           1.038094e+06        81.218761          False         2492.317693   
 1           4.774735e+06        28.495752           True         9490.734732   
 2           2.450384e+05        85.874541           True         9879.369906   
 3           7.950186e+05        51.971370          False         4442.657727   
 4           2.838648e+06        96.332788           True         6159.766536   
 
    InvestmentType  MonthlyInflow  MonthlyOutflow  \
 0  Cryptocurrency    2952.508845    45751.145084   
 1    Mutual Funds   12340.048713    2

In [5]:
# Synthetic real estate dashboard dataset, written to 'Real_Estate_Fake_Data.csv'.
# Relies on `fake`, `random` and `num_rows` already being defined by earlier cells.

real_estate_data = []

for _ in range(num_rows):
    listing_date = fake.date_between(start_date='-2y', end_date='today')
    listing_id = fake.uuid4()
    listing_type = random.choice(['Apartment', 'House', 'Condo', 'Townhouse'])
    occupied = random.choice([True, False])  # occupancy status

    # Revenue and rental length are only drawn for occupied properties;
    # vacant ones get a literal 0 for both and consume no random numbers here.
    if occupied:
        revenue = random.uniform(1000, 10000)  # monthly revenue in USD
        months_rented = random.randint(1, 24)  # rental length in months
    else:
        revenue = 0
        months_rented = 0

    upkeep_cost = random.uniform(100, 1000)  # maintenance cost in USD
    issue_text = fake.sentence()  # description of maintenance issue logged

    # Satisfaction score (percentage) exists only for occupied properties; None otherwise
    if occupied:
        satisfaction = random.uniform(0, 100)
    else:
        satisfaction = None

    real_estate_data.append([
        listing_date,
        listing_id,
        listing_type,
        occupied,
        revenue,
        months_rented,
        upkeep_cost,
        issue_text,
        satisfaction,
    ])

# Build the DataFrame from the generated records
real_estate_columns = [
    'Date',
    'PropertyID',
    'PropertyType',
    'IsOccupied',
    'MonthlyRevenue',
    'RentalLength',
    'MaintenanceCost',
    'MaintenanceIssueLogged',
    'CustomerSatisfaction',
]
real_estate_df = pd.DataFrame(real_estate_data, columns=real_estate_columns)

# Persist the DataFrame as CSV (without the index column)
csv_file_path_real_estate = 'Real_Estate_Fake_Data.csv'
real_estate_df.to_csv(csv_file_path_real_estate, index=False)

# Cell output: preview of the first rows together with the written file path
real_estate_df.head(), csv_file_path_real_estate


(         Date                            PropertyID PropertyType  IsOccupied  \
 0  2022-12-02  ca96de89-56dc-451a-920e-a536341b51c6        Condo        True   
 1  2021-09-28  7f2b5551-097b-4f1c-a1c6-79aa58d7209e    Townhouse       False   
 2  2022-09-09  757d7acc-81e8-4876-bf7b-e2ea6c810767    Townhouse       False   
 3  2021-10-19  567a3e11-735b-476b-81cb-7b9411e7d51a    Apartment       False   
 4  2022-08-29  ee175470-1ce1-44fb-92f5-ff3fa0b7c3ff    Apartment        True   
 
    MonthlyRevenue  RentalLength  MaintenanceCost  \
 0     1012.022894            14       772.496849   
 1        0.000000             0       796.225712   
 2        0.000000             0       258.064959   
 3        0.000000             0       828.861108   
 4     2751.435515            13       193.095142   
 
                               MaintenanceIssueLogged  CustomerSatisfaction  
 0  Quality move remain themselves billion factor ...             17.451423  
 1                   Until yeah worr