Python code snippet to generate a mock dataset for reporting issues related to data quality and integrity:

In [2]:
import pandas as pd
import random
import faker

# Faker instance that supplies the synthetic names and sentences below
fake = faker.Faker()

# Every record from all four scenarios lands in this one list. Each scenario
# uses its own set of keys, so the combined table has empty cells wherever a
# record lacks a column.
data = []

# Scenario 1 - inaccurate product information: 50 product records
data.extend(
    {
        'Product_ID': random.randint(1001, 1050),
        'Product_Name': fake.unique.first_name(),
        'Price': round(random.uniform(10, 100), 2),
        'Description': fake.sentence(nb_words=10),
    }
    for _ in range(50)
)

# Scenario 2 - missing customer data: 30 customers with no contact details
data.extend(
    {
        'Customer_ID': random.randint(2001, 2050),
        'Customer_Name': fake.name(),
        'Email': None,
        'Phone': None,
    }
    for _ in range(30)
)

# Scenario 3 - data silos and inconsistencies: 20 orders, each status drawn
# so that 'Processing' is chosen when the random draw is below 0.7
data.extend(
    {
        'Order_ID': random.randint(3001, 3050),
        'Product_ID': random.randint(1001, 1050),
        'Quantity': random.randint(1, 10),
        'Order_Status': 'Shipped' if random.random() >= 0.7 else 'Processing',
    }
    for _ in range(20)
)

# Scenario 4 - data timeliness issues: 10 inventory snapshots stamped with
# one of two fixed dates
data.extend(
    {
        'Product_ID': random.randint(1001, 1050),
        'Inventory_Level': random.randint(0, 50),
        'Last_Updated': '2023-10-29' if random.random() >= 0.5 else '2023-10-25',
    }
    for _ in range(10)
)

# Tabulate the records and write them out without the index column
df = pd.DataFrame(data)
df.to_csv('data_quality_integrity_issues.csv', index=False)


Python code snippet to generate a mock dataset for reporting issues related to customer acquisition and retention:

In [4]:
import pandas as pd
import random
import faker

# Standard-library date helpers, used to keep each customer's timeline coherent
from datetime import date, timedelta

# Create a random data generator
fake = faker.Faker()

# Logins are sampled from the last 30 days; this is the start of that window
login_window_start = date.today() - timedelta(days=30)

# Initialize list to store data
data = []

# Simulate customer acquisition and retention issues.
# The sign-up date is drawn first and then used as the lower bound for the
# last-purchase and last-login dates, so no customer appears to have bought
# something or logged in before the account existed.
for _ in range(100):
    signup_date = fake.date_between(start_date='-1y', end_date='today')
    data.append({
        'Customer_ID': random.randint(1001, 1100),
        'Customer_Name': fake.name(),
        'Email': fake.email(),
        'Signup_Date': signup_date,
        'Last_Purchase_Date': fake.date_between(start_date=signup_date, end_date='today'),
        'Total_Purchases': random.randint(1, 10),
        'Total_Spent': round(random.uniform(10, 500), 2),
        # Recent login: within the last 30 days, but never before sign-up
        'Last_Login_Date': fake.date_between(
            start_date=max(signup_date, login_window_start),
            end_date='today',
        ),
    })

# Create a DataFrame from the generated data
df = pd.DataFrame(data)

# Save the data to a CSV file
df.to_csv('customer_acquisition_retention_issues.csv', index=False)


Python code snippet to generate a mock dataset for reporting issues related to inventory management:

In [5]:
import pandas as pd
import random
import faker

# Faker instance used for product names, categories and dates
fake = faker.Faker()

# Build 100 inventory records in one pass. Each record carries a random
# product ID, a unique fake name, one of four categories, a stock level,
# a replenishment date from the last 30 days and a random alert flag.
data = [
    {
        'Product_ID': random.randint(1001, 1100),
        'Product_Name': fake.unique.first_name(),
        'Category': fake.random_element(elements=('Electronics', 'Clothing', 'Home', 'Toys')),
        'Current_Stock': random.randint(0, 100),
        'Last_Replenishment_Date': fake.date_between(start_date='-30d', end_date='today'),
        'Stock_Alert': random.choice([True, False]),
    }
    for _ in range(100)
]

# Tabulate the records
df = pd.DataFrame(data)

# Write the table to disk, leaving out the index column
df.to_csv(path_or_buf='inventory_management_issues.csv', index=False)


Python code snippet to generate a mock dataset for reporting issues related to conversion rate optimization:

In [7]:
import pandas as pd
import random
import faker

# Create a random data generator
fake = faker.Faker()

# Initialize list to store data
data = []

# Simulate conversion rate optimization issues.
# The three funnel flags are drawn in order and each stage can only be True
# when the previous one is: a visitor cannot start checkout without adding to
# the cart, and cannot place an order without starting checkout. Drawing the
# flags independently would yield impossible rows such as an order placed
# with an empty cart.
for _ in range(1000):
    added_to_cart = random.choice([True, False])
    checkout_started = added_to_cart and random.choice([True, False])
    order_placed = checkout_started and random.choice([True, False])

    data.append({
        'Visitor_ID': random.randint(1001, 2000),
        'Page_Viewed': fake.url(),
        'Add_to_Cart': added_to_cart,
        'Checkout_Started': checkout_started,
        'Order_Placed': order_placed,
    })

# Create a DataFrame from the generated data
df = pd.DataFrame(data)

# Save the data to a CSV file
df.to_csv('conversion_rate_optimization_issues.csv', index=False)


Python code snippet to generate a mock dataset for reporting issues related to payment fraud and security:

In [8]:
import pandas as pd
import random
import faker

# Create a random data generator
fake = faker.Faker()

# Initialize list to store data
data = []

# Simulate payment fraud and security issues with partially masked sensitive information
for _ in range(100):
    data.append({
        'Transaction_ID': fake.unique.uuid4(),
        'Customer_Name': fake.first_name(),
        'Customer_Email': fake.email(),
        # Only the last four digits are shown. They are drawn from 0000-9999
        # and zero-padded so that endings such as "0042" can occur too.
        'Card_Number': f"**** **** **** {random.randint(0, 9999):04d}",
        'Transaction_Amount': round(random.uniform(10, 500), 2),
        'Payment_Status': random.choice(['Approved', 'Declined', 'Pending']),
    })

# Create a DataFrame from the generated data
df = pd.DataFrame(data)

# Save the data to a CSV file
df.to_csv('payment_fraud_security_issues.csv', index=False)


Python code snippet to generate a mock dataset for reporting issues related to supply chain and logistics:

In [17]:
import pandas as pd
import random
import faker

# Faker instance used for IDs, product names and dates
fake = faker.Faker()


def _make_shipment_record():
    """Return one order record; the ship date is drawn using the order date as its start bound."""
    placed_on = fake.date_between(start_date='-30d', end_date='today')
    dispatched_on = fake.date_between_dates(date_start=placed_on)
    return {
        'Order_ID': fake.unique.uuid4(),
        'Product_ID': random.randint(1001, 1100),
        'Product_Name': fake.unique.first_name(),
        'Order_Date': placed_on,
        'Shipped_Date': dispatched_on,
        'Delivery_Status': random.choice(['Shipped', 'In Transit', 'Delivered']),
        'Shipping_Carrier': random.choice(['UPS', 'FedEx', 'DHL', 'USPS']),
    }


# 100 simulated supply chain and logistics records
data = [_make_shipment_record() for _ in range(100)]

# Tabulate the records
df = pd.DataFrame(data)

# Write the table to disk, leaving out the index column
df.to_csv('supply_chain_logistics_issues.csv', index=False)


Python code snippet to generate a mock dataset for reporting issues related to customer feedback and reviews:

In [13]:
import pandas as pd
import random
import faker

# Faker instance used for review IDs, reviewer names, text and dates
fake = faker.Faker()

# 100 simulated reviews: a unique review ID, the reviewer's first name, the
# product reviewed, a 1-5 star rating, a two-sentence body and a date from
# the past 365 days.
data = [
    {
        'Review_ID': fake.unique.uuid4(),
        'Customer_Name': fake.first_name(),
        'Product_ID': random.randint(1001, 1100),
        'Rating': random.randint(1, 5),
        'Review_Text': fake.paragraph(nb_sentences=2),
        'Review_Date': fake.date_between(start_date='-365d', end_date='today'),
    }
    for _ in range(100)
]

# Tabulate the records
df = pd.DataFrame(data)

# Write the table to disk, leaving out the index column
df.to_csv(path_or_buf='customer_feedback_reviews.csv', index=False)


Python code snippet to generate a mock dataset for reporting issues related to mobile responsiveness of websites, including different categories of websites, URL, downtime, and issues:

In [14]:
import pandas as pd
import random
import faker

# Faker instance used for site URLs and issue descriptions
fake = faker.Faker()

# Website categories a record can be assigned to
categories = [
    'E-commerce',
    'News',
    'Blogs',
    'Social Media',
    'Portfolio',
]

# 100 simulated site checks: URL, category, whether the site is mobile
# friendly, downtime between 0 and 24 hours, and a short issue description.
data = [
    {
        'Website_URL': fake.url(),
        'Category': random.choice(categories),
        'Mobile_Friendly': random.choice([True, False]),
        'Downtime_Hours': round(random.uniform(0, 24), 2),
        'Issues_Description': fake.sentence(nb_words=8),
    }
    for _ in range(100)
]

# Tabulate the records
df = pd.DataFrame(data)

# Write the table to disk, leaving out the index column
df.to_csv('mobile_responsiveness_issues.csv', index=False)


Python code snippet to generate a mock dataset for reporting issues related to competitive analysis:

In [15]:
import pandas as pd
import random
import faker

# Faker instance used for product names
fake = faker.Faker()


def _competitor_sample(low, high):
    """Return three uniform draws between low and high, each rounded to 2 decimals."""
    return [round(random.uniform(low, high), 2) for _ in range(3)]


# 100 simulated products, each with its own price and rating plus three
# competitor prices and three competitor ratings stored as lists.
data = [
    {
        'Product_ID': random.randint(1001, 1100),
        'Product_Name': fake.unique.first_name(),
        'Price': round(random.uniform(10, 500), 2),
        'Customer_Rating': round(random.uniform(1, 5), 2),
        'Competitor_Prices': _competitor_sample(10, 500),
        'Competitor_Ratings': _competitor_sample(1, 5),
    }
    for _ in range(100)
]

# Tabulate the records
df = pd.DataFrame(data)

# Write the table to disk, leaving out the index column
df.to_csv('competitive_analysis_issues.csv', index=False)


Python code snippet to generate a mock dataset for reporting issues related to regulatory compliance:

In [16]:
import pandas as pd
import random
import faker

# Faker instance used for transaction IDs, names and e-mail addresses
fake = faker.Faker()

# 100 simulated transactions, each tagged with a randomly chosen region and a
# randomly chosen regulation (the two are drawn independently of each other).
data = [
    {
        'Transaction_ID': fake.unique.uuid4(),
        'Customer_Name': fake.first_name(),
        'Customer_Email': fake.email(),
        'Product_ID': random.randint(1001, 1100),
        'Transaction_Amount': round(random.uniform(10, 500), 2),
        'Country': random.choice(['US', 'EU', 'Canada', 'Australia']),
        'Regulatory_Issues': random.choice(['GDPR', 'CCPA', 'HIPAA', 'PIPEDA']),
    }
    for _ in range(100)
]

# Tabulate the records
df = pd.DataFrame(data)

# Write the table to disk, leaving out the index column
df.to_csv(path_or_buf='regulatory_compliance_issues.csv', index=False)
