In [None]:
import pandas as pd
from faker import Faker
import random
import numpy as np
import matplotlib.pyplot as plt

fake = Faker()

CATEGORIES = ["Elektronica", "Kleding", "Schoenen", "Accessoires", "Boeken"]
SHIP_MODES = ["Express", "Standard", "Next-Day", "2-Day"]

def generate_customer_data(aantal_klanten):
    """Create a DataFrame of fake customers.

    Args:
        aantal_klanten: Number of customer rows to generate. Customer IDs
            run from 1 through this value.

    Returns:
        A pandas DataFrame with the columns Klant_ID, Naam, E-mailadres,
        Locatie (filled from ``fake.city()``), State and Country.
    """
    kolommen = ["Klant_ID", "Naam", "E-mailadres", "Locatie", "State", "Country"]
    # One row per customer; the Faker calls run left to right for each row.
    rijen = [
        [nummer, fake.name(), fake.email(), fake.city(), fake.state(), fake.country()]
        for nummer in range(1, aantal_klanten + 1)
    ]
    return pd.DataFrame(rijen, columns=kolommen)

def generate_product_data(aantal_producten):
    """Create a DataFrame of randomly generated products.

    Args:
        aantal_producten: Number of product rows to generate.

    Returns:
        A pandas DataFrame with the columns Productnaam, Categorie, Prijs
        and Voorraadniveau.
    """
    products = []

    for _ in range(aantal_producten):
        categorie = random.choice(CATEGORIES)
        # Product names are random words regardless of category.
        # NOTE: names are not de-duplicated, so two products may share a name.
        productnaam = fake.word()
        # Price drawn from a normal distribution (mean 500, sd 200),
        # floored at 10 and rounded to two decimals.
        prijs = round(max(10, np.random.normal(loc=500, scale=200)), 2)
        voorraadniveau = random.randint(0, 100)
        products.append([productnaam, categorie, prijs, voorraadniveau])

    return pd.DataFrame(
        products,
        columns=["Productnaam", "Categorie", "Prijs", "Voorraadniveau"],
    )

def generate_transaction_data(aantal_transacties, customers_df, products_df):
    """Create a DataFrame of random transactions.

    Each transaction pairs a randomly chosen customer ID with a randomly
    chosen product name, a random shipping mode and a random timestamp
    within the last year.

    Args:
        aantal_transacties: Number of transaction rows to generate.
            Transaction IDs run from 1 through this value.
        customers_df: DataFrame with a 'Klant_ID' column.
        products_df: DataFrame with a 'Productnaam' column whose second and
            third columns (by position) hold the category and the price.

    Returns:
        A pandas DataFrame with the columns Transactie_ID, Klant_ID,
        Product, Categorie, Prijs, shipmode and Datum/Tijd (datetime).
    """
    # Sample from plain lists: random.choice indexes its argument with an
    # integer position, which on a Series is a label lookup and fails when
    # the frame does not carry the default 0..n-1 index.
    klant_ids = customers_df['Klant_ID'].tolist()
    productnamen = products_df['Productnaam'].tolist()

    # Build the name -> (category, price) lookup once instead of filtering
    # the products frame for every transaction. setdefault keeps the first
    # row for a name that occurs more than once.
    product_info = {}
    categorieen = products_df.iloc[:, 1].tolist()
    prijzen = products_df.iloc[:, 2].tolist()
    for naam, categorie, prijs in zip(productnamen, categorieen, prijzen):
        product_info.setdefault(naam, (categorie, prijs))

    transactions = []
    for transactie_id in range(1, aantal_transacties + 1):
        klant_id = random.choice(klant_ids)
        product = random.choice(productnamen)
        categorie, prijs = product_info[product]
        shipmode = random.choice(SHIP_MODES)
        datum = fake.date_time_between(start_date='-1y', end_date='now')
        transactions.append([transactie_id, klant_id, product, categorie, prijs, shipmode, datum])

    transactions_df = pd.DataFrame(
        transactions,
        columns=["Transactie_ID", "Klant_ID", "Product", "Categorie", "Prijs", 'shipmode', "Datum/Tijd"],
    )
    transactions_df['Datum/Tijd'] = pd.to_datetime(transactions_df['Datum/Tijd'])
    return transactions_df

def plot_and_analyze_data(data, x_column, y_column, title):
    """Plot how often each value of ``x_column`` occurs and return summary stats.

    Args:
        data: DataFrame to analyse.
        x_column: Column whose value counts are drawn as a bar chart.
        y_column: Label for the y-axis. If it also names a column in
            ``data``, that column is summarised; otherwise the plotted
            counts are summarised.
        title: Title of the chart.

    Returns:
        The result of ``describe()`` on ``data[y_column]`` when that column
        exists, else on the value counts of ``x_column``.
    """
    counts = data[x_column].value_counts()
    # The counts are a Series, so the bar chart needs no x/y column selection.
    counts.plot(kind='bar')
    plt.title(title)
    plt.xlabel(x_column)
    plt.ylabel(y_column)
    plt.show()

    # y_column doubles as the axis label and need not be a real column;
    # fall back to the plotted counts instead of raising KeyError.
    if y_column in data:
        return data[y_column].describe()
    return counts.describe()

def validate_and_save_data(data, filename):
    """Write ``data`` to a CSV file after removing duplicate rows.

    Args:
        data: DataFrame to save; it is not modified.
        filename: Path of the CSV file to write. The index is not written.
    """
    data.drop_duplicates().to_csv(filename, index=False)

# Sizes of the generated sample datasets.
aantal_transacties = 100
aantal_klanten = 50
aantal_producten = 20

# Customers and products come first: the transactions sample from both.
klanten_df = generate_customer_data(aantal_klanten=aantal_klanten)
producten_df = generate_product_data(aantal_producten=aantal_producten)
transacties_df = generate_transaction_data(
    aantal_transacties,
    customers_df=klanten_df,
    products_df=producten_df,
)

# Bar chart of the number of transactions per category.
# NOTE(review): "Transacties" is the y-axis label; it is not a column name
# in transacties_df.
plot_and_analyze_data(
    data=transacties_df,
    x_column="Categorie",
    y_column="Transacties",
    title="Transacties per Categorie",
)

# Write each dataset to its own CSV file.
validate_and_save_data(data=transacties_df, filename="transacties_data.csv")
validate_and_save_data(data=klanten_df, filename="klanten_data.csv")
validate_and_save_data(data=producten_df, filename="producten_data.csv")
