In [10]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Build a synthetic weekly marketing dataset (visitors, acquisition category,
# spend, conversion rate, converted visitors) and export it to "sellbetter.csv".
np.random.seed(42)  # fixed seed so the generated dataset is reproducible

# One timestamp per week over 2023.
dates = pd.date_range(start="2023-01-01", end="2023-12-31", freq="W")
categories = [
    "Pub réseaux sociaux",
    "Partenariat influenceur",
    "SEO/SEA",
    "Organisation d'évènement",
]
num_dates = len(dates)

# (low, high) bounds of the conversion rate for each category, as fractions.
# A category missing from this table fails loudly with a KeyError instead of
# silently reusing the rate drawn for the previous row.
conversion_rate_bounds = {
    "Pub réseaux sociaux": (0.01, 0.03),  # 1% to 3%
    "Partenariat influenceur": (0.03, 0.07),  # 3% to 7%
    "SEO/SEA": (0.02, 0.05),  # 2% to 5%
    "Organisation d'évènement": (0.04, 0.08),  # 4% to 8%
}

# Categories are assigned round-robin in whole cycles only, so the dataset
# keeps the largest number of weeks that is a multiple of len(categories).
# Every per-row column below is sized from num_rows so the lengths always
# agree, whatever the date range is.
num_cycles = num_dates // len(categories)
num_rows = num_cycles * len(categories)
category_labels = np.tile(categories, num_cycles)

# Drawn for every date (not just num_rows) to keep the random stream, and
# therefore the generated values, stable.
website_visitors = np.random.poisson(lam=10000, size=num_dates) + np.random.normal(
    0, 50, num_dates
).astype(int)

# One uniform draw per row, scaled to a percentage.
conversion_rates = [
    np.random.uniform(*conversion_rate_bounds[label]) * 100
    for label in category_labels
]

# Create dataframes
time_series_data = {
    "Date": dates,
    "Website_Visitors": website_visitors,
}

category_data = {
    "Category": category_labels,
    # NOTE(review): a normal draw with loc=20000 and scale=10000 is unbounded
    # and can produce negative spend values -- confirm whether clipping at 0
    # is wanted.
    "Monthly_Spend": np.random.normal(loc=20000, scale=10000, size=num_rows),
}

# Combine dataframes and add conversion rates
# The trailing weeks that do not complete a category cycle are trimmed
# explicitly so both frames share the same rows before the column-wise concat.
time_series_df = pd.DataFrame(time_series_data).iloc[:num_rows]
category_df = pd.DataFrame(category_data)
category_df["Conversion_Rate"] = conversion_rates  # category-specific rates, in %
data = pd.concat([time_series_df, category_df], axis=1)

# Calculate the number of converted visitors (Conversion_Rate is a percentage)
data["Converted_Visitor"] = (
    (data["Website_Visitors"] * data["Conversion_Rate"] / 100).round().astype(int)
)
data["Website_Visitors"] = data["Website_Visitors"].astype(int)

data.to_csv("sellbetter.csv", index=False)

# Display the first few rows
# Kept as the final expression: a notebook cell only renders the value of its
# last expression, so a preview placed before to_csv() was never shown.
data.head(n=10)

ModuleNotFoundError: No module named 'pandas'