In [1]:
import pandas as pd
import numpy as np
import random

# Increase weeks & introduce daily records
days = pd.date_range(start="2022-01-01", periods=1000, freq='D')  # 1000 days (~3 years)
platforms = ["Google Ads", "Facebook Ads", "Instagram Ads", "Twitter Ads", "LinkedIn Ads", "TikTok Ads"]

data = []

for day in days:
    for platform in platforms:
        ctr = round(np.random.uniform(0.5, 5.0), 2)  # CTR between 0.5% and 5%
        impressions = np.random.randint(5000, 500000)  # Large-scale campaign views
        likes = np.random.randint(100, 10000)
        shares = np.random.randint(50, 5000)
        comments = np.random.randint(30, 3000)
        sales_revenue = round(np.random.uniform(1000, 100000), 2)
        ad_spend = round(np.random.uniform(500, 50000), 2)  # Varying budget per day
        conversion_rate = round(np.random.uniform(0.5, 15.0), 2)  # Conversion rate between 0.5% and 15%
        roi = round(((sales_revenue - ad_spend) / ad_spend) * 100, 2)  # ROI Calculation

        # Sentiment Analysis (Ensure sum = 100%)
        positive = np.random.randint(40, 70)
        neutral = np.random.randint(20, 50)
        negative = 100 - (positive + neutral)

        data.append([day, platform, ctr, impressions, likes, shares, comments, 
                     sales_revenue, conversion_rate, ad_spend, roi, 
                     positive, neutral, negative])

# Convert to DataFrame
df = pd.DataFrame(data, columns=["Date", "Platform", "CTR (%)", "Impressions", 
                                  "Likes", "Shares", "Comments", "Sales Revenue ($)",
                                  "Conversion Rate (%)", "Ad Spend ($)", "ROI (%)",
                                  "Positive Sentiment (%)", "Neutral Sentiment (%)", 
                                  "Negative Sentiment (%)"])

# Save to CSV
df.to_csv("large_campaign_dataset.csv", index=False)

# Display first few rows
print(df.head())

# Check total records
print(f"Total Records: {len(df)}")


        Date       Platform  CTR (%)  Impressions  Likes  Shares  Comments  \
0 2022-01-01     Google Ads     1.13       168882   9429     331      2406   
1 2022-01-01   Facebook Ads     1.43       179926   8496    3222      1262   
2 2022-01-01  Instagram Ads     2.57       211453   1907    4078      2154   
3 2022-01-01    Twitter Ads     4.58       319919   3258    3864       382   
4 2022-01-01   LinkedIn Ads     4.43       363436   9645     127       237   

   Sales Revenue ($)  Conversion Rate (%)  Ad Spend ($)  ROI (%)  \
0           75989.55                 8.70       9499.17   699.96   
1           91536.47                 1.60      17151.48   433.69   
2           20734.99                 0.66      35372.06   -41.38   
3           58920.44                 5.99       9232.33   538.20   
4           51968.96                10.24      18750.77   177.16   

   Positive Sentiment (%)  Neutral Sentiment (%)  Negative Sentiment (%)  
0                      46                     2

In [2]:
df.shape

(6000, 14)

In [3]:
df.head()

Unnamed: 0,Date,Platform,CTR (%),Impressions,Likes,Shares,Comments,Sales Revenue ($),Conversion Rate (%),Ad Spend ($),ROI (%),Positive Sentiment (%),Neutral Sentiment (%),Negative Sentiment (%)
0,2022-01-01,Google Ads,1.13,168882,9429,331,2406,75989.55,8.7,9499.17,699.96,46,20,34
1,2022-01-01,Facebook Ads,1.43,179926,8496,3222,1262,91536.47,1.6,17151.48,433.69,64,32,4
2,2022-01-01,Instagram Ads,2.57,211453,1907,4078,2154,20734.99,0.66,35372.06,-41.38,68,21,11
3,2022-01-01,Twitter Ads,4.58,319919,3258,3864,382,58920.44,5.99,9232.33,538.2,64,37,-1
4,2022-01-01,LinkedIn Ads,4.43,363436,9645,127,237,51968.96,10.24,18750.77,177.16,40,30,30


In [5]:
df.isnull()

Unnamed: 0,Date,Platform,CTR (%),Impressions,Likes,Shares,Comments,Sales Revenue ($),Conversion Rate (%),Ad Spend ($),ROI (%),Positive Sentiment (%),Neutral Sentiment (%),Negative Sentiment (%)
0,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5995,False,False,False,False,False,False,False,False,False,False,False,False,False,False
5996,False,False,False,False,False,False,False,False,False,False,False,False,False,False
5997,False,False,False,False,False,False,False,False,False,False,False,False,False,False
5998,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [10]:
# percentage of missing values 
df.isnull().sum()/len(df)

Date                      0.0
Platform                  0.0
CTR (%)                   0.0
Impressions               0.0
Likes                     0.0
Shares                    0.0
Comments                  0.0
Sales Revenue ($)         0.0
Conversion Rate (%)       0.0
Ad Spend ($)              0.0
ROI (%)                   0.0
Positive Sentiment (%)    0.0
Neutral Sentiment (%)     0.0
Negative Sentiment (%)    0.0
dtype: float64