In [4]:
import streamlit as st
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from datetime import datetime, timedelta
import random

In [2]:

def generate_organic_data(days=30, end_date=None):
    """
    Generate comprehensive dummy data for all 4 platforms.

    Produces one row per platform per calendar day for the ``days`` days
    ending on ``end_date`` (inclusive). Data includes: followers,
    impressions, likes, comments, shares, saves, link_clicks,
    profile_visits, posts_published, views, plus the derived columns
    engagement_rate, share_of_voice and profile_conversion_rate.

    Parameters
    ----------
    days : int, default 30
        Number of daily rows to generate per platform. ``0`` returns an
        empty frame that still has every column.
    end_date : date-like, optional
        Last day of the window (anything ``pd.Timestamp`` accepts).
        Defaults to today. Any time-of-day component is dropped.

    Returns
    -------
    pd.DataFrame
        ``4 * days`` rows, grouped by platform in configuration order
        (Instagram, TikTok, YouTube, LinkedIn), each in ascending date order.

    Raises
    ------
    ValueError
        If ``days`` is negative.

    Notes
    -----
    NumPy's global random state is re-seeded with 42 on every call, so the
    metrics are reproducible for a given ``days`` and date window (the
    weekend boost depends on the weekday of each date).
    """
    if days < 0:
        raise ValueError(f"days must be non-negative, got {days}")

    np.random.seed(42)

    # Anchor the window on whole calendar days. Using `periods=days` yields
    # exactly `days` rows; a start/end range spanning `days` days would
    # include both endpoints and yield one row too many.
    last_day = pd.Timestamp.now() if end_date is None else pd.Timestamp(end_date)
    dates = pd.date_range(end=last_day.normalize(), periods=days, freq='D')

    platforms_config = {
        "Instagram": {
            "base_followers": 45200,
            "daily_growth_range": (50, 250),
            "base_impressions": (15000, 35000),
            "engagement_multiplier": 0.065,
            "posts_per_week": 5,
            "base_views": (8000, 25000),
        },
        "TikTok": {
            "base_followers": 28500,
            "daily_growth_range": (80, 400),
            "base_impressions": (25000, 80000),
            "engagement_multiplier": 0.08,
            "posts_per_week": 4,
            "base_views": (20000, 100000),
        },
        "YouTube": {
            "base_followers": 12800,
            "daily_growth_range": (10, 80),
            "base_impressions": (5000, 20000),
            "engagement_multiplier": 0.045,
            "posts_per_week": 2,
            "base_views": (3000, 15000),
        },
        "LinkedIn": {
            "base_followers": 8900,
            "daily_growth_range": (5, 40),
            "base_impressions": (3000, 12000),
            "engagement_multiplier": 0.055,
            "posts_per_week": 3,
            "base_views": (2000, 8000),
        }
    }

    # Explicit column list so an empty window still produces a frame on which
    # the derived metrics below can be computed.
    columns = [
        "date", "platform", "followers", "follower_growth", "impressions",
        "likes", "comments", "shares", "saves", "views",
        "profile_visits", "link_clicks", "posts_published", "posts_goal_weekly",
    ]

    all_data = []

    for platform, config in platforms_config.items():
        # Running follower total, carried from one day to the next
        followers = config["base_followers"]
        em = config["engagement_multiplier"]
        posts_goal_weekly = config["posts_per_week"]

        for date in dates:
            # Follower growth (with some variance)
            daily_growth = np.random.randint(*config["daily_growth_range"])
            # Occasional dips (unfollows): ~15% of days show a net loss
            if np.random.random() < 0.15:
                daily_growth = -np.random.randint(10, 50)
            followers += daily_growth

            # Impressions, with a 20% boost on Saturday/Sunday
            impressions = np.random.randint(*config["base_impressions"])
            if date.weekday() >= 5:
                impressions = int(impressions * 1.2)

            # Engagement is derived from impressions; the secondary
            # interactions are sampled as fractions of likes
            likes = int(impressions * em * np.random.uniform(0.6, 1.4))
            comments = int(likes * np.random.uniform(0.05, 0.15))
            shares = int(likes * np.random.uniform(0.03, 0.10))
            saves = int(likes * np.random.uniform(0.08, 0.20))

            # Traffic metrics
            profile_visits = int(impressions * np.random.uniform(0.02, 0.06))
            link_clicks = int(profile_visits * np.random.uniform(0.10, 0.30))

            # Views (video views)
            views = np.random.randint(*config["base_views"])

            # Posts published (not every day): at most one post per day,
            # with probability equal to the weekly goal spread over 7 days
            posts_published = 1 if np.random.random() < (posts_goal_weekly / 7) else 0

            all_data.append({
                "date": date,
                "platform": platform,
                "followers": followers,
                "follower_growth": daily_growth,
                "impressions": impressions,
                "likes": likes,
                "comments": comments,
                "shares": shares,
                "saves": saves,
                "views": views,
                "profile_visits": profile_visits,
                "link_clicks": link_clicks,
                "posts_published": posts_published,
                "posts_goal_weekly": posts_goal_weekly,
            })

    df = pd.DataFrame(all_data, columns=columns)

    # Calculate derived metrics (all expressed as percentages)
    df["engagement_rate"] = (
        (df["likes"] + df["comments"] + df["shares"]) / df["impressions"] * 100
    )
    df["share_of_voice"] = (
        (df["saves"] + df["shares"]) / df["impressions"] * 100
    )
    df["profile_conversion_rate"] = (
        df["link_clicks"] / df["profile_visits"] * 100
    )

    return df


In [5]:
# Generate organic data for the last 30 days (window length is set by the days argument)
organic_data = generate_organic_data(days=30)

In [7]:
# Save to CSV for use in other notebooks. The path is relative to the notebook's
# working directory; index=False leaves the DataFrame row index out of the file.
organic_data.to_csv("../data/processed/organic_data.csv", index=False)

In [8]:
def generate_content_library(num_posts=30):
    """
    Generate dummy content library data (individual posts).
    Each post has: post_id, title, platform, content_type, date, views,
    likes, comments, shares, saves, link_clicks, virality_score,
    conversion_score.

    Parameters
    ----------
    num_posts : int, default 30
        Number of posts to generate. Titles are reused cyclically when
        more posts are requested than there are titles.

    Returns
    -------
    pd.DataFrame
        One row per post. ``date`` is a ``YYYY-MM-DD`` string between
        today and 29 days ago.

    Notes
    -----
    Numeric metrics come from NumPy's global random state (re-seeded with
    123 on every call). Platform, content type and post age come from a
    private, seeded ``random.Random`` instance, so the whole output is
    reproducible without disturbing the global ``random`` state.
    """
    np.random.seed(123)
    # Seeding NumPy does not seed the stdlib `random` module, so the
    # categorical picks get their own seeded generator.
    picker = random.Random(123)

    content_types = {
        "Instagram": ["Reel", "Story", "Carousel", "Feed Post"],
        "TikTok": ["Short Video", "Duet", "Stitch"],
        "YouTube": ["Short", "Long Video", "Live"],
        "LinkedIn": ["Article", "Post", "Document"],
    }

    post_titles = [
        "Behind the Scenes: How We Build Products",
        "5 Tips for Better Engagement",
        "Customer Success Story: Brand X",
        "Weekly Motivation Monday",
        "Product Launch Teaser",
        "Q&A Session with the Team",
        "Industry Trend Breakdown",
        "Day in the Life at Office",
        "Tutorial: Getting Started Guide",
        "Community Spotlight Feature",
        "New Feature Announcement",
        "Weekend Vibes & Culture",
        "Expert Interview Series Ep.1",
        "Before vs After Transformation",
        "Myth Busters: Common Mistakes",
        "Flash Sale Announcement",
        "User Generated Content Reshare",
        "Infographic: Key Statistics",
        "Team Celebration Moment",
        "Throwback Thursday Classic",
        "How-to: Advanced Tips & Tricks",
        "Live Q&A Recap Highlights",
        "Partner Collaboration Post",
        "Seasonal Campaign Launch",
        "Data Report: Monthly Insights",
        "Sneak Peek: Upcoming Release",
        "Challenge: Join the Trend",
        "Thank You 10K Followers!",
        "Case Study: ROI Results",
        "Friday Fun: Memes & Laughs",
    ]

    platforms = list(content_types)
    # Read the clock once so every post is dated relative to the same instant
    today = datetime.now()

    posts = []
    for i in range(num_posts):
        platform = picker.choice(platforms)
        content_type = picker.choice(content_types[platform])
        days_ago = picker.randint(0, 29)
        post_date = today - timedelta(days=days_ago)

        # Engagement funnel: likes and link clicks scale with views,
        # secondary interactions scale with likes
        views = np.random.randint(500, 150000)
        likes = int(views * np.random.uniform(0.03, 0.12))
        comments = int(likes * np.random.uniform(0.05, 0.20))
        shares = int(likes * np.random.uniform(0.02, 0.15))
        saves = int(likes * np.random.uniform(0.05, 0.25))
        link_clicks = int(views * np.random.uniform(0.005, 0.03))

        posts.append({
            "post_id": f"POST-{i+1:03d}",
            # Wrap around so num_posts may exceed the number of titles
            "title": post_titles[i % len(post_titles)],
            "platform": platform,
            "content_type": content_type,
            "date": post_date.strftime("%Y-%m-%d"),
            "views": views,
            "likes": likes,
            "comments": comments,
            "shares": shares,
            "saves": saves,
            "link_clicks": link_clicks,
            # Both scores are percentages of views; max(views, 1) guards
            # against division by zero
            "virality_score": round((shares + saves) / max(views, 1) * 100, 2),
            "conversion_score": round(link_clicks / max(views, 1) * 100, 2),
        })

    return pd.DataFrame(posts)

In [9]:
# Build the 30-post dummy content library and preview its first rows
content_library = generate_content_library(num_posts=30)
content_library.head()

Unnamed: 0,post_id,title,platform,content_type,date,views,likes,comments,shares,saves,link_clicks,virality_score,conversion_score
0,POST-001,Behind the Scenes: How We Build Products,YouTube,Long Video,2026-02-08,16225,904,75,82,175,252,1.58,1.55
1,POST-002,5 Tips for Better Engagement,YouTube,Short,2026-01-13,66132,5434,385,391,953,866,2.03,1.31
2,POST-003,Customer Success Story: Brand X,YouTube,Short,2026-01-21,36162,1279,140,148,110,339,0.71,0.94
3,POST-004,Weekly Motivation Monday,YouTube,Short,2026-01-24,121928,8353,790,701,1654,1700,1.93,1.39
4,POST-005,Product Launch Teaser,LinkedIn,Document,2026-02-09,149338,8820,919,438,959,3102,0.94,2.08


In [10]:
# Save to CSV for use in other notebooks. The path is relative to the notebook's
# working directory; index=False leaves the DataFrame row index out of the file.
content_library.to_csv("../data/processed/content_library.csv", index=False)