# Meme-to-Market Data Collection
## Steps: Data Sources Integration (Reddit, Google Trends, TikTok, KnowYourMeme)

In [None]:
import sys
# Append the parent directory to the import search path so the `src` package
# below can be imported. '..' is resolved relative to the current working
# directory, so this presumably expects the notebook to be launched from a
# subdirectory of the repository root — TODO confirm.
sys.path.append('..')

# Project collectors, one per data source used in the steps that follow.
from src.data_collection.bigquery_reddit import RedditDataCollector
from src.data_collection.google_trends import GoogleTrendsCollector
from src.data_collection.tiktok_api import TikTokDataCollector
from src.data_collection.knowyourmeme_scraper import KnowYourMemeCollector
# NOTE(review): `pd` is not referenced in the cells visible here.
import pandas as pd

## Step 1: Extract Reddit Comments from BigQuery

In [None]:
# Step 1 cell: pull Reddit comments through the project's collector, passing
# limit=50000 (presumably a cap on the number of rows — confirm in the collector).
reddit_collector = RedditDataCollector()
reddit_df = reddit_collector.extract_reddit_comments(limit=50000)

# Report the result's shape and column names with a single print call, then
# leave `head()` as the trailing expression so the notebook renders a preview.
print(
    f"Reddit Data Shape: {reddit_df.shape}",
    f"Columns: {reddit_df.columns.tolist()}",
    sep="\n",
)
reddit_df.head()

## Step 2: Collect Google Trends Search Velocity

In [None]:
# Step 2 cell: fetch Google Trends data, then derive the search-delta summary
# from it. 'today 3-m' is presumably the pytrends-style "last three months"
# window — confirm against the collector.
trends_collector = GoogleTrendsCollector()
trends_df = trends_collector.collect_trends_data(timeframe='today 3-m')
trends_summary = trends_collector.calculate_search_delta(trends_df)

# One print emits the shape line, a blank line, and the summary heading; the
# trailing expression is what the notebook renders as the cell output.
print(
    f"Trends Data Shape: {trends_df.shape}",
    "\nSearch Delta Summary:",
    sep="\n",
)
trends_summary.head(10)

## Step 3: Fetch TikTok Trending Data

In [None]:
# Step 3 cell: fetch trending TikTok hashtags through the project collector.
tiktok_collector = TikTokDataCollector()
tiktok_trends_df = tiktok_collector.fetch_trending_hashtags()

print(f"TikTok Trends Shape: {tiktok_trends_df.shape}")
# The sum of 'viral_threshold' is reported as a count of viral hashtags, which
# presumably means the column is a boolean / 0-1 flag — TODO confirm in the
# collector. A missing column raises here, after the shape line is printed.
print(f"Viral hashtags: {tiktok_trends_df['viral_threshold'].sum()}")
# Trailing expression: rendered by the notebook as the cell's output.
tiktok_trends_df.head()

## Step 4: Collect KnowYourMeme Weaponized Patterns

In [None]:
# Step 4 cell: load the "weaponized" meme patterns from the KnowYourMeme collector.
kym_collector = KnowYourMemeCollector()
weaponized_patterns = kym_collector.fetch_weaponized_patterns()

# Only len() is applied to the result in this notebook; its concrete type
# (list, dict, DataFrame, ...) is not visible from here.
print(f"Weaponized Patterns: {len(weaponized_patterns)}")
# Trailing expression: the notebook displays the whole collection.
weaponized_patterns

## Step 5: Save Raw Reddit Data to BigQuery

In [None]:
# Persist the Reddit result via the collector created in Step 1.
# 'reddit_meme_raw' is presumably the destination table name — verify against
# save_to_bigquery.
# NOTE(review): only the Reddit data is written here; no save call for the
# Trends, TikTok or KnowYourMeme results appears in the visible cells.
reddit_collector.save_to_bigquery(reddit_df, 'reddit_meme_raw')
# Reached only if the save call above returned without raising.
print("Reddit data saved to BigQuery")

## Step 6: Data Quality Summary

In [None]:
# Step 6 cell: print item counts for every source collected in the earlier
# cells. The heading and the four count lines go out through one print call.
print(
    "Data Collection Summary:",
    f"Reddit comments: {len(reddit_df)}",
    f"Google Trends data points: {len(trends_df)}",
    f"TikTok trends: {len(tiktok_trends_df)}",
    f"Weaponized patterns: {len(weaponized_patterns)}",
    sep="\n",
)

# The column-dependent lines stay as separate prints so the counts above are
# still shown if 'date' or 'subreddit' is missing from the Reddit result.
print(f"\nDate range: {reddit_df['date'].min()} to {reddit_df['date'].max()}")
print(f"Unique subreddits: {reddit_df['subreddit'].nunique()}")