In [1]:
import os
import re
import csv
import tempfile
from datetime import datetime
from statistics import mean
from collections import Counter

import instaloader
from tqdm import tqdm
from transformers import pipeline

# Optional: for video meta
try:
    from moviepy.editor import VideoFileClip
    MOVIEPY_AVAILABLE = True
except ImportError:
    MOVIEPY_AVAILABLE = False

# -----------------------
# Config
# -----------------------
MAX_COMMENTS_PER_POST = 50
MOOD_LABELS = ["motivation", "comedy", "brainrot", "informative"]

# -----------------------
# Helpers
# -----------------------
def extract_hashtags(text):
    return re.findall(r"#(\w+)", text or "")

def init_pipelines():
    """Load ML models lazily."""
    global mood_pipeline, sentiment_pipeline
    if "mood_pipeline" not in globals():
        print("Loading transformers pipelines …")
        mood_pipeline = pipeline("zero-shot-classification",
                                 model="facebook/bart-large-mnli")
        sentiment_pipeline = pipeline("sentiment-analysis")

def classify_mood(text):
    if not text.strip():
        return None
    out = mood_pipeline(text, candidate_labels=MOOD_LABELS)
    return out["labels"][0]

def analyse_comment_tone(comments):
    """Return counts of sentiment labels."""
    if not comments:
        return {}
    texts = [c["text"] for c in comments if c["text"]]
    if not texts:
        return {}
    results = sentiment_pipeline(texts)
    counts = Counter(r["label"] for r in results)
    return dict(counts)

def download_and_get_video_meta(url):
    """Download a video temporarily and return duration, first-3s hook metric."""
    if not MOVIEPY_AVAILABLE or not url:
        return None, None
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
    try:
        import requests
        r = requests.get(url, stream=True)
        for chunk in r.iter_content(chunk_size=8192):
            if chunk:
                tmp.write(chunk)
        tmp.close()
        clip = VideoFileClip(tmp.name)
        duration = clip.duration
        # naive "hook performance" = ratio of first 3 s to total length
        hook_ratio = min(3, duration) / duration
        clip.close()
        return duration, hook_ratio
    except Exception:
        return None, None
    finally:
        try:
            os.remove(tmp.name)
        except OSError:
            pass

# -----------------------
# Instagram fetch
# -----------------------
def get_posts_instaloader(username, max_posts=20, login_user=None, login_pass=None):
    L = instaloader.Instaloader(download_videos=False,
                                save_metadata=False,
                                download_comments=False)
    if login_user and login_pass:
        L.login(login_user, login_pass)
    profile = instaloader.Profile.from_username(L.context, username)
    posts = []
    for i, post in enumerate(tqdm(profile.get_posts(), total=max_posts)):
        if i >= max_posts:
            break
        data = {
            "id": str(post.mediaid),
            "shortcode": post.shortcode,
            "caption": post.caption or "",
            "hashtags": extract_hashtags(post.caption or ""),
            "likes": post.likes,
            "comments_count": post.comments_count,
            "is_video": post.is_video,
            "display_url": post.url,
            "video_url": post.video_url if post.is_video else None,
            "timestamp": post.date_utc.isoformat(),
            # New fields
            "location": post.location.name if post.location else None,
            "music_title": None,   # Placeholder (needs Graph API)
            "shares": None,        # Placeholder (needs Graph API)
            "saves": None,         # Placeholder (needs Graph API)
        }
        comments = []
        try:
            for c in post.get_comments():
                comments.append({
                    "text": getattr(c, "text", ""),
                    "owner": getattr(getattr(c, "owner", None), "username", None)
                })
                if len(comments) >= MAX_COMMENTS_PER_POST:
                    break
        except Exception:
            pass
        data["comments"] = comments
        posts.append(data)
    return posts

# -----------------------
# Main analysis
# -----------------------
def analyse_posts(posts, csv_path):
    init_pipelines()
    rows = []
    for p in tqdm(posts, desc="Analysing"):
        mood = classify_mood(p["caption"])
        tone_counts = analyse_comment_tone(p["comments"])
        duration, hook_ratio = (None, None)
        if p["is_video"]:
            duration, hook_ratio = download_and_get_video_meta(p["video_url"])
        rows.append({
            "id": p["id"],
            "shortcode": p["shortcode"],
            "timestamp": p["timestamp"],
            "likes": p["likes"],
            "comments_count": p["comments_count"],
            "shares": p["shares"],
            "saves": p["saves"],
            "caption": p["caption"],
            "hashtags": ",".join(p["hashtags"]),
            "mood": mood,
            "comment_tone": tone_counts,
            "video_duration": duration,
            "hook_ratio_first3s": hook_ratio,
            "location": p["location"],
            "music_title": p["music_title"],
        })
    # write CSV
    keys = rows[0].keys()
    with open(csv_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=keys)
        writer.writeheader()
        for r in rows:
            writer.writerow(r)
    return rows

def print_suggestions(rows):
    # Basic insights
    moods = [r["mood"] for r in rows if r["mood"]]
    most_common_mood = Counter(moods).most_common(1)
    hashtags = [h for r in rows for h in (r["hashtags"].split(",") if r["hashtags"] else [])]
    top_hashtags = [h for h, _ in Counter(hashtags).most_common(5)]
    avg_hook = mean([r["hook_ratio_first3s"] for r in rows if r["hook_ratio_first3s"]]) \
               if any(r["hook_ratio_first3s"] for r in rows) else None

    print("\n--- Suggestions ---")
    if most_common_mood:
        print(f"• Most frequent mood detected: {most_common_mood[0][0]}")
    if top_hashtags:
        print(f"• Top hashtags to refine around: {', '.join(top_hashtags)}")
    if avg_hook:
        print(f"• Average first-3-seconds hook ratio: {avg_hook:.2f}")
        if avg_hook < 0.2:
            print("  Consider stronger hooks or A/B thumbnails.")
    print("• Post when your audience is most active (use IG Insights to get actual active hours).")
    print("• Tailor colour palette & fonts to the dominant mood for stronger branding.")

# -----------------------
# Run
# -----------------------
if __name__ == "__main__":
    class Args:
        def __init__(self, username, max_posts=10, login_user=None, login_pass=None, out="ig_posts.csv"):
            self.username = username
            self.max_posts = max_posts
            self.login_user = login_user
            self.login_pass = login_pass
            self.out = out

    # Example: replace with your own account
    args = Args(username='instagram', max_posts=5)

    posts = get_posts_instaloader(args.username,
                                  max_posts=args.max_posts,
                                  login_user=args.login_user,
                                  login_pass=args.login_pass)
    rows = analyse_posts(posts, args.out)
    print(f"\nCSV saved to {args.out}")
    print_suggestions(rows)


JSON Query to graphql/query: 403 Forbidden when accessing https://www.instagram.com/graphql/query [retrying; skip with ^C]
JSON Query to graphql/query: 401 Unauthorized - "fail" status, message "Please wait a few minutes before you try again." when accessing https://www.instagram.com/graphql/query?variables=%7B%22data%22%3A%7B%22count%22%3A12%2C%22include_relationship_info%22%3Atrue%2C%22latest_besties_reel_media%22%3Atrue%2C%22latest_reel_media%22%3Atrue%7D%2C%22username%22%3A%22instagram%22%2C%22__relay_internal__pv__PolarisFeedShareMenurelayprovider%22%3Afalse%7D&doc_id=7898261790222653&server_timestamps=true [retrying; skip with ^C]


ConnectionException: JSON Query to graphql/query: 401 Unauthorized - "fail" status, message "Please wait a few minutes before you try again." when accessing https://www.instagram.com/graphql/query?variables=%7B%22data%22%3A%7B%22count%22%3A12%2C%22include_relationship_info%22%3Atrue%2C%22latest_besties_reel_media%22%3Atrue%2C%22latest_reel_media%22%3Atrue%7D%2C%22username%22%3A%22instagram%22%2C%22__relay_internal__pv__PolarisFeedShareMenurelayprovider%22%3Afalse%7D&doc_id=7898261790222653&server_timestamps=true

In [2]:
import requests
import csv
from collections import Counter
from statistics import mean
from tqdm import tqdm

ACCESS_TOKEN = "YOUR_ACCESS_TOKEN"
IG_USER_ID = "YOUR_IG_USER_ID"  # numeric ID
MAX_POSTS = 10

# Define fields to fetch
FIELDS = (
    "id,caption,media_type,media_url,permalink,timestamp,"
    "like_count,comments_count,thumbnail_url,children{media_url,media_type},"
    "insights.metric(reach,saved,impressions)"
)

def fetch_posts(user_id, access_token, max_posts=10):
    url = f"https://graph.facebook.com/v17.0/{user_id}/media"
    params = {
        "fields": FIELDS,
        "access_token": access_token,
        "limit": max_posts
    }

    posts = []
    r = requests.get(url, params=params).json()

    for item in r.get("data", []):
        posts.append({
            "id": item.get("id"),
            "caption": item.get("caption", ""),
            "media_type": item.get("media_type"),
            "media_url": item.get("media_url"),
            "permalink": item.get("permalink"),
            "timestamp": item.get("timestamp"),
            "likes": item.get("like_count"),
            "comments_count": item.get("comments_count"),
            "saves": None,  # Can get from insights
            "reach": None,  # Can get from insights
        })

    return posts

# -----------------------
# Example CSV Export
# -----------------------
def save_to_csv(posts, path="ig_graph_api_posts.csv"):
    if not posts:
        return
    keys = posts[0].keys()
    with open(path, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=keys)
        writer.writeheader()
        for p in posts:
            writer.writerow(p)
    print(f"CSV saved to {path}")

# -----------------------
# Run
# -----------------------
if __name__ == "__main__":
    posts = fetch_posts(IG_USER_ID, ACCESS_TOKEN, max_posts=MAX_POSTS)
    save_to_csv(posts)


In [3]:
import os
import re
import csv
import json
import tempfile
import argparse
from datetime import datetime
from statistics import mean
from collections import Counter

import instaloader
from tqdm import tqdm
from transformers import pipeline

# Optional: for video meta
try:
    from moviepy.editor import VideoFileClip
    MOVIEPY_AVAILABLE = True
except ImportError:
    MOVIEPY_AVAILABLE = False

# -----------------------
# Config
# -----------------------
MAX_COMMENTS_PER_POST = 50
MOOD_LABELS = ["motivation", "comedy", "brainrot", "informative"]

# -----------------------
# Helpers
# -----------------------
def extract_hashtags(text):
    return re.findall(r"#(\w+)", text or "")

def init_pipelines():
    """Load ML models lazily."""
    global mood_pipeline, sentiment_pipeline
    if "mood_pipeline" not in globals():
        print("Loading transformers pipelines …")
        mood_pipeline = pipeline("zero-shot-classification",
                                 model="facebook/bart-large-mnli")
        sentiment_pipeline = pipeline("sentiment-analysis")

def classify_mood(text):
    if not text or not text.strip():
        return None
    try:
        out = mood_pipeline(text, candidate_labels=MOOD_LABELS)
        return out["labels"][0]
    except Exception:
        return None

def analyse_comment_tone(comments):
    """Return counts of sentiment labels."""
    if not comments:
        return {}
    texts = [c["text"] for c in comments if c["text"]]
    if not texts:
        return {}
    try:
        results = sentiment_pipeline(texts)
        counts = Counter(r["label"] for r in results)
        return dict(counts)
    except Exception:
        return {}

def download_and_get_video_meta(url):
    """Download a video temporarily and return duration, first-3s hook metric."""
    if not MOVIEPY_AVAILABLE or not url:
        return None, None
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
    try:
        import requests
        r = requests.get(url, stream=True, timeout=15)
        for chunk in r.iter_content(chunk_size=8192):
            if chunk:
                tmp.write(chunk)
        tmp.close()
        clip = VideoFileClip(tmp.name)
        duration = clip.duration
        # naive "hook performance" = ratio of first 3 s to total length
        hook_ratio = min(3, duration) / duration
        clip.close()
        return duration, hook_ratio
    except Exception:
        return None, None
    finally:
        try:
            os.remove(tmp.name)
        except OSError:
            pass

# -----------------------
# Instagram fetch
# -----------------------
def get_posts_instaloader(username, max_posts=20, login_user=None, login_pass=None):
    L = instaloader.Instaloader(download_videos=False,
                                save_metadata=False,
                                download_comments=False)
    if login_user and login_pass:
        L.login(login_user, login_pass)
    profile = instaloader.Profile.from_username(L.context, username)
    posts = []
    for i, post in enumerate(tqdm(profile.get_posts(), total=max_posts, desc="Fetching posts")):
        if i >= max_posts:
            break
        data = {
            "id": str(post.mediaid),
            "shortcode": post.shortcode,
            "caption": post.caption or "",
            "hashtags": extract_hashtags(post.caption or ""),
            "likes": post.likes,
            "comments_count": post.comments_count,
            "is_video": post.is_video,
            "display_url": post.url,
            "video_url": post.video_url if post.is_video else None,
            "timestamp": post.date_utc.isoformat(),
            # New fields (placeholders)
            "location": post.location.name if post.location else None,
            "music_title": None,   # Needs Graph API
            "shares": None,        # Needs Graph API
            "saves": None,         # Needs Graph API
        }
        comments = []
        try:
            for c in post.get_comments():
                comments.append({
                    "text": getattr(c, "text", ""),
                    "owner": getattr(getattr(c, "owner", None), "username", None)
                })
                if len(comments) >= MAX_COMMENTS_PER_POST:
                    break
        except Exception:
            pass
        data["comments"] = comments
        posts.append(data)
    return posts

# -----------------------
# Main analysis
# -----------------------
def analyse_posts(posts, csv_path):
    init_pipelines()
    rows = []
    for p in tqdm(posts, desc="Analysing"):
        mood = classify_mood(p["caption"])
        tone_counts = analyse_comment_tone(p["comments"])
        duration, hook_ratio = (None, None)
        if p["is_video"]:
            duration, hook_ratio = download_and_get_video_meta(p["video_url"])
        rows.append({
            "id": p["id"],
            "shortcode": p["shortcode"],
            "timestamp": p["timestamp"],
            "likes": p["likes"],
            "comments_count": p["comments_count"],
            "shares": p["shares"],
            "saves": p["saves"],
            "caption": p["caption"],
            "hashtags": ",".join(p["hashtags"]),
            "mood": mood,
            "comment_tone": json.dumps(tone_counts, ensure_ascii=False),
            "video_duration": duration,
            "hook_ratio_first3s": hook_ratio,
            "location": p["location"],
            "music_title": p["music_title"],
        })
    # write CSV
    keys = ["id", "shortcode", "timestamp", "likes", "comments_count",
            "shares", "saves", "caption", "hashtags", "mood",
            "comment_tone", "video_duration", "hook_ratio_first3s",
            "location", "music_title"]
    with open(csv_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=keys)
        writer.writeheader()
        for r in rows:
            writer.writerow(r)
    return rows

def print_suggestions(rows):
    moods = [r["mood"] for r in rows if r["mood"]]
    most_common_mood = Counter(moods).most_common(1)
    hashtags = [h for r in rows for h in (r["hashtags"].split(",") if r["hashtags"] else [])]
    top_hashtags = [h for h, _ in Counter(hashtags).most_common(5)]
    avg_hook = mean([r["hook_ratio_first3s"] for r in rows if r["hook_ratio_first3s"]]) \
               if any(r["hook_ratio_first3s"] for r in rows) else None

    print("\n--- Suggestions ---")
    if most_common_mood:
        print(f"• Most frequent mood detected: {most_common_mood[0][0]}")
    if top_hashtags:
        print(f"• Top hashtags to refine around: {', '.join(top_hashtags)}")
    if avg_hook:
        print(f"• Average first-3-seconds hook ratio: {avg_hook:.2f}")
        if avg_hook < 0.2:
            print("  ⚠️ Consider stronger hooks or better thumbnails.")
    print("• Post when your audience is most active (use IG Insights for best timing).")
    print("• Tailor colour palette & fonts to the dominant mood for stronger branding.")

# -----------------------
# Run
# -----------------------
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Instagram Post Analyzer (Hackathon Ready)")
    parser.add_argument("--username", required=True, help="Instagram username")
    parser.add_argument("--max_posts", type=int, default=10, help="Max posts to fetch")
    parser.add_argument("--login_user", help="Login username (optional)")
    parser.add_argument("--login_pass", help="Login password (optional)")
    parser.add_argument("--out", default="ig_posts.csv", help="Output CSV path")
    args = parser.parse_args()

    posts = get_posts_instaloader(args.username,
                                  max_posts=args.max_posts,
                                  login_user=args.login_user,
                                  login_pass=args.login_pass)
    rows = analyse_posts(posts, args.out)
    print(f"\n✅ CSV saved to {args.out}")
    print_suggestions(rows)


usage: ipykernel_launcher.py [-h] --username USERNAME [--max_posts MAX_POSTS]
                             [--login_user LOGIN_USER]
                             [--login_pass LOGIN_PASS] [--out OUT]
ipykernel_launcher.py: error: the following arguments are required: --username


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [10]:
!{sys.executable} -m pip install instaloader tqdm moviepy requests


Collecting moviepy
  Downloading moviepy-2.2.1-py3-none-any.whl.metadata (6.9 kB)
Collecting imageio_ffmpeg>=0.2.0 (from moviepy)
  Downloading imageio_ffmpeg-0.6.0-py3-none-win_amd64.whl.metadata (1.5 kB)
Collecting proglog<=1.0.0 (from moviepy)
  Downloading proglog-0.1.12-py3-none-any.whl.metadata (794 bytes)
Downloading moviepy-2.2.1-py3-none-any.whl (129 kB)
Downloading imageio_ffmpeg-0.6.0-py3-none-win_amd64.whl (31.2 MB)
   ---------------------------------------- 0.0/31.2 MB ? eta -:--:--
   -- ------------------------------------- 2.1/31.2 MB 11.8 MB/s eta 0:00:03
   ----------- ---------------------------- 8.9/31.2 MB 23.1 MB/s eta 0:00:01
   ------------------- -------------------- 15.5/31.2 MB 25.6 MB/s eta 0:00:01
   -------------------------- ------------- 20.4/31.2 MB 25.3 MB/s eta 0:00:01
   --------------------------------- ------ 26.2/31.2 MB 25.5 MB/s eta 0:00:01
   ---------------------------------------  31.2/31.2 MB 26.0 MB/s eta 0:00:01
   -----------------------

In [9]:
import sys
!{sys.executable} -m pip install transformers torch


Collecting transformers
  Downloading transformers-4.56.2-py3-none-any.whl.metadata (40 kB)
Collecting torch
  Downloading torch-2.8.0-cp312-cp312-win_amd64.whl.metadata (30 kB)
Collecting huggingface-hub<1.0,>=0.34.0 (from transformers)
  Downloading huggingface_hub-0.35.0-py3-none-any.whl.metadata (14 kB)
Collecting tokenizers<=0.23.0,>=0.22.0 (from transformers)
  Downloading tokenizers-0.22.1-cp39-abi3-win_amd64.whl.metadata (6.9 kB)
Collecting safetensors>=0.4.3 (from transformers)
  Downloading safetensors-0.6.2-cp38-abi3-win_amd64.whl.metadata (4.1 kB)
Collecting sympy>=1.13.3 (from torch)
  Downloading sympy-1.14.0-py3-none-any.whl.metadata (12 kB)
Downloading transformers-4.56.2-py3-none-any.whl (11.6 MB)
   ---------------------------------------- 0.0/11.6 MB ? eta -:--:--
   ---- ----------------------------------- 1.3/11.6 MB 11.3 MB/s eta 0:00:01
   ------------------------- -------------- 7.3/11.6 MB 22.7 MB/s eta 0:00:01
   ---------------------------------------- 11.6/1

In [None]:
import sys
!{sys.executable} -m pip install instaloader



Collecting instaloader
  Downloading instaloader-4.14.2-py3-none-any.whl.metadata (6.7 kB)
Downloading instaloader-4.14.2-py3-none-any.whl (67 kB)
Installing collected packages: instaloader
Successfully installed instaloader-4.14.2
