In [None]:
pip install instaloader python-dotenv

Collecting instaloader
  Using cached instaloader-4.14.2-py3-none-any.whl.metadata (6.7 kB)
Collecting google-generativeai
  Using cached google_generativeai-0.8.5-py3-none-any.whl.metadata (3.9 kB)
Collecting google-ai-generativelanguage==0.6.15 (from google-generativeai)
  Using cached google_ai_generativelanguage-0.6.15-py3-none-any.whl.metadata (5.7 kB)
Collecting google-api-core (from google-generativeai)
  Using cached google_api_core-2.25.1-py3-none-any.whl.metadata (3.0 kB)
Collecting google-api-python-client (from google-generativeai)
  Using cached google_api_python_client-2.178.0-py3-none-any.whl.metadata (7.0 kB)
Collecting google-auth>=2.15.0 (from google-generativeai)
  Using cached google_auth-2.40.3-py2.py3-none-any.whl.metadata (6.2 kB)
Collecting pydantic (from google-generativeai)
  Using cached pydantic-2.11.7-py3-none-any.whl.metadata (67 kB)
Collecting proto-plus<2.0.0dev,>=1.22.3 (from google-ai-generativelanguage==0.6.15->google-generativeai)
  Using cached prot

ERROR: Could not install packages due to an OSError: [WinError 2] The system cannot find the file specified: 'c:\\Python312\\Scripts\\instaloader.exe' -> 'c:\\Python312\\Scripts\\instaloader.exe.deleteme'


[notice] A new release of pip is available: 24.0 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [7]:
pip install google-generativeai

Collecting google-generativeai
  Using cached google_generativeai-0.8.5-py3-none-any.whl.metadata (3.9 kB)
Collecting google-ai-generativelanguage==0.6.15 (from google-generativeai)
  Using cached google_ai_generativelanguage-0.6.15-py3-none-any.whl.metadata (5.7 kB)
Collecting google-api-core (from google-generativeai)
  Using cached google_api_core-2.25.1-py3-none-any.whl.metadata (3.0 kB)
Collecting google-api-python-client (from google-generativeai)
  Using cached google_api_python_client-2.178.0-py3-none-any.whl.metadata (7.0 kB)
Collecting google-auth>=2.15.0 (from google-generativeai)
  Using cached google_auth-2.40.3-py2.py3-none-any.whl.metadata (6.2 kB)
Collecting googleapis-common-protos<2.0.0,>=1.56.2 (from google-api-core->google-generativeai)
  Using cached googleapis_common_protos-1.70.0-py3-none-any.whl.metadata (9.3 kB)
Collecting google-auth-httplib2<1.0.0,>=0.2.0 (from google-api-python-client->google-generativeai)
  Using cached google_auth_httplib2-0.2.0-py2.py3-no


[notice] A new release of pip is available: 24.0 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
import json
import mimetypes
import os
import re
from glob import glob
from itertools import islice

import google.generativeai as genai
import instaloader
from dotenv import load_dotenv

# ================== CONFIG ==================
USERNAME = "tapash.bs"  # Instagram handle whose posts are fetched
NUM_POSTS = 15  # Upper bound on how many recent posts are fetched
MEDIA_DIR = f"{USERNAME}_media"  # Folder that receives the downloaded images
JSON_FILE = f"{USERNAME}_instagram_data.json"  # Output file for the post records

# Credentials come from key.env so they never appear in the notebook itself.
load_dotenv("key.env")
SESSION_ID = os.environ.get("INSTAGRAM_SESSIONID")
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")

# Stop immediately unless both credentials are present and non-empty.
if not (SESSION_ID and GEMINI_API_KEY):
    raise ValueError("Missing INSTAGRAM_SESSIONID or GEMINI_API_KEY in key.env file!")

# ================== FUNCTIONS ==================
def extract_locations_from_text(text):
    """Extract candidate place names from a caption.

    A candidate is a run of capitalised words that directly follows one of the
    lowercase cue words "at", "in", "visited" or "to"
    (e.g. "in New York" -> "New York"). This is a purely textual heuristic:
    any capitalised word after a cue word is reported, place or not.

    Args:
        text: Caption text; may be None or empty.

    Returns:
        list[str]: Unique candidates in order of first appearance. Empty when
        ``text`` is falsy or nothing matches.
    """
    if not text:
        return []
    possible_places = re.findall(
        # [^\S\r\n] is "whitespace other than a line break", so a multi-word
        # name cannot absorb the capitalised first word of the next line.
        r'\b(?:at|in|visited|to)\s+([A-Z][a-zA-Z]+(?:[^\S\r\n][A-Z][a-zA-Z]+)*)',
        text
    )
    # dict.fromkeys de-duplicates while keeping first-seen order; list(set(...))
    # returned the names in an arbitrary order that could differ between runs.
    return list(dict.fromkeys(possible_places))

# ================== INSTALOADER SETUP ==================
print(f"[+] Fetching last {NUM_POSTS} posts for {USERNAME}...")

# Loader options: video downloads, comment downloads and metadata saving are
# all switched off.
loader_options = dict(
    download_videos=False,
    download_comments=False,
    save_metadata=False,
)
L = instaloader.Instaloader(**loader_options)

# Set the "sessionid" cookie on the loader context's session object.
# NOTE(review): _session is a private attribute of the context, so this may
# change between instaloader versions.
session_cookies = L.context._session.cookies
session_cookies.set("sessionid", SESSION_ID)

# Look up the profile first, then make sure the media folder exists.
profile = instaloader.Profile.from_username(L.context, USERNAME)
os.makedirs(MEDIA_DIR, exist_ok=True)

# ================== SCRAPING LOOP ==================
# One dict per fetched post; later cells serialise this list and read it back.
data = []

# islice stops after exactly NUM_POSTS items. The previous enumerate/break form
# pulled one additional post from the iterator before breaking out.
# max(..., 0) keeps a negative NUM_POSTS meaning "fetch nothing", as before.
for post in islice(profile.get_posts(), max(NUM_POSTS, 0)):
    caption = post.caption or ""
    detected_locations = extract_locations_from_text(caption)

    # Download the post image; the target path is given without an extension.
    L.download_pic(
        os.path.join(MEDIA_DIR, post.shortcode),
        post.url,
        post.date_utc
    )

    # Detect actual saved file (handles .jpg/.png/.webp). Sorted so the pick is
    # deterministic if more than one file shares the shortcode stem.
    matching_files = sorted(glob(os.path.join(MEDIA_DIR, f"{post.shortcode}.*")))
    media_path = matching_files[0] if matching_files else None

    post_data = {
        "date": post.date_utc.strftime("%Y-%m-%d %H:%M:%S"),
        "caption": caption,
        "hashtags": post.caption_hashtags,
        "likes": post.likes,
        "comments": post.comments,
        "location": post.location.name if post.location else None,
        "extra_detected_locations": detected_locations,
        "url": f"https://www.instagram.com/p/{post.shortcode}/",
        "image_path": media_path
    }
    data.append(post_data)

print(f"[+] Collected {len(data)} posts.")

# Write the collected post records to JSON_FILE as UTF-8, keeping non-ASCII
# characters unescaped.
with open(JSON_FILE, "w", encoding="utf-8") as json_out:
    json_out.write(json.dumps(data, indent=4, ensure_ascii=False))
print(f"[+] Data saved to {JSON_FILE}")

# ================== GEMINI ANALYSIS ==================
print("[+] Sending data & images to Gemini for analysis...")

genai.configure(api_key=GEMINI_API_KEY)
model = genai.GenerativeModel("gemini-1.5-flash")

# Instruction text that opens the request; the per-post text and image parts
# are appended after it.
prompt_sections = (
    "Analyze this Instagram data and images to create a detailed structured report with:\n",
    "1. Table of locations visited (with frequency)\n",
    "2. Landmarks or locations detected from images\n",
    "3. List of purchases mentioned or implied\n",
    "4. Lifestyle summary (travel habits, dining, brands)\n",
    "5. Possible financial status assessment\n",
    "Consider both text and visual clues.\n\n",
)
prompt_text = "".join(prompt_sections)

# Request parts in order: the prompt, then for each post a text part followed
# by its image part (when the image file exists on disk).
contents = [prompt_text]

for post in data:
    text_part = (
        f"Date: {post['date']}\n"
        f"URL: {post['url']}\n"
    )
    if post['location']:
        text_part += f"Location (from IG): {post['location']}\n"
    if post['extra_detected_locations']:
        text_part += f"Possible Locations (from text): {', '.join(post['extra_detected_locations'])}\n"
    text_part += f"Caption: {post['caption']}\n"
    if post['hashtags']:
        text_part += f"Hashtags: {', '.join(post['hashtags'])}\n"
    text_part += "---\n"

    contents.append(text_part)

    # Attach image if exists
    image_path = post["image_path"]
    if image_path and os.path.exists(image_path):
        # Declare the MIME type that matches the file's extension instead of
        # labelling every file as JPEG. Fall back to image/jpeg (the previous
        # hard-coded value) when the extension is unknown or not an image type.
        mime_type = mimetypes.guess_type(image_path)[0]
        if not mime_type or not mime_type.startswith("image/"):
            mime_type = "image/jpeg"
        with open(image_path, "rb") as img_file:
            contents.append({
                "mime_type": mime_type,
                "data": img_file.read()
            })

# Send one batch request to Gemini: everything collected in `contents` (the
# prompt, the per-post text parts and the attached images) goes out in a single
# generate_content call.
response = model.generate_content(contents)

# Print a header line, then the text of the model's reply.
print("\n[+] Gemini Report:\n")
print(response.text)

[+] Fetching last 15 posts for rj.mahvash...


JSON Query to graphql/query: 403 Forbidden when accessing https://www.instagram.com/graphql/query [retrying; skip with ^C]


rj.mahvash_media\DJy1AJ7SyL1.jpg 

JSON Query to graphql/query: 403 Forbidden when accessing https://www.instagram.com/graphql/query [retrying; skip with ^C]


rj.mahvash_media\C2CiPZnJzzY.jpg 

JSON Query to graphql/query: 403 Forbidden when accessing https://www.instagram.com/graphql/query [retrying; skip with ^C]


rj.mahvash_media\CuJ6H0mp-tM.jpg 

JSON Query to graphql/query: 403 Forbidden when accessing https://www.instagram.com/graphql/query [retrying; skip with ^C]


rj.mahvash_media\DNA98kASeiV.jpg 

JSON Query to graphql/query: 403 Forbidden when accessing https://www.instagram.com/graphql/query [retrying; skip with ^C]


rj.mahvash_media\DM-dtEOyY5L.jpg 

JSON Query to graphql/query: 403 Forbidden when accessing https://www.instagram.com/graphql/query [retrying; skip with ^C]


rj.mahvash_media\DM5YTByyEe5.jpg 

JSON Query to graphql/query: 403 Forbidden when accessing https://www.instagram.com/graphql/query [retrying; skip with ^C]


rj.mahvash_media\DM0Nq6dyS6m.jpg 

JSON Query to graphql/query: 403 Forbidden when accessing https://www.instagram.com/graphql/query [retrying; skip with ^C]


rj.mahvash_media\DMrtaIfyYhj.jpg 

JSON Query to graphql/query: 403 Forbidden when accessing https://www.instagram.com/graphql/query [retrying; skip with ^C]


rj.mahvash_media\DMaTjeYSeFE.jpg 

JSON Query to graphql/query: 403 Forbidden when accessing https://www.instagram.com/graphql/query [retrying; skip with ^C]


rj.mahvash_media\DMNTlWJtUDa.jpg 

JSON Query to graphql/query: 403 Forbidden when accessing https://www.instagram.com/graphql/query [retrying; skip with ^C]


rj.mahvash_media\DMDM1wZttg7.jpg 

JSON Query to graphql/query: 403 Forbidden when accessing https://www.instagram.com/graphql/query [retrying; skip with ^C]


rj.mahvash_media\DL979-bStlY.jpg 

JSON Query to graphql/query: 403 Forbidden when accessing https://www.instagram.com/graphql/query [retrying; skip with ^C]
JSON Query to graphql/query: 403 Forbidden when accessing https://www.instagram.com/graphql/query [retrying; skip with ^C]


rj.mahvash_media\DL2Euf3N8Dm.jpg 

JSON Query to graphql/query: 403 Forbidden when accessing https://www.instagram.com/graphql/query [retrying; skip with ^C]


rj.mahvash_media\DLxELeNNKhj.jpg 

JSON Query to graphql/query: 403 Forbidden when accessing https://www.instagram.com/graphql/query [retrying; skip with ^C]


rj.mahvash_media\DLpf5VLMUjH.jpg 

JSON Query to graphql/query: 403 Forbidden when accessing https://www.instagram.com/graphql/query [retrying; skip with ^C]


[+] Collected 15 posts.
[+] Data saved to rj.mahvash_instagram_data.json
[+] Sending data & images to Gemini for analysis...

[+] Gemini Report:

## Instagram Data Analysis Report for @rjmahvash (2025)

This report analyzes Instagram posts from @rjmahvash, focusing on location, landmarks, purchases, lifestyle, and potential financial status.  Analysis is limited to the provided data and images.

**1. Table of Locations Visited (with Frequency):**

| Location        | Frequency | Evidence                                                              |
|-----------------|------------|-------------------------------------------------------------------------|
| India           | Multiple   | Multiple posts mentioning India, Bollywood, and filming locations.       |
| Dubai, UAE      | 1          | Image shows Dubai Mall, Souk Al Bahar.                                |
| UK (Unspecified)| 2          | Images suggest locations in the UK (based on architectural styles). |
| Film Sets       | M