In [1]:
import os
# Report the directory the kernel is running in; the scraper cells further
# down write their JSON output using bare relative file names.
current_dir = os.getcwd()
print(current_dir)

C:\Users\Partiran\Desktop


In [18]:
import requests
import time
import json
from datetime import datetime

# --- Scraper settings --------------------------------------------------------
# Endpoint queried by the pagination loop and the entity whose posts are kept.
API_URL = "https://rahavard365.com/api/v2/social/posts"
TARGET_ENTITY_ID = "7836"  # a string: posts are matched via str(post["entity_id"])

# Pacing limits for the pagination loop.
MAX_REQUESTS = 700  # upper bound on HTTP requests issued in one run
DELAY = 1.5  # pause after each stored page, in seconds

# Every run writes to its own timestamped file (relative to the working directory).
OUTPUT_FILE = f"filtered_posts_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"

# --- Request headers (copied from browser inspection) -------------------------
headers = dict(
    [
        ("authority", "rahavard365.com"),
        ("accept", "application/json, text/plain, */*"),
        ("accept-language", "en-US,en;q=0.9"),
        ("application-name", "rahavard"),
        ("content-type", "application/json"),
        ("referer", "https://rahavard365.com/social"),
        ("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"),
    ]
)

# --- Mutable run state ---------------------------------------------------------
# filtered_posts: posts accumulated so far
# last_id:        pagination cursor (None until a page has been stored)
# request_count:  number of requests issued so far
filtered_posts, last_id, request_count = [], None, 0

print(f"Starting to collect posts for entity ID: {TARGET_ENTITY_ID}")

# Page through the entity's feed, one request per iteration, until the request
# budget is spent, the feed runs out, or something goes wrong.
#
# The whole loop sits inside try/finally so the posts collected so far are
# written to OUTPUT_FILE on every exit path -- including KeyboardInterrupt
# (stopping the cell), which `except Exception` does not catch.
try:
    while request_count < MAX_REQUESTS:
        # Prepare parameters
        params = {
            "exclude_replies": "true",
            "has_chart_analysis": "false",
            "_count": 16,
            "entity_id": TARGET_ENTITY_ID,
            "entity_type": "exchange.asset",
            "_t": int(time.time() * 1000),  # current time in milliseconds
        }

        # Once a page has been stored, send the id of the last kept post as the
        # cursor (presumably the API then returns the posts preceding it).
        if last_id:
            params["before_id"] = last_id

        try:
            print(f"\nRequest #{request_count + 1}")

            response = requests.get(
                API_URL,
                params=params,
                headers=headers,
                timeout=10
            )
            request_count += 1

            print(f"Status: {response.status_code}")

            if response.status_code != 200:
                print(f"HTTP Error {response.status_code}")
                break

            data = response.json()

            # The payload must be a JSON object whose "data" member is a list.
            batch = data.get("data") if isinstance(data, dict) else None
            if not isinstance(batch, list):
                print("Unexpected response format")
                break

            # An empty page is the normal end of the feed, so it is reported as
            # such rather than as a format error.
            if not batch:
                print("No more posts available")
                break

            # Verify entity_id in each post
            valid_posts = [
                p for p in batch
                if str(p.get("entity_id")) == TARGET_ENTITY_ID
            ]

            if len(valid_posts) != len(batch):
                print(f"Filtered {len(batch) - len(valid_posts)} posts with wrong entity_id")

            if not valid_posts:
                print("No valid posts in this batch")
                break

            filtered_posts.extend(valid_posts)
            last_id = valid_posts[-1]["id"]

            # Throttle only when another request is going to follow.
            time.sleep(DELAY)

        except Exception as e:
            # Network failures, invalid JSON and malformed posts all end the run;
            # the finally clause below still persists what was collected.
            print(f"Error: {str(e)}")
            break
finally:
    # Save results
    print(f"\nFinished. Collected {len(filtered_posts)} posts for entity {TARGET_ENTITY_ID}")
    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
        json.dump(filtered_posts, f, ensure_ascii=False, indent=2)

    print(f"Results saved to {OUTPUT_FILE}")

# Sanity report: show the entity_id of the first and last collected posts and
# how many distinct entity_ids ended up in the result. Skipped when nothing
# was collected.
if filtered_posts:
    print(
        "\nVerification:",
        f"First post entity_id: {filtered_posts[0].get('entity_id')}",
        f"Last post entity_id: {filtered_posts[-1].get('entity_id')}",
        f"Unique entity_ids found: {len(set(p.get('entity_id') for p in filtered_posts))}",
        sep="\n",
    )

Starting to collect posts for entity ID: 7836

Request #1
Status: 200

Request #2
Status: 200

Request #3
Status: 200

Request #4
Status: 200

Request #5
Status: 200

Request #6
Status: 200

Request #7
Status: 200

Request #8
Status: 200

Request #9
Status: 200

Request #10
Status: 200

Request #11
Status: 200

Request #12
Status: 200

Request #13
Status: 200

Request #14
Status: 200

Request #15
Status: 200

Request #16
Status: 200

Request #17
Status: 200

Request #18
Status: 200

Request #19
Status: 200

Request #20
Status: 200

Request #21
Status: 200

Request #22
Status: 200

Request #23
Status: 200

Request #24
Status: 200

Request #25
Status: 200

Request #26
Status: 200

Request #27
Status: 200

Request #28
Status: 200

Request #29
Status: 200

Request #30
Status: 200

Request #31
Status: 200

Request #32
Status: 200

Request #33
Status: 200

Request #34
Status: 200

Request #35
Status: 200

Request #36
Status: 200

Request #37
Status: 200

Request #38
Status: 200

Request #39


In [19]:
import requests
import time
import json
from datetime import datetime
import os

# --- Scraper settings --------------------------------------------------------
# Endpoint queried by the pagination loop and the entity whose posts are kept.
API_URL = "https://rahavard365.com/api/v2/social/posts"
TARGET_ENTITY_ID = "7836"  # a string: posts are matched via str(post["entity_id"])

# Pacing limits for the pagination loop.
MAX_REQUESTS = 100  # upper bound on HTTP requests issued in one run
DELAY = 1.5  # pause after each stored page, in seconds

# Every run writes to its own timestamped file (relative to the working directory).
OUTPUT_FILE = f"filtered_posts_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"

# --- Request headers (copied from browser inspection) -------------------------
headers = dict(
    [
        ("authority", "rahavard365.com"),
        ("accept", "application/json, text/plain, */*"),
        ("accept-language", "en-US,en;q=0.9"),
        ("application-name", "rahavard"),
        ("content-type", "application/json"),
        ("referer", "https://rahavard365.com/social"),
        ("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"),
    ]
)

# --- Mutable run state ---------------------------------------------------------
# filtered_posts: posts accumulated so far (may be pre-seeded by the resume step)
# last_id:        pagination cursor (None until a page has been stored or loaded)
# request_count:  number of requests issued so far in this run
filtered_posts, last_id, request_count = [], None, 0

# Try to find the most recent output file to resume from
def find_latest_output_file():
    """Return the newest ``filtered_posts_*.json`` file in the working directory.

    Only regular files are considered, so a directory whose name happens to
    match the pattern is never returned to a caller that intends to open the
    result for reading.

    Returns:
        The bare file name (relative to the current working directory) of the
        matching file with the most recent modification time, or ``None`` when
        no matching file exists.
    """
    candidates = [
        name
        for name in os.listdir()
        if name.startswith("filtered_posts_")
        and name.endswith(".json")
        and os.path.isfile(name)
    ]
    # `default=None` covers the "nothing to resume from" case without a separate branch.
    return max(candidates, key=os.path.getmtime, default=None)

# Load existing data if available, so this run continues from the last stored
# post instead of starting again at the top of the feed.
# NOTE(review): output file names carry only a timestamp, so the newest file is
# assumed to hold posts for TARGET_ENTITY_ID -- confirm before mixing entities.
latest_file = find_latest_output_file()
if latest_file:
    try:
        with open(latest_file, "r", encoding="utf-8") as f:
            existing_data = json.load(f)

        if existing_data:
            if not isinstance(existing_data, list):
                raise ValueError(
                    f"expected a JSON list of posts, got {type(existing_data).__name__}"
                )
            # Read the cursor before touching any run state: if the last entry
            # is malformed this raises while filtered_posts/last_id still hold
            # their fresh-start values, matching the message printed below.
            resume_id = existing_data[-1]["id"]
            filtered_posts = existing_data.copy()
            last_id = resume_id
            print(f"Resuming from existing data. Loaded {len(existing_data)} posts. Last ID: {last_id}")
    except Exception as e:
        print(f"Error loading existing file: {str(e)}")
        print("Starting fresh collection")

print(f"Starting to collect posts for entity ID: {TARGET_ENTITY_ID}")

# Page through the entity's feed, one request per iteration, until the request
# budget is spent, the feed runs out, or something goes wrong.
#
# The whole loop sits inside try/finally so the posts collected so far are
# written to OUTPUT_FILE exactly once on every exit path -- including
# KeyboardInterrupt (stopping the cell), which `except Exception` does not catch.
try:
    while request_count < MAX_REQUESTS:
        # Prepare parameters
        params = {
            "exclude_replies": "true",
            "has_chart_analysis": "false",
            "_count": 16,
            "entity_id": TARGET_ENTITY_ID,
            "entity_type": "exchange.asset",
            "_t": int(time.time() * 1000),  # current time in milliseconds
        }

        # Once a cursor is known (from a stored page or a resumed file), send it
        # as `before_id` (presumably the API then returns the posts preceding it).
        if last_id:
            params["before_id"] = last_id

        try:
            print(f"\nRequest #{request_count + 1}")

            response = requests.get(
                API_URL,
                params=params,
                headers=headers,
                timeout=10
            )
            request_count += 1

            print(f"Status: {response.status_code}")

            if response.status_code != 200:
                print(f"HTTP Error {response.status_code}")
                break

            data = response.json()

            # The payload must be a JSON object whose "data" member is a list.
            batch = data.get("data") if isinstance(data, dict) else None
            if not isinstance(batch, list):
                print("Unexpected response format")
                break

            # An empty page is the normal end of the feed, so it is reported as
            # such rather than as a format error.
            if not batch:
                print("No more posts available")
                break

            # Verify entity_id in each post
            valid_posts = [
                p for p in batch
                if str(p.get("entity_id")) == TARGET_ENTITY_ID
            ]

            if len(valid_posts) != len(batch):
                print(f"Filtered {len(batch) - len(valid_posts)} posts with wrong entity_id")

            if not valid_posts:
                print("No valid posts in this batch")
                break

            filtered_posts.extend(valid_posts)
            last_id = valid_posts[-1]["id"]
            print(f"Added {len(valid_posts)} valid posts (Total: {len(filtered_posts)})")

            # Throttle only when another request is going to follow.
            time.sleep(DELAY)

        except Exception as e:
            # Network failures, invalid JSON and malformed posts all end the run.
            # No separate save is needed here: the finally clause below writes
            # the file on every exit path.
            print(f"Error: {str(e)}")
            break
finally:
    # Save results
    print(f"\nFinished. Collected {len(filtered_posts)} posts for entity {TARGET_ENTITY_ID}")
    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
        json.dump(filtered_posts, f, ensure_ascii=False, indent=2)

    print(f"Results saved to {OUTPUT_FILE}")

# Sanity report: show the entity_id of the first and last collected posts and
# how many distinct entity_ids ended up in the result. Skipped when nothing
# was collected.
if filtered_posts:
    print(
        "\nVerification:",
        f"First post entity_id: {filtered_posts[0].get('entity_id')}",
        f"Last post entity_id: {filtered_posts[-1].get('entity_id')}",
        f"Unique entity_ids found: {len(set(p.get('entity_id') for p in filtered_posts))}",
        sep="\n",
    )

Resuming from existing data. Loaded 11200 posts. Last ID: 12617935
Starting to collect posts for entity ID: 7836

Request #1
Status: 200
Added 16 valid posts (Total: 11216)

Request #2
Status: 200
Added 16 valid posts (Total: 11232)

Request #3
Status: 200
Added 16 valid posts (Total: 11248)

Request #4
Status: 200
Added 16 valid posts (Total: 11264)

Request #5
Status: 200
Added 16 valid posts (Total: 11280)

Request #6
Status: 200
Added 16 valid posts (Total: 11296)

Request #7
Status: 200
Added 16 valid posts (Total: 11312)

Request #8
Status: 200
Added 16 valid posts (Total: 11328)

Request #9
Status: 200
Added 16 valid posts (Total: 11344)

Request #10
Status: 200
Added 16 valid posts (Total: 11360)

Request #11
Status: 200
Added 16 valid posts (Total: 11376)

Request #12
Status: 200
Added 16 valid posts (Total: 11392)

Request #13
Status: 200
Added 16 valid posts (Total: 11408)

Request #14
Status: 200
Added 16 valid posts (Total: 11424)

Request #15
Status: 200
Added 16 valid po