In [13]:
import os
import json
import time
import requests
import gspread
import logging
from tqdm import tqdm
from datetime import datetime
from google.oauth2.service_account import Credentials

# configuration

subreddits = ["forhire", "designjobs", "slavelabour", "remotejobs", "techjobs"]
post_limit = 100
lead_threshold = 0.7
alert_threshold = 0.7
model_name = "openai/gpt-4o-mini"
sheet_name = "Warm Leads"
openrouter_key = "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"
telegram_bot_token = "8477717556:AAFazITEA-HkQmd1xsLTK-vmsSMfUhbfzJ8"
telegram_chat_id = "-4900686671"

# system prompt

system_prompt = """
You are an expert Lead Qualification Assistant. 
Analyze a Reddit post and decide if it is a "warm lead" — someone likely looking to hire or buy a tech service 
(web/app development, UI/UX, automation, etc.).
Be generous in scoring if there are clear hints of hiring, project requests, or job openings.
Return only valid JSON with keys: "is_lead", "confidence_score", "summary".
The confidence_score should be a float between 0 and 1, where:
- 0.9–1.0 = strong hiring intent
- 0.6–0.8 = possible lead, likely buyer
- 0.3–0.5 = unsure
- below 0.3 = not a lead
"""

# logging setup

logging.basicConfig(
    filename="warm_lead_bot.log",
    level=logging.INFO,
    format="%(asctime)s | %(levelname)s | %(message)s"
)

def log(msg):
    print(msg)
    logging.info(msg)

# fetch posts

def fetch_reddit_posts():
    all_posts = []
    for sub in subreddits:
        url = f"https://api.pullpush.io/reddit/search/submission/?subreddit={sub}&size={post_limit}&sort=desc"
        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            data = response.json()
            for post in data["data"]:
                all_posts.append({
                    "title": post.get("title", ""),
                    "text": post.get("selftext", ""),
                    "url": f"https://www.reddit.com{post.get('permalink', '')}" if post.get("permalink") else post.get("full_link", ""),
                    "source": sub
                })
            log(f"{len(data['data'])} posts fetched from r/{sub}")
        except Exception as e:
            log(f"failed to fetch from r/{sub}: {e}")
    return all_posts

# lead qualification

def qualify_post(post, retries=3):
    url = "https://openrouter.ai/api/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {openrouter_key}",
        "Content-Type": "application/json"
    }

    text = f"Title: {post['title']}\n\nText: {post['text']}"
    payload = {
        "model": model_name,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": text}
        ]
    }

    for attempt in range(retries):
        try:
            response = requests.post(url, headers=headers, json=payload, timeout=40)

            if response.status_code != 200:
                log(f"API returned {response.status_code}: {response.text[:120]}")
                time.sleep(3)
                continue

            data = response.json()
            if not data.get("choices"):
                log("Empty response from API (no 'choices' key). Retrying...")
                time.sleep(3)
                continue

            content = data["choices"][0]["message"].get("content", "").strip()
            if not content:
                log("Model returned empty content. Retrying...")
                time.sleep(3)
                continue

            try:
                result = json.loads(content)
                return result
            except json.JSONDecodeError:
                log(f"Could not parse JSON: {content[:120]}")
                time.sleep(3)
                continue

        except requests.exceptions.Timeout:
            log(f"Timeout on attempt {attempt+1}. Retrying...")
            time.sleep(4)

        except requests.exceptions.RequestException as e:
            log(f"Network error (attempt {attempt+1}): {e}")
            time.sleep(4)

    log(f"Failed to analyze post after {retries} attempts: {post['title'][:80]}")
    return None

# google sheets connection

def connect_to_sheet():
    scope = [
        "https://www.googleapis.com/auth/spreadsheets",
        "https://www.googleapis.com/auth/drive"
    ]
    creds = Credentials.from_service_account_file("credentials.json", scopes=scope)
    client = gspread.authorize(creds)
    sheet = client.open(sheet_name).sheet1
    log("Connected to Google Sheets")
    return sheet

def save_lead(sheet, post, result):
    try:
        sheet.append_row([
            datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            post["title"],
            post["source"],
            post["url"],
            result.get("summary", ""),
            result.get("confidence_score", 0)
        ])
        log(f"Lead saved: {post['title'][:60]}...")
    except Exception as e:
        log(f"Failed to save to sheet: {e}")

# telegram alert

def send_telegram_alert(post, result):
    try:
        score = result.get("confidence_score", 0)
        if score >= alert_threshold:
            text = (
                f"*New Warm Lead!*\n\n"
                f"*Subreddit:* {post['source']}\n"
                f"*Title:* {post['title']}\n"
                f"{result.get('summary', 'No summary')}\n"
                f"[View Post]({post['url']})\n"
                f"Confidence: {score*100:.1f}%"
            )
            url = f"https://api.telegram.org/bot{telegram_bot_token}/sendMessage"
            payload = {
                "chat_id": telegram_chat_id,
                "text": text,
                "parse_mode": "Markdown"
            }
            response = requests.post(url, json=payload, timeout=10)
            if response.status_code == 200:
                log(f"Telegram alert sent for: {post['title'][:60]}...")
            else:
                log(f"Telegram send failed ({response.status_code}): {response.text}")
    except Exception as e:
        log(f"Telegram notification error: {e}")

# main pipeline

def main():
    log("Starting Warm Lead Generation Bot")
    posts = fetch_reddit_posts()
    if not posts:
        log("No posts fetched. Exiting.")
        return

    sheet = connect_to_sheet()
    log(f"Analyzing {len(posts)} posts...")

    qualified_count = 0

    for i, post in enumerate(posts, start=1):
        print(f"\n[{i}/{len(posts)}] Analyzing: {post['title']}")
        result = qualify_post(post)

        if not result:
            log("Skipped (no result returned)")
            continue

        score = result.get("confidence_score", 0)

        if result.get("is_lead") and score >= lead_threshold:
            save_lead(sheet, post, result)
            send_telegram_alert(post, result)
            qualified_count += 1
        else:
            log("Not a valid or strong lead")

        time.sleep(1)

    log(f"Total Qualified Leads: {qualified_count}")
    print(f"\nTotal Qualified Leads: {qualified_count}")
    log("Process completed successfully. Check your Google Sheet.")

# run

if __name__ == "__main__":
    main()


  "is_lead": true,
  "confidence_score": 0.9,
  "summary": "The post indicates a clear hiring intent for a vir
Not a valid or strong lead

[300/500] Analyzing: [offer] Help me LF someone to promote me
Not a valid or strong lead

[301/500] Analyzing: Earn Money from Home with Paidwork – My Honest Experience
Could not parse JSON: ```json
{
  "is_lead": false,
  "confidence_score": 0.1,
  "summary": "The post discusses earning money from home but do
Not a valid or strong lead

[302/500] Analyzing: [For Hire] Remote Product Consultant / Product Manager | AI + No-Code | MVP Strategy &amp; Execution | $30–$50/hr | Open to Flexible Time Zones
Lead saved: [For Hire] Remote Product Consultant / Product Manager | AI ...
Telegram alert sent for: [For Hire] Remote Product Consultant / Product Manager | AI ...

[303/500] Analyzing: 10$ for 10 mins of your time(US Canada, India, Indonesia, UK, Phillipines)
Not a valid or strong lead

[304/500] Analyzing: 10$ for 10 mins of your time(US Canada, India

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=3922c0d8-f439-49b0-812b-9200c7cdbe90' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>