In [None]:
# 📊 Lead AI Harvest - Jupyter Notebook Demo

This notebook demonstrates how to validate and prioritize scraped business leads using OpenAI's API. It shows how lead data is loaded, processed, scored with AI, and filtered for quality leads.


In [None]:
import pandas as pd

# Read the bundled sample of scraped leads and preview its first rows.
sample_csv_path = "../data/leads_sample1.csv"
leads_df = pd.read_csv(sample_csv_path)
leads_df.head()


In [None]:
import openai
import os

# Read the OpenAI API key from the environment (export it in your shell or load it from a .env file).
# Never paste a real key into the notebook: notebooks are shared and committed, outputs included.
_api_key = os.getenv("OPENAI_API_KEY")
if not _api_key:
    # Stop here with a clear message instead of configuring a placeholder key that
    # cannot authenticate and would only surface later as per-lead request errors.
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Export it in your environment (or load it from a .env file) "
        "and re-run this cell."
    )
openai.api_key = _api_key

def _extract_score(text):
    """Return the 1-10 score found in free-form model output, or None if there is none."""
    import re  # function-scope import keeps this cell self-contained

    # An explicit rating such as "8/10" or "8 out of 10" is the most reliable signal.
    rated = re.search(
        r"(?<![\d.])(10|[1-9])\s*(?:/|out of)\s*10(?!\d)", text, flags=re.IGNORECASE
    )
    if rated:
        return int(rated.group(1))
    # Otherwise use the first standalone integer in range; the lookarounds skip digits
    # that belong to a longer number (e.g. "2024") or to a decimal fraction.
    standalone = re.search(r"(?<![\d.])(10|[1-9])(?!\d)", text)
    return int(standalone.group(1)) if standalone else None


def get_lead_score(lead):
    """Ask the chat model for a priority score for one lead.

    Args:
        lead: Mapping-like record (e.g. a DataFrame row) providing the keys
            'company_name', 'industry', 'description' and 'linkedin_url'.

    Returns:
        An int between 1 and 10, or None when the request fails or the reply
        contains no score in that range. Failures are printed, not raised, so
        one bad lead does not abort a whole `DataFrame.apply` run.
    """
    prompt = (
        f"Evaluate the following business lead:\n\n"
        f"Company: {lead['company_name']}\n"
        f"Industry: {lead['industry']}\n"
        f"Description: {lead['description']}\n"
        f"LinkedIn: {lead['linkedin_url']}\n\n"
        f"Give a lead priority score between 1 (low) and 10 (high)."
    )
    try:
        # NOTE(review): openai.ChatCompletion is the pre-1.0 interface of the openai
        # package; confirm the installed version still provides it.
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.7
        )
        reply = response['choices'][0]['message']['content']
    except Exception as e:
        print(f"Error: {e}")
        return None

    # Parse one score rather than gluing every digit together: a reply such as
    # "8 out of 10" must yield 8, not 810.
    score = _extract_score(reply or "")
    if score is None:
        print(f"Error: no score between 1 and 10 found in reply: {reply!r}")
    return score


In [None]:
# Score only the first five leads so the demo stays quick and cheap to run.
sample_leads = leads_df.head(5).copy()
priority_scores = sample_leads.apply(get_lead_score, axis=1)
sample_leads["priority_score"] = priority_scores
sample_leads


In [None]:
# Filter high priority leads (score of 7 or above).
# Failed API calls leave None in "priority_score"; when every row failed, the column has
# object dtype and `>= 7` raises TypeError. Coercing to numeric turns missing scores into
# NaN, which compares as False and simply drops out of the filter.
numeric_scores = pd.to_numeric(sample_leads["priority_score"], errors="coerce")
top_leads = sample_leads[numeric_scores >= 7]
top_leads


In [None]:
# Save the shortlisted leads alongside the input data, omitting the index column.
output_csv_path = "../data/top_validated_leads.csv"
top_leads.to_csv(output_csv_path, index=False)
