In [1]:
!pip install pandas



In [2]:
import os

os.makedirs("data", exist_ok=True)
os.makedirs("output", exist_ok=True)

print("Project folders ready")


Project folders ready


In [3]:
import pandas as pd

profiles = pd.DataFrame({
    "name": [
        "Dr. Sarah Lee",
        "Dr. Mark Evans",
        "Dr. Ananya Rao"
    ],
    "title": [
        "Director of Toxicology",
        "Senior Scientist",
        "Head of Preclinical Safety"
    ],
    "company": [
        "BioNova Inc",
        "HealthCore Labs",
        "NeoThera"
    ],
    "linkedin_url": [
        "https://linkedin.com/in/sarahlee",
        "https://linkedin.com/in/markevans",
        "https://linkedin.com/in/ananyarao"
    ],
    "person_location": [
        "Boston",
        "Texas",
        "UK"
    ],
    "company_hq": [
        "Cambridge MA",
        "San Diego CA",
        "Cambridge UK"
    ]
})

profiles.to_csv("data/input_profiles.csv", index=False)
profiles


Unnamed: 0,name,title,company,linkedin_url,person_location,company_hq
0,Dr. Sarah Lee,Director of Toxicology,BioNova Inc,https://linkedin.com/in/sarahlee,Boston,Cambridge MA
1,Dr. Mark Evans,Senior Scientist,HealthCore Labs,https://linkedin.com/in/markevans,Texas,San Diego CA
2,Dr. Ananya Rao,Head of Preclinical Safety,NeoThera,https://linkedin.com/in/ananyarao,UK,Cambridge UK


In [4]:
pubmed = pd.DataFrame({
    "name": [
        "Dr. Sarah Lee",
        "Dr. Mark Evans",
        "Dr. Ananya Rao"
    ],
    "recent_papers": [
        2,
        0,
        3
    ],
    "keywords": [
        "DILI, liver toxicity",
        "",
        "3D in-vitro, NAMs"
    ]
})

pubmed.to_csv("data/pubmed_results.csv", index=False)
pubmed


Unnamed: 0,name,recent_papers,keywords
0,Dr. Sarah Lee,2,"DILI, liver toxicity"
1,Dr. Mark Evans,0,
2,Dr. Ananya Rao,3,"3D in-vitro, NAMs"


In [5]:
profiles = pd.read_csv("data/input_profiles.csv")
pubmed = pd.read_csv("data/pubmed_results.csv")

df = profiles.merge(pubmed, on="name", how="left")

# simple business email inference
df["email"] = df["company"].str.lower().str.replace(" ", "") + "@company.com"

df


Unnamed: 0,name,title,company,linkedin_url,person_location,company_hq,recent_papers,keywords,email
0,Dr. Sarah Lee,Director of Toxicology,BioNova Inc,https://linkedin.com/in/sarahlee,Boston,Cambridge MA,2,"DILI, liver toxicity",bionovainc@company.com
1,Dr. Mark Evans,Senior Scientist,HealthCore Labs,https://linkedin.com/in/markevans,Texas,San Diego CA,0,,healthcorelabs@company.com
2,Dr. Ananya Rao,Head of Preclinical Safety,NeoThera,https://linkedin.com/in/ananyarao,UK,Cambridge UK,3,"3D in-vitro, NAMs",neothera@company.com


In [6]:
def role_score(title):
    if "Director" in title or "Head" in title:
        return 100
    elif "Senior" in title:
        return 60
    else:
        return 30

def research_score(papers):
    return min(papers * 30, 100)

def funding_score(company):
    funded_companies = ["BioNova Inc", "NeoThera"]
    return 80 if company in funded_companies else 30

def location_score(hq):
    hubs = ["Cambridge", "Boston", "Bay Area"]
    return 70 if any(hub in hq for hub in hubs) else 30

df["role_score"] = df["title"].apply(role_score)
df["research_score"] = df["recent_papers"].fillna(0).apply(research_score)
df["funding_score"] = df["company"].apply(funding_score)
df["location_score"] = df["company_hq"].apply(location_score)

df["final_score"] = (
    0.3 * df["role_score"] +
    0.4 * df["research_score"] +
    0.2 * df["funding_score"] +
    0.1 * df["location_score"]
)

df = df.sort_values("final_score", ascending=False)
df["rank"] = range(1, len(df) + 1)

final_df = df[
    [
        "rank",
        "final_score",
        "name",
        "title",
        "company",
        "person_location",
        "company_hq",
        "email",
        "linkedin_url"
    ]
]

final_df.to_csv("output/lead_scores.csv", index=False)
final_df


Unnamed: 0,rank,final_score,name,title,company,person_location,company_hq,email,linkedin_url
2,1,89.0,Dr. Ananya Rao,Head of Preclinical Safety,NeoThera,UK,Cambridge UK,neothera@company.com,https://linkedin.com/in/ananyarao
0,2,77.0,Dr. Sarah Lee,Director of Toxicology,BioNova Inc,Boston,Cambridge MA,bionovainc@company.com,https://linkedin.com/in/sarahlee
1,3,27.0,Dr. Mark Evans,Senior Scientist,HealthCore Labs,Texas,San Diego CA,healthcorelabs@company.com,https://linkedin.com/in/markevans


In [7]:
from google.colab import files
files.download("output/lead_scores.csv")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [9]:
!zip -r ai-lead-scoring-agent.zip data output


updating: data/ (stored 0%)
updating: data/pubmed_results.csv (deflated 10%)
updating: data/input_profiles.csv (deflated 38%)
updating: output/ (stored 0%)
updating: output/lead_scores.csv (deflated 41%)
