# 🧠 LeadCraftr - Vectorisation & Matching

In [76]:
# Imports principaux
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [78]:
freelance_df = pd.read_csv('../generate_datasets/updated_freelances_dataset.csv', index_col='freelance_id').reset_index()
prospect_df = pd.read_csv('../generate_datasets/updated_prospects_dataset.csv', index_col='prospect_id').reset_index()
freelance_df

Unnamed: 0,freelance_id,nom,titre,secteur_principal,skills_top3,ville,tjm,mission_statement,tonalite_preferee,style_prefere
0,1,Allison Hill,Digital Marketing,Marketing,Analytics|Google Ads|SEO,East Jill,397.68,"With Allison Hill, strategic marketing strateg...",Professional,Storytelling
1,2,Javier Johnson,Renewable Energy Expertise,GreenTech,LCA|Project Management|Energy Modeling,East William,462.86,"With Javier Johnson, focused expertise in LCA ...",Serious,Formal
2,3,Meredith Barnes,Environmental Impact Analysis,GreenTech,Energy Modeling|IoT|LCA,Lawrencetown,831.17,Meredith Barnes advances effective sustainabil...,Friendly,Storytelling
3,4,Donald Lewis,Supply Chain Management,Retail/E-commerce,Facebook Ads|Customer Support|Data Analytics,Curtisfurt,371.55,"With Donald Lewis, innovative expertise in Fac...",Creative,Formal
4,5,Renee Blair,Customer Service,Retail/E-commerce,Supply-Chain|Shopify|Customer Support,South Christianport,621.60,"With Renee Blair, trusted expertise in Supply-...",Professional,Warm
...,...,...,...,...,...,...,...,...,...,...
295,296,Tracy Rivera,Project Management,GreenTech,Energy Modeling|IoT|Carbon Accounting,Rodriguezville,847.87,Tracy Rivera advances passionate sustainabilit...,Energetic,Warm
296,297,Kristin Watts,Ed-Tech Integration,Education/Ed-Tech,Learning Analytics|Storyline360|HTML5,South Neil,374.88,Kristin Watts excels in vibrant Learning Analy...,Energetic,Formal
297,298,Kathryn Cooper,Customer Service,Retail/E-commerce,Data Analytics|Supply-Chain|Facebook Ads,South Danchester,534.32,Kathryn Cooper drives exclusive e-commerce suc...,Premium,Storytelling
298,299,Eric Gibson,Community Management,Wellness,Copywriting|Pilates|Nutrition,New Timothy,519.93,Eric Gibson promotes visionary wellness with C...,Creative,Warm


In [79]:
prospect_df

Unnamed: 0,prospect_id,company,sector,main_contact,contact_role,city,mission_statement,company_size,funding_stage,ticket_size_class,target_tone,remote
0,1,"Marsh, Spears and Yang",FinTech,Kristen Rivera,CEO,Mullenbury,"Delivering premium financial platforms, pionee...",SME (21-200),Series C+,High,Premium,No
1,2,Lopez Ltd,FinTech,Allen Mendez,CEO,South Brittany,"Delivering effective financial platforms, pion...",Enterprise (1000+),Series A,High,Creative,No
2,3,Ramirez Ltd,Design,Amy Garcia,Head of Marketing,West Mark,Crafting exclusive designs to drive brand iden...,Mid-size (201-1000),Seed,Medium,Premium,Yes
3,4,"Huffman, Rose and Fowler",GreenTech,Lisa Matthews,Head of Data,Michaelmouth,"Driving effective eco-innovation, revolutioniz...",Enterprise (1000+),Pre-Seed,High,Serious,No
4,5,"Mcbride, Sellers and Lawrence",GreenTech,Emily Allen,CEO,East Nathanielberg,"Driving effective eco-innovation, transform a ...",Enterprise (1000+),Series C+,High,Energetic,Yes
...,...,...,...,...,...,...,...,...,...,...,...,...
2995,2996,Hill PLC,Retail/E-commerce,Amanda Bird,CEO,Fergusonport,"Transforming effective retail platforms, trans...",Startup (1-20),Series B,High,Friendly,No
2996,2997,"Mendoza, Hayes and Lynch",Retail/E-commerce,Timothy Alvarado,CEO,Andrewfort,Providing effective e-commerce solutions to pi...,Mid-size (201-1000),Seed,Medium,Friendly,Yes
2997,2998,"Carpenter, Nielsen and Stone",Tech/SaaS,Elaine Rodriguez,CTO,Burchmouth,"Empowering premium digital transformation, emp...",SME (21-200),Series B,High,Premium,Yes
2998,2999,Wells Ltd,Marketing,Renee Hurst,CTO,Stanleyland,"Transforming exclusive campaigns, empower visi...",Startup (1-20),Series B,High,Premium,Yes


In [80]:
def vectorize_missions(freelance_df, prospect_df):
    """Vectorise les mission statements des freelances et prospects avec TF-IDF."""
    vectorizer = TfidfVectorizer(stop_words='english')
    freelance_tfidf = vectorizer.fit_transform(freelance_df["mission_statement"])
    prospect_tfidf = vectorizer.transform(prospect_df["mission_statement"])
    return freelance_tfidf, prospect_tfidf

def get_top_20_leads(freelance_vec, prospect_tfidf, prospect_df):
    """
    Retourne les 20 prospects les plus similaires pour un vecteur freelance donné.

    Args:
        freelance_vec: Vecteur TF-IDF (1D ou 2D) du freelance (shape: (1, n_features)).
        prospect_tfidf: Matrice TF-IDF des prospects.
        prospect_df: DataFrame des prospects.

    Returns:
        DataFrame des 20 prospects les plus similaires avec score de similarité.
    """
    similarities = cosine_similarity(freelance_vec, prospect_tfidf).flatten()
    top_20_indices = similarities.argsort()[-20:][::-1]

    return prospect_df.iloc[top_20_indices].assign(
        similarity=similarities[top_20_indices]
    )

In [81]:
freelance_tfidf, prospect_tfidf = vectorize_missions(freelance_df, prospect_df)
top_20_df = get_top_20_leads(freelance_tfidf[0], prospect_tfidf, prospect_df)

In [82]:
top_20_df

Unnamed: 0,prospect_id,company,sector,main_contact,contact_role,city,mission_statement,company_size,funding_stage,ticket_size_class,target_tone,remote,similarity
351,352,Vazquez and Sons,Marketing,Jonathan Hubbard,Head of Data,Garrettport,Delivering effective marketing strategies to e...,Mid-size (201-1000),Seed,Medium,Creative,Yes,0.523287
2629,2630,Brown Group,Marketing,Jessica Lane,Head of Data,South Lisa,Delivering effective marketing strategies to d...,Startup (1-20),Pre-Seed,Low,Energetic,No,0.522379
1712,1713,"Blevins, Brooks and Harrison",Marketing,Tracy Chaney,CEO,Abbottland,Delivering effective marketing strategies to d...,Startup (1-20),Series C+,High,Serious,Yes,0.522379
686,687,Potts-Nelson,Marketing,Andrew Romero,CEO,Ryanport,Delivering effective marketing strategies to d...,Enterprise (1000+),Pre-Seed,High,Professional,No,0.522379
2163,2164,Jones-Figueroa,Marketing,Kathryn Garcia,Head of Data,Smithstad,Delivering effective marketing strategies to d...,Enterprise (1000+),Series B,High,Serious,Yes,0.522379
1453,1454,Tran-Winters,Marketing,Joshua Nelson,CEO,Michaelville,Delivering effective marketing strategies to d...,Enterprise (1000+),Pre-Seed,High,Friendly,Yes,0.522379
594,595,Miller-Hudson,Marketing,William Webb,Head of Data,West Christopherport,Delivering effective marketing strategies to d...,Startup (1-20),Pre-Seed,Low,Serious,Yes,0.522379
2001,2002,Jones and Sons,Marketing,Marc Lee,CEO,Chavezfort,Delivering effective marketing strategies to d...,Startup (1-20),Series C+,High,Serious,Yes,0.522379
272,273,Moreno Group,Marketing,Timothy Cole,Head of Data,Suzanneton,Delivering effective marketing strategies to d...,Startup (1-20),Pre-Seed,Low,Serious,Yes,0.522379
1905,1906,Maxwell-Smith,Marketing,Holly Martin,CEO,Sierrafurt,Delivering effective marketing strategies to d...,Startup (1-20),Seed,Low,Friendly,Yes,0.522379
