# 💞 Vectorisation & Matching

In [30]:
# Imports principaux
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [31]:
# Load both datasets. The ID column is read as the index and then dropped,
# leaving each frame with a plain 0..n-1 positional index.
freelance_df = (
    pd.read_csv(
        '../generate_datasets/updated_freelances_dataset.csv',
        index_col='freelance_id',
    )
    .reset_index(drop=True)
)
prospect_df = (
    pd.read_csv(
        '../generate_datasets/updated_prospects_dataset.csv',
        index_col='prospect_id',
    )
    .reset_index(drop=True)
)
freelance_df

Unnamed: 0,name,title,main_sector,top3_skills,city,daily_rate,mission_statement,preferred_tone,preferred_style,remote
0,Allison Hill,Digital Marketing,Marketing,Analytics|Google Ads|SEO,East Jill,397.68,Allison Hill delivers trusted solutions in Ana...,Professional,Storytelling,Yes
1,Javier Johnson,Renewable Energy Expertise,GreenTech,LCA|Project Management|Energy Modeling,East William,462.86,Javier Johnson advances focused sustainability...,Serious,Formal,Yes
2,Meredith Barnes,Environmental Impact Analysis,GreenTech,Energy Modeling|IoT|LCA,Lawrencetown,831.17,"With Meredith Barnes, effective expertise in E...",Friendly,Storytelling,No
3,Donald Lewis,Supply Chain Management,Retail/E-commerce,Facebook Ads|Customer Support|Data Analytics,Curtisfurt,371.55,Donald Lewis excels in visionary Facebook Ads ...,Creative,Formal,No
4,Renee Blair,Customer Service,Retail/E-commerce,Supply-Chain|Shopify|Customer Support,South Christianport,621.60,Renee Blair excels in trusted Supply-Chain and...,Professional,Warm,Yes
...,...,...,...,...,...,...,...,...,...,...
295,Tracy Rivera,Project Management,GreenTech,Energy Modeling|IoT|Carbon Accounting,Rodriguezville,847.87,"With Tracy Rivera, dynamic expertise in Energy...",Energetic,Warm,Yes
296,Kristin Watts,Ed-Tech Integration,Education/Ed-Tech,Learning Analytics|Storyline360|HTML5,South Neil,374.88,"With Kristin Watts, passionate expertise in Le...",Energetic,Formal,No
297,Kathryn Cooper,Customer Service,Retail/E-commerce,Data Analytics|Supply-Chain|Facebook Ads,South Danchester,534.32,Kathryn Cooper excels in high-quality Data Ana...,Premium,Storytelling,Yes
298,Eric Gibson,Community Management,Wellness,Copywriting|Pilates|Nutrition,New Timothy,519.93,Eric Gibson excels in visionary Copywriting an...,Creative,Warm,No


In [32]:
# Display the prospects DataFrame as this cell's output.
prospect_df

Unnamed: 0,company,sector,main_contact,contact_role,city,mission_statement,company_size,funding_stage,ticket_size_class,target_tone,remote
0,"Marsh, Spears and Yang",FinTech,Kristen Rivera,CEO,Mullenbury,"Delivering high-quality financial platforms, d...",SME (21-200),Series C+,High,Premium,No
1,Lopez Ltd,FinTech,Allen Mendez,CEO,South Brittany,"Delivering effective financial platforms, pion...",Enterprise (1000+),Series A,High,Creative,No
2,Ramirez Ltd,Design,Amy Garcia,Head of Marketing,West Mark,Crafting high-quality designs to revolutionize...,Mid-size (201-1000),Seed,Medium,Premium,Yes
3,"Huffman, Rose and Fowler",GreenTech,Lisa Matthews,Head of Data,Michaelmouth,Advancing effective green technologies to inno...,Enterprise (1000+),Pre-Seed,High,Serious,No
4,"Mcbride, Sellers and Lawrence",GreenTech,Emily Allen,CEO,East Nathanielberg,Advancing effective green technologies to pion...,Enterprise (1000+),Series C+,High,Energetic,Yes
...,...,...,...,...,...,...,...,...,...,...,...
2995,Hill PLC,Retail/E-commerce,Amanda Bird,CEO,Fergusonport,Providing effective e-commerce solutions to dr...,Startup (1-20),Series B,High,Friendly,No
2996,"Mendoza, Hayes and Lynch",Retail/E-commerce,Timothy Alvarado,CEO,Andrewfort,Providing effective e-commerce solutions to dr...,Mid-size (201-1000),Seed,Medium,Friendly,Yes
2997,"Carpenter, Nielsen and Stone",Tech/SaaS,Elaine Rodriguez,CTO,Burchmouth,"Empowering premium digital transformation, enh...",SME (21-200),Series B,High,Premium,Yes
2998,Wells Ltd,Marketing,Renee Hurst,CTO,Stanleyland,Delivering high-quality marketing strategies t...,Startup (1-20),Series B,High,Premium,Yes


In [33]:
def vectorize_missions(freelance_df, prospect_df):
    """Vectorise the freelance and prospect mission statements with TF-IDF.

    A single vectorizer (English stop words removed) is fitted on the
    freelance ``mission_statement`` column and then reused to transform the
    prospect ``mission_statement`` column, so both matrices share the same
    feature space.

    Args:
        freelance_df: DataFrame with a ``mission_statement`` column.
        prospect_df: DataFrame with a ``mission_statement`` column.

    Returns:
        Tuple ``(freelance_tfidf, prospect_tfidf)`` of TF-IDF matrices.
    """
    tfidf = TfidfVectorizer(stop_words='english')

    freelance_matrix = tfidf.fit_transform(freelance_df["mission_statement"])
    prospect_matrix = tfidf.transform(prospect_df["mission_statement"])

    return freelance_matrix, prospect_matrix

def get_top_20_leads(freelance_vec, prospect_tfidf, prospect_df, top_n=20):
    """
    Return the prospects most similar to a given freelance TF-IDF vector.

    Args:
        freelance_vec: TF-IDF vector of the freelance, either 2D with shape
            (1, n_features) or 1D with shape (n_features,). A 1D vector is
            reshaped to a single row because cosine_similarity only accepts
            2D input.
        prospect_tfidf: TF-IDF matrix of the prospects.
        prospect_df: DataFrame of the prospects; rows are selected by
            position, so it must be aligned row-for-row with prospect_tfidf.
        top_n: Maximum number of prospects to return (default: 20).

    Returns:
        DataFrame of the ``top_n`` most similar prospects, ordered by
        decreasing similarity, with an added ``similarity`` column.

    Raises:
        ValueError: If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    # The docstring has always advertised 1D input, but cosine_similarity
    # rejects it; promote a 1D vector to a single-row matrix.
    if getattr(freelance_vec, "ndim", 2) == 1:
        freelance_vec = freelance_vec.reshape(1, -1)

    similarities = cosine_similarity(freelance_vec, prospect_tfidf).flatten()

    # Reverse first, then slice: identical to `[-top_n:][::-1]` for top_n > 0,
    # but also yields an empty selection for top_n == 0 (where `[-0:]` would
    # select every row).
    top_indices = similarities.argsort()[::-1][:top_n]

    return prospect_df.iloc[top_indices].assign(
        similarity=similarities[top_indices]
    )

In [34]:
# Vectorise every mission statement, then rank the prospects against the
# first freelance (row 0 of the freelance TF-IDF matrix).
freelance_tfidf, prospect_tfidf = vectorize_missions(freelance_df, prospect_df)
top_20_df = get_top_20_leads(freelance_tfidf[0], prospect_tfidf, prospect_df)

In [35]:
# Display the ranked prospects together with their similarity score.
top_20_df

Unnamed: 0,company,sector,main_contact,contact_role,city,mission_statement,company_size,funding_stage,ticket_size_class,target_tone,remote,similarity
1161,"Spencer, Jones and Miller",Tech/SaaS,Mark Romero,Head of Data,Lake Dianaland,"Empowering effective digital transformation, i...",SME (21-200),Seed,Medium,Serious,No,0.31067
2414,"Joseph, Campbell and Douglas",Tech/SaaS,Tristan Rojas,Head of Data,North Melissa,"Empowering effective digital transformation, i...",Mid-size (201-1000),Seed,Medium,Creative,Yes,0.31067
1730,Perez-Smith,Tech/SaaS,Catherine Cole,Head of Data,East Renee,"Empowering effective digital transformation, i...",Enterprise (1000+),Series B,High,Professional,No,0.31067
934,Thomas Group,Tech/SaaS,Alexander Taylor,Head of Data,Patrickberg,"Empowering effective digital transformation, i...",SME (21-200),Seed,Medium,Serious,Yes,0.31067
510,Rogers-Maldonado,Tech/SaaS,Ethan Allen,Head of Data,Elizabethfurt,"Empowering effective digital transformation, i...",Startup (1-20),Series A,Medium,Professional,Yes,0.31067
46,Ramirez and Sons,Tech/SaaS,Marvin Robles,Head of Data,Lake Virginia,"Empowering effective digital transformation, i...",Startup (1-20),Seed,Low,Creative,Yes,0.31067
144,Cortez-Olson,Marketing,Daniel Martin,Head of Data,Johnsonton,Delivering effective marketing strategies to i...,Mid-size (201-1000),Series C+,High,Serious,Yes,0.276999
843,"Bennett, George and Fox",Marketing,Adrian Prince,Head of Data,South Vanessaville,Delivering effective marketing strategies to i...,Mid-size (201-1000),Series A,Medium,Friendly,No,0.276999
1782,Lin-Mendez,Marketing,Mark Hill,Head of Data,Martinezport,Delivering effective marketing strategies to i...,SME (21-200),Series A,Medium,Creative,Yes,0.276999
2451,"Hayes, Elliott and George",Marketing,Michael Smith,Head of Data,New Davidtown,Delivering effective marketing strategies to i...,SME (21-200),Series B,High,Professional,Yes,0.276999


# 📧 Mail generator

In [46]:
from langchain.chat_models import init_chat_model
from IPython.display import Markdown
import time

# Chat model used to draft the emails: "gemini-2.0-flash" through the
# "google_genai" provider.
model = init_chat_model("gemini-2.0-flash", model_provider="google_genai")

In [37]:
# Use the first freelance (row 0) as the sender profile for the email examples.
freelance = freelance_df.iloc[0]
freelance

name                                                      Allison Hill
title                                                Digital Marketing
main_sector                                                  Marketing
top3_skills                                   Analytics|Google Ads|SEO
city                                                         East Jill
daily_rate                                                      397.68
mission_statement    Allison Hill delivers trusted solutions in Ana...
preferred_tone                                            Professional
preferred_style                                           Storytelling
remote                                                             Yes
Name: 0, dtype: object

In [38]:
# Use the first row of the ranked prospects as the recipient of the example email.
prospect = top_20_df.iloc[0]
prospect

company                                      Spencer, Jones and Miller
sector                                                       Tech/SaaS
main_contact                                               Mark Romero
contact_role                                              Head of Data
city                                                    Lake Dianaland
mission_statement    Empowering effective digital transformation, i...
company_size                                              SME (21-200)
funding_stage                                                     Seed
ticket_size_class                                               Medium
target_tone                                                    Serious
remote                                                              No
similarity                                                     0.31067
Name: 1161, dtype: object

In [39]:
# Prompt for a single cold email from `freelance` to `prospect`: it embeds the
# freelance's profile and preferences plus the prospect's company details and
# target tone.
prompt = f"""
Write a concise and professional cold email in English to offer your mission: {freelance['mission_statement']}
to a company called {prospect['company']} based in {prospect['city']}, operating in the {prospect['sector']} sector.
Your contact is {prospect['main_contact']}, who is the {prospect['contact_role']}.

You are {freelance['name']}, a {freelance['title']} specialized in the {freelance['main_sector']} sector,
based in {freelance['city']}. You offer services with top skills in {freelance['top3_skills']},
at a daily rate of {freelance['daily_rate']} (remote: {freelance['remote']}).

Highlight your main value proposition in 2–3 sentences, adapted to the needs of a company like {prospect['company']}
(size: {prospect['company_size']}, funding stage: {prospect['funding_stage']}, remote work: {prospect['remote']}).

Include a clear call to action. Keep the tone {freelance['preferred_tone']} and the style {freelance['preferred_style']},
while matching the company’s target tone: {prospect['target_tone']}.
"""


In [40]:
# Send the prompt to the chat model and render the generated email as Markdown.
response = model.invoke(prompt)
# Read the reply through the public `content` attribute (as mail_generator
# does) instead of reaching into the instance `__dict__`.
Markdown(response.content)

Subject: Innovating Spencer, Jones and Miller's Marketing with Data-Driven Growth

Dear Mark Romero,

My name is Allison Hill, and I'm a digital marketing specialist based in East Jill. I help companies like yours in the Tech/SaaS sector unlock significant growth through data-driven strategies in Analytics and Google Ads.

At Spencer, Jones and Miller, I imagine you're constantly seeking ways to optimize your marketing spend and refine your customer acquisition efforts. I specialize in creating tailored strategies that provide actionable insights and scalable, user-focused campaigns, helping SMEs like yours achieve impactful business success.

Would you be open to a brief 15-minute call next week to discuss how I can help Spencer, Jones and Miller leverage the power of analytics to drive more qualified leads and improve your ROI?

Sincerely,

Allison Hill
[Your Contact Information]

In [51]:
def mail_generator(freelance, top_20_df, model=None, batch_size=10, pause_seconds=30):
    """
    Generate one cold email per prospect and return them in a ``mail`` column.

    Args:
        freelance: Freelance profile indexable by key (e.g. a DataFrame row),
            providing: mission_statement, name, title, main_sector, city,
            top3_skills, daily_rate, remote, preferred_tone, preferred_style.
        top_20_df: DataFrame of prospects providing the columns: company,
            city, sector, main_contact, contact_role, company_size,
            funding_stage, remote, target_tone. It is left unmodified.
        model: Object exposing ``invoke(prompt)``. When None (default), a
            "gemini-2.0-flash" chat model is created with init_chat_model.
        batch_size: Number of requests sent between two pauses (default: 10).
            A falsy value (0 or None) disables pausing.
        pause_seconds: Duration of each pause, in seconds (default: 30).

    Returns:
        A new DataFrame with the rows of ``top_20_df``, a fresh 0..n-1 index
        and a ``mail`` column holding the generated text, or
        ``"ERROR: <message>"`` for prospects whose request raised.
    """
    if model is None:
        model = init_chat_model("gemini-2.0-flash", model_provider="google_genai")

    # Reset the index BEFORE adding the column: reset_index returns a new
    # DataFrame, so the caller's frame is not mutated, and the 0..n-1 labels
    # make the `.at[prospect_id, ...]` writes below line up with `.iloc`.
    mails_df = top_20_df.reset_index(drop=True)
    mails_df['mail'] = ''

    for prospect_id in range(len(mails_df)):
        # Pause for `pause_seconds` every `batch_size` requests to respect rate limits.
        if batch_size and prospect_id > 0 and prospect_id % batch_size == 0:
            print(f"Waiting {pause_seconds} seconds to respect rate limits...")
            time.sleep(pause_seconds)

        prospect = mails_df.iloc[prospect_id]

        prompt = f"""
        Write a concise and professional cold email in English to offer your mission: {freelance['mission_statement']}
        to a company called {prospect['company']} based in {prospect['city']}, operating in the {prospect['sector']} sector.
        Your contact is {prospect['main_contact']}, who is the {prospect['contact_role']}.

        You are {freelance['name']}, a {freelance['title']} specialized in the {freelance['main_sector']} sector,
        based in {freelance['city']}. You offer services with top skills in {freelance['top3_skills']},
        at a daily rate of {freelance['daily_rate']} (remote: {freelance['remote']}).

        Highlight your main value proposition in 2–3 sentences, adapted to the needs of a company like {prospect['company']}
        (size: {prospect['company_size']}, funding stage: {prospect['funding_stage']}, remote work: {prospect['remote']}).

        Include a clear call to action. Keep the tone {freelance['preferred_tone']} and the style {freelance['preferred_style']},
        while matching the company’s target tone: {prospect['target_tone']}.
        """

        try:
            response = model.invoke(prompt)
            content = response.content if hasattr(response, "content") else str(response)
        except Exception as e:
            # Best effort: one failed request must not abort the whole batch,
            # so the error is logged and stored in place of the email.
            print(f"Error at index {prospect_id}: {e}")
            content = f"ERROR: {e}"

        mails_df.at[prospect_id, 'mail'] = content

    return mails_df


In [52]:
# Generate one email per ranked prospect for the selected freelance.
top_20 = mail_generator(freelance, top_20_df)

In [None]:
# Display the prospects together with their generated `mail` column.
top_20

Unnamed: 0,company,sector,main_contact,contact_role,city,mission_statement,company_size,funding_stage,ticket_size_class,target_tone,remote,similarity,mail
0,"Spencer, Jones and Miller",Tech/SaaS,Mark Romero,Head of Data,Lake Dianaland,"Empowering effective digital transformation, i...",SME (21-200),Seed,Medium,Serious,No,0.31067,"Subject: Optimizing Spencer, Jones & Miller's ..."
1,"Joseph, Campbell and Douglas",Tech/SaaS,Tristan Rojas,Head of Data,North Melissa,"Empowering effective digital transformation, i...",Mid-size (201-1000),Seed,Medium,Creative,Yes,0.31067,"Subject: Level Up Joseph, Campbell & Douglas's..."
2,Perez-Smith,Tech/SaaS,Catherine Cole,Head of Data,East Renee,"Empowering effective digital transformation, i...",Enterprise (1000+),Series B,High,Professional,No,0.31067,Subject: Innovating Perez-Smith's Marketing wi...
3,Thomas Group,Tech/SaaS,Alexander Taylor,Head of Data,Patrickberg,"Empowering effective digital transformation, i...",SME (21-200),Seed,Medium,Serious,Yes,0.31067,Subject: Data-Driven Growth for Thomas Group: ...
4,Rogers-Maldonado,Tech/SaaS,Ethan Allen,Head of Data,Elizabethfurt,"Empowering effective digital transformation, i...",Startup (1-20),Series A,Medium,Professional,Yes,0.31067,Subject: Innovating Rogers-Maldonado's Growth ...
5,Ramirez and Sons,Tech/SaaS,Marvin Robles,Head of Data,Lake Virginia,"Empowering effective digital transformation, i...",Startup (1-20),Seed,Low,Creative,Yes,0.31067,Subject: Level Up Ramirez & Sons' Growth with ...
6,Cortez-Olson,Marketing,Daniel Martin,Head of Data,Johnsonton,Delivering effective marketing strategies to i...,Mid-size (201-1000),Series C+,High,Serious,Yes,0.276999,Subject: Innovating Cortez-Olson's Marketing w...
7,"Bennett, George and Fox",Marketing,Adrian Prince,Head of Data,South Vanessaville,Delivering effective marketing strategies to i...,Mid-size (201-1000),Series A,Medium,Friendly,No,0.276999,"Subject: Innovating Bennett, George and Fox's ..."
8,Lin-Mendez,Marketing,Mark Hill,Head of Data,Martinezport,Delivering effective marketing strategies to i...,SME (21-200),Series A,Medium,Creative,Yes,0.276999,Subject: Innovating Lin-Mendez's Marketing wit...
9,"Hayes, Elliott and George",Marketing,Michael Smith,Head of Data,New Davidtown,Delivering effective marketing strategies to i...,SME (21-200),Series B,High,Professional,Yes,0.276999,"Subject: Innovating Hayes, Elliott and George'..."


In [59]:
# Render the email generated for the prospect at position 4 as Markdown.
Markdown(top_20.iloc[4]['mail'])

Subject: Innovating Rogers-Maldonado's Growth with Data-Driven Marketing

Dear Ethan Allen,

As Head of Data at Rogers-Maldonado, you're likely focused on leveraging data to fuel growth. I'm Allison Hill, a digital marketing specialist based in East Jill, and I help Tech/SaaS companies like yours translate data into actionable strategies for impactful results. I specialize in using Analytics and Google Ads to optimize marketing spend and drive user acquisition, which can be particularly valuable during your Series A funding stage.

Having worked with startups navigating similar growth phases, I understand the need for scalable, user-focused marketing. I believe my expertise in Analytics, Google Ads, and SEO can help Rogers-Maldonado unlock its full potential.

Would you be open to a brief 15-minute call to discuss how I can help you optimize your marketing strategy?

Sincerely,

Allison Hill
Digital Marketing Specialist
(Remote: Yes, Daily Rate: 397.68)