In [1]:
# Caprae Capital AI-Readiness Lead Generation Challenge

## 1. Introduction & Business Rationale
This notebook demonstrates a smart lead generation tool for SaaS companies. It loads, cleans, enriches, scores, and filters leads, providing an interactive and exportable workflow.

SyntaxError: invalid syntax (534736510.py, line 4)

In [None]:
## 2. Data Loading
Load the SaaS company dataset.

In [2]:
import pandas as pd

df = pd.read_csv('sample_saas_companies.csv')
df.head()

Unnamed: 0,name,website,linkedin,industry,size,country
0,LinkedIn,https://linkedin.com,https://linkedin.com/company/linkedin,Social Media,10000+,US
1,GitHub,https://github.com,https://linkedin.com/company/github,Software Development,1000-5000,US
2,Canva,https://canva.com,https://linkedin.com/company/canva,Design,500-1000,AU
3,Stripe,https://stripe.com,https://linkedin.com/company/stripe,Financial Services,1000-5000,US
4,Shopify,https://shopify.com,https://linkedin.com/company/shopify,E-commerce,5000-10000,CA


In [None]:
## 3. Data Cleaning & Deduplication
Remove duplicates and normalize data.

In [3]:
df = df.drop_duplicates(subset=['name', 'website'])
df = df.dropna(subset=['name', 'website'])
df.reset_index(drop=True, inplace=True)
df.head()

Unnamed: 0,name,website,linkedin,industry,size,country
0,LinkedIn,https://linkedin.com,https://linkedin.com/company/linkedin,Social Media,10000+,US
1,GitHub,https://github.com,https://linkedin.com/company/github,Software Development,1000-5000,US
2,Canva,https://canva.com,https://linkedin.com/company/canva,Design,500-1000,AU
3,Stripe,https://stripe.com,https://linkedin.com/company/stripe,Financial Services,1000-5000,US
4,Shopify,https://shopify.com,https://linkedin.com/company/shopify,E-commerce,5000-10000,CA


In [None]:
## 4. Lead Enrichment
Add LinkedIn and website info (already present in this sample).

In [None]:
## 5. Lead Scoring
Score leads based on size, country, and industry.

In [4]:
# Example scoring: prioritize US-based, 1000+ employees, and 'Software' or 'Productivity' industries
def score_lead(row):
    score = 0
    if row['country'] == 'US':
        score += 2
    if '1000' in str(row['size']):
        score += 2
    if row['industry'] in ['Software Development', 'Productivity', 'Collaboration']:
        score += 1
    return score

df['score'] = df.apply(score_lead, axis=1)
df = df.sort_values(by='score', ascending=False)
df.head()

Unnamed: 0,name,website,linkedin,industry,size,country,score
1,GitHub,https://github.com,https://linkedin.com/company/github,Software Development,1000-5000,US,5
8,Slack,https://slack.com,https://linkedin.com/company/slack,Collaboration,1000-5000,US,5
0,LinkedIn,https://linkedin.com,https://linkedin.com/company/linkedin,Social Media,10000+,US,4
3,Stripe,https://stripe.com,https://linkedin.com/company/stripe,Financial Services,1000-5000,US,4
6,Zendesk,https://zendesk.com,https://linkedin.com/company/zendesk,Customer Service,1000-5000,US,4


In [None]:
## 6. Interactive Filtering & Export
Use widgets to filter leads and export results.

In [5]:
import ipywidgets as widgets
from IPython.display import display

slider = widgets.IntSlider(value=5, min=0, max=10)
display(slider)

IntSlider(value=5, max=10)

In [6]:
print(df.columns)
print(df.head())
print(df['score'].max())

Index(['name', 'website', 'linkedin', 'industry', 'size', 'country', 'score'], dtype='object')
       name               website                               linkedin  \
1    GitHub    https://github.com    https://linkedin.com/company/github   
8     Slack     https://slack.com     https://linkedin.com/company/slack   
0  LinkedIn  https://linkedin.com  https://linkedin.com/company/linkedin   
3    Stripe    https://stripe.com    https://linkedin.com/company/stripe   
6   Zendesk   https://zendesk.com   https://linkedin.com/company/zendesk   

               industry       size country  score  
1  Software Development  1000-5000      US      5  
8         Collaboration  1000-5000      US      5  
0          Social Media     10000+      US      4  
3    Financial Services  1000-5000      US      4  
6      Customer Service  1000-5000      US      4  
5


In [7]:
import ipywidgets as widgets
from IPython.display import display

industry_options = ['All'] + sorted(df['industry'].unique())
country_options = ['All'] + sorted(df['country'].unique())
size_options = ['All'] + sorted(df['size'].unique())

industry_widget = widgets.Dropdown(options=industry_options, description='Industry:')
country_widget = widgets.Dropdown(options=country_options, description='Country:')
size_widget = widgets.Dropdown(options=size_options, description='Size:')
score_widget = widgets.IntSlider(value=0, min=0, max=df['score'].max(), step=1, description='Min Score:')

filtered_df = df.copy()  # To allow export after filtering

def filter_leads(industry, country, size, min_score):
    global filtered_df
    filtered = df.copy()
    if industry != 'All':
        filtered = filtered[filtered['industry'] == industry]
    if country != 'All':
        filtered = filtered[filtered['country'] == country]
    if size != 'All':
        filtered = filtered[filtered['size'] == size]
    filtered = filtered[filtered['score'] >= min_score]
    filtered_df = filtered  # Save for export
    display(filtered)
    return filtered

ui = widgets.VBox([industry_widget, country_widget, size_widget, score_widget])
out = widgets.interactive_output(
    filter_leads, 
    {'industry': industry_widget, 'country': country_widget, 'size': size_widget, 'min_score': score_widget}
)
display(ui, out)

VBox(children=(Dropdown(description='Industry:', options=('All', 'Collaboration', 'Customer Service', 'Design'…

Output()

In [None]:
# Export filtered leads to CSV
# After filtering, run this cell to export the current filtered leads:
filtered_df.to_csv('filtered_leads.csv', index=False)
print("Exported filtered leads to filtered_leads.csv")

In [None]:
## 7. Conclusion & 1-Page Rationale
See `report.md` for a summary of design choices and business rationale.