In [None]:
pip install transformers



In [None]:
from transformers import pipeline

class InfluencerDetector:
    """Flag whether a free-text request is about influencer marketing.

    The request is scored against a fixed list of candidate labels with a
    zero-shot classification pipeline. The first three labels describe
    influencer-related intents; the remaining ones describe everything else.
    """

    def __init__(self):
        # Order matters: check_influencer_request treats labels[:3] as the
        # influencer-related group.
        self.labels = [
            "Working with Instagram/TikTok/YouTube influencers",
            "Finding people to review or showcase my product",
            "Seeking brand ambassadors or sponsored content",
            "Other type of marketing (ads, SEO, etc.)",
            "Completely unrelated request"
        ]
        self.classifier = pipeline(
            "zero-shot-classification",
            model="facebook/bart-large-mnli",
        )

    def check_influencer_request(self, text: str) -> str:
        """Classify ``text`` and return a one-line, human-readable verdict.

        Args:
            text: The user's request.

        Returns:
            A string starting with "✅ Influencer Matching" when the first
            label returned by the classifier is one of the first three
            entries of ``self.labels``, otherwise a string starting with
            "❌ Other". Both include the first returned score as a whole
            percentage and the first returned label.
        """
        prediction = self.classifier(text, self.labels)
        best_label = prediction["labels"][0]
        influencer_labels = self.labels[:3]

        # Guard clause: anything outside the influencer group is "Other".
        if best_label not in influencer_labels:
            return f"❌ Other (Confidence: {prediction['scores'][0]:.0%}) - This is: '{best_label}'"

        return f"✅ Influencer Matching (Confidence: {prediction['scores'][0]:.0%}) - Detected as: '{best_label}'"

# Build the detector once; constructing it creates the classification pipeline.
detector = InfluencerDetector()

print("Influencer Request Detector")
print("Type 'exit' to quit\n")

# Interactive loop: classify each non-empty line until the user types
# 'exit' or 'quit' (case-insensitive).
while True:
    user_input = input("Enter your request: ").strip()
    if user_input.lower() in ("exit", "quit"):
        break

    # Blank lines are ignored and the prompt is simply shown again.
    if user_input:
        print("\n" + "=" * 50)
        print(f"Input: '{user_input}'")
        print(detector.check_influencer_request(user_input))
        print("=" * 50 + "\n")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.15k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Device set to use cuda:0


Influencer Request Detector
Type 'exit' to quit

Enter your request: i sell laptops and i need to promote my product

Input: 'i sell laptops and i need to promote my product'
✅ Influencer Matching (Confidence: 70%) - Detected as: 'Finding people to review or showcase my product'



In [27]:
from transformers import pipeline

# Zero-shot classification pipeline used to assign the product domain.
classifier = pipeline(
    "zero-shot-classification",
    model="facebook/bart-large-mnli",
)

# Candidate labels: the first word is the domain name, the parenthesised
# part only gives the classifier more context.
labels = [
    "fashion (clothing, bags, accessories)",
    "beauty (makeup, skincare, cosmetics)",
    "electronics (gadgets, tech, devices)"
]

# Ask for a description and score it against the candidate labels.
user_input = input("Describe your product or business: ")
result = classifier(user_input, labels)

# Keep only the leading word of the first returned label (e.g. "beauty").
detected = result["labels"][0].split(None, 1)[0]

print("\n✅ Detected Domain:", detected)

# Flag predictions where the first two returned scores are within 0.05.
if abs(result["scores"][1] - result["scores"][0]) < 0.05:
    print("\n⚠️  Warning: Prediction is uncertain. Consider manual review.")


Device set to use cuda:0


Describe your product or business: i want a small business for lip gloss

✅ Detected Domain: beauty


In [1]:
pip install pandas openpyxl



In [4]:
import pandas as pd

# Load the influencer spreadsheet into a DataFrame.
df_inf = pd.read_excel(io='Ghty.xlsx', engine='openpyxl')

# Preview the first five rows.
print(df_inf.head(5))

                             Nom                   IG_Handle Followers  \
0                 ʟᴏᴜʙɴᴀ ʙᴀɢʜᴅᴀᴅ              @loubnabaghdad    844.6K   
1                  Afrah chiadli   @afrah_giiordaniiofficiel    826.1K   
2                Lamyae Alahyane             @lamyaealahyane      713K   
3                      Faty Elmm  @fatima_zahra_el_moussaoui    658.1K   
4  Houda Laaroussi | هدى العروسي        @houda_laaroussi_bhr    600.3K   

   Topics                                     Lien Instagram  country  
0  beauty            https://www.instagram.com/loubnabaghdad  morocco  
1  beauty  https://www.instagram.com/afrah_giiordaniioffi...  morocco  
2  beauty           https://www.instagram.com/lamyaealahyane  morocco  
3  beauty  https://www.instagram.com/fatima_zahra_el_mous...  morocco  
4  beauty      https://www.instagram.com/houda_laaroussi_bhr  morocco  


In [26]:
from transformers import pipeline
import pandas as pd
import spacy

# Demonym-to-country lookup: lower-case demonym -> lower-case country name.
# Multi-word country names are hyphenated (e.g. "south-korea").
DEMONYM_MAP = dict(
    tunisian="tunisia",
    turkish="turkey",
    moroccan="morocco",
    algerian="algeria",
    french="france",
    italian="italy",
    spanish="spain",
    korean="south-korea",
    brazilian="brazil",
    argentinian="argentina",
    australian="australia",
    swiss="switzerland",
    belgian="belgium",
    emirati="united-arab-emirates",
)

def detect_domain_and_country():
    """Prompt for a product description and detect its domain and target country.

    The domain is the first word of the top label returned by a zero-shot
    classifier. The country comes from spaCy GPE entities, and is overridden
    by the first token that is a known demonym in ``DEMONYM_MAP``.

    Returns:
        tuple: ``(detected_domain, detected_country)``. ``detected_country``
        is a lower-case string, or ``None`` when nothing was detected.
    """
    # Load zero-shot classifier for domain
    classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

    # Load spaCy model for country detection, downloading it on first use
    try:
        nlp = spacy.load("en_core_web_sm")
    except OSError:
        print("Downloading spaCy model...")
        import subprocess
        import sys
        # Use the interpreter running this kernel: a bare "python" may resolve
        # to a different environment (or not exist), so the model would be
        # installed somewhere spacy.load() below cannot see.
        subprocess.run([sys.executable, "-m", "spacy", "download", "en_core_web_sm"])
        nlp = spacy.load("en_core_web_sm")

    # Get user input
    user_input = input("Describe your product or business: ")

    # Detect domain
    domain_labels = [
        "fashion (clothing, bags, accessories)",
        "beauty (makeup, skincare, cosmetics)",
        "technology (gadgets, tech, devices)"
    ]
    domain_result = classifier(user_input, domain_labels)
    # Keep only the leading word of the top label, e.g. "fashion"
    detected_domain = domain_result["labels"][0].split()[0]

    # Run NER on the text as typed: lowercasing it first removes the
    # capitalization the model uses to recognise place names. Lowercasing is
    # applied afterwards, only to the values that are compared or returned.
    doc = nlp(user_input)
    detected_country = None

    # Check for country names (GPE entities)
    countries = [ent.text.lower() for ent in doc.ents if ent.label_ == "GPE"]
    if countries:
        detected_country = countries[0]

    # Check for demonyms (e.g., "Tunisian"); the first match takes precedence
    # over any GPE entity found above.
    for token in doc:
        demonym = token.text.lower()
        if demonym in DEMONYM_MAP:
            detected_country = DEMONYM_MAP[demonym]
            break

    print("\n📌 Detected Domain:", detected_domain)
    if detected_country:
        print("🌍 Detected Country:", detected_country.capitalize())

    # Warn when the first two returned scores are within 0.05 of each other
    if abs(domain_result["scores"][0] - domain_result["scores"][1]) < 0.05:
        print("\n⚠️ Warning: Domain prediction is uncertain. Consider manual review.")

    return detected_domain, detected_country

# Multipliers for the suffixes of abbreviated follower counts ("844.6K", "1.2M").
_FOLLOWER_SUFFIXES = {"K": 1_000, "M": 1_000_000, "B": 1_000_000_000}

# Hard-coded conversion rates (local currency units per 1 USD), keyed by the
# lower-cased value of the spreadsheet's 'country' column.
_CURRENCY_RATES = {
    'morocco': {'currency': 'MAD', 'rate': 10.0},
    'algeria': {'currency': 'DZD', 'rate': 134.0},
    'turkey': {'currency': 'TRY', 'rate': 18.5},
    'united-arab-emirates': {'currency': 'AED', 'rate': 3.67},
    'australia': {'currency': 'AUD', 'rate': 1.5},
    'switzerland': {'currency': 'CHF', 'rate': 0.92},
    'belgium': {'currency': 'EUR', 'rate': 0.93},
    'brazil': {'currency': 'BRL', 'rate': 5.2},
    'south-korea': {'currency': 'KRW', 'rate': 1300.0},
    'france': {'currency': 'EUR', 'rate': 0.93},
    'italy': {'currency': 'EUR', 'rate': 0.93},
    'spain': {'currency': 'EUR', 'rate': 0.93},
    'tunisia': {'currency': 'TND', 'rate': 3.1},
    'argentina': {'currency': 'ARS', 'rate': 350.0}
}


def _parse_follower_count(raw):
    """Convert a follower count such as "844.6K", "1.2M" or "950" to a float.

    Numeric cells are returned as floats; missing or unparseable values
    become NaN instead of raising.
    """
    if raw is None:
        return float("nan")
    if isinstance(raw, (int, float)):
        return float(raw)

    text = str(raw).strip().upper()
    multiplier = 1
    if text and text[-1] in _FOLLOWER_SUFFIXES:
        multiplier = _FOLLOWER_SUFFIXES[text[-1]]
        text = text[:-1]
    try:
        return float(text) * multiplier
    except ValueError:
        return float("nan")


def _estimate_salary_usd(followers):
    """Estimate a fee in USD from a follower count using tiered per-follower rates."""
    if followers < 100000:
        return followers * 0.01  # $10 per 1K
    elif followers < 500000:
        return followers * 0.02  # $20 per 1K
    elif followers < 1000000:
        return followers * 0.03  # $30 per 1K
    else:
        return followers * 0.05  # $50 per 1K


def get_influencers(detected_domain, detected_country=None):
    """List up to five influencers for a domain, optionally by country and budget.

    Reads 'Ghty.xlsx', keeps rows whose 'Topics' contain ``detected_domain``,
    narrows to ``detected_country`` when given (falling back to all countries
    if that leaves nothing), estimates a fee in USD and local currency, then
    prompts for an optional USD budget and prints the selection as a table.

    Args:
        detected_domain: Domain keyword matched (case-insensitively, as a
            plain substring) against the 'Topics' column.
        detected_country: Optional country name matched (case-insensitively,
            as a plain substring) against the 'country' column.

    Returns:
        pandas.DataFrame: The selected rows (at most five), ordered by
        follower count descending. Empty when no influencer matches the
        domain at all.
    """
    # Load dataset
    df = pd.read_excel('Ghty.xlsx', engine='openpyxl')

    # Clean follower counts (handles K/M/B suffixes, plain numbers and blanks)
    df['Followers_Numeric'] = df['Followers'].map(_parse_follower_count).astype(float)

    # Filter by detected domain in Topics column
    if 'Topics' in df.columns:
        topic_mask = df['Topics'].str.lower().str.contains(
            detected_domain.lower(), regex=False, na=False
        )
        # .copy() so the column assignments below never target a view
        df = df[topic_mask].copy()
    else:
        print("Warning: 'Topics' column not found. Showing all influencers.")

    # Filter by country if detected
    if detected_country:
        country_lower = df['country'].str.lower()
        country_mask = country_lower.str.contains(
            detected_country.lower(), regex=False, na=False
        )
        if country_mask.any():
            df = df[country_mask].assign(country_lower=country_lower)
        else:
            print(f"\n⚠️ No influencers found in {detected_country.capitalize()}. Showing global results.")
            # Results are global from here on, so the headers must say so too.
            detected_country = None

    location = f"in {detected_country.capitalize()}" if detected_country else "globally"

    # Standardize country names
    df['country_clean'] = df['country'].str.lower()

    # Calculate USD salary
    df['Salary_USD'] = df['Followers_Numeric'].apply(_estimate_salary_usd)

    # Convert to local currency; countries without a known rate stay in USD
    rate_by_country = {name: info['rate'] for name, info in _CURRENCY_RATES.items()}
    currency_by_country = {name: info['currency'] for name, info in _CURRENCY_RATES.items()}
    df['Salary_Local'] = df['Salary_USD'] * df['country_clean'].map(rate_by_country).fillna(1.0)
    df['Currency'] = df['country_clean'].map(currency_by_country).fillna('USD')

    # Nothing to rank: return instead of prompting for a budget that can
    # never be satisfied.
    if df.empty:
        print(f"No {detected_domain} influencers found {location}.")
        return df

    # Get user input for budget
    while True:
        budget_input = input("\nEnter your budget in USD (or press Enter to see top influencers): ").strip()

        if not budget_input:
            # No budget provided
            result = df.sort_values('Followers_Numeric', ascending=False).head(5)
            print(f"\nTop 5 {detected_domain} influencers {location}:")
            break

        try:
            budget_usd = float(budget_input)
        except ValueError:
            print("Please enter a valid number for the budget.")
            continue

        # `not (x > 0)` also rejects NaN, which `x <= 0` would let through
        if not budget_usd > 0:
            print("Please enter a positive budget amount.")
            continue

        # Filter by budget
        result = (
            df[df['Salary_USD'] <= budget_usd]
            .sort_values('Followers_Numeric', ascending=False)
            .head(5)
        )

        if len(result) == 0:
            print(f"No {detected_domain} influencers found {location} within ${budget_usd:,.2f} budget.")
            continue

        print(f"\nTop 5 {detected_domain} influencers {location} within ${budget_usd:,.2f} budget:")
        break

    # Display results
    display_cols = ['Topics', 'Nom', 'IG_Handle', 'Followers', 'country',
                   'Salary_USD', 'Salary_Local', 'Currency']

    # Format the output as a proper table; to_markdown needs the optional
    # 'tabulate' package, so fall back to plain text when it is missing.
    try:
        table = result[display_cols].to_markdown(index=False, stralign="left")
    except ImportError:
        table = result[display_cols].to_string(index=False)
    print("\n" + table)

    return result

# Entry point: detect the domain/country from a prompt, then list influencers.
if __name__ == "__main__":
    domain, country = detect_domain_and_country()
    get_influencers(detected_domain=domain, detected_country=country)

Device set to use cuda:0


Describe your product or business: Hello , i sell clothes online i need a tunisian influencer to promote my damn products give me some suggestions

📌 Detected Domain: fashion
🌍 Detected Country: Tunisia

Enter your budget in USD (or press Enter to see top influencers): 4000

Top 5 fashion influencers in Tunisia within $4,000.00 budget:

| Topics   | Nom                            | IG_Handle         | Followers   | country   |   Salary_USD |   Salary_Local | Currency   |
|:---------|:-------------------------------|:------------------|:------------|:----------|-------------:|---------------:|:-----------|
| fashion  | Nesrine HITANA                 | @nesrine_hitana   | 195.1K      | Tunisia   |         3902 |        12096.2 | TND        |
| fashion  | 𝐊𝐡𝐚𝐰𝐥𝐚 𝐁𝐨𝐮𝐤𝐡𝐫𝐢𝐬                | @khawlaboukhris   | 189K        | Tunisia   |         3780 |        11718   | TND        |
| fashion  | Hanen Benmnaouar/ Blogger_food | @hanenbenmnaouar  | 182.5K      | Tunisia   |         3650 |       