# Load Your Data & Model

In [None]:
import pandas as pd
from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification

# Read the scraped Telegram posts; the "Date" column is parsed into datetimes
# at load time.
df = pd.read_csv(
    "../data/telegram_data.csv",
    parse_dates=["Date"],
)

# Build the token-classification (NER) pipeline from the local checkpoint.
# The tokenizer and the model are both loaded from the same directory.
model_path = "models/xlm-roberta-ner"
model = AutoModelForTokenClassification.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)
ner_pipeline = pipeline(
    task="ner",
    model=model,
    tokenizer=tokenizer,
    # "simple" aggregation makes the pipeline return grouped entities
    # (dicts carrying "entity_group" and "word") rather than raw tokens.
    aggregation_strategy="simple",
)


# Extract Prices Using NER

In [None]:
import re

# Captures the integer part of a number and swallows an optional decimal tail,
# so "1500.50" is one match ("1500") instead of two ("1500" and "50").
_PRICE_NUMBER_RE = re.compile(r"(\d+)(?:\.\d+)?")


def extract_prices(text):
    """Return the integer prices found in PRICE entities of *text*.

    The text is run through the module-level ``ner_pipeline``; every entity
    whose ``entity_group`` is ``"PRICE"`` has the thousands-separator commas
    removed from its ``word`` and each number in it converted to ``int``.
    A decimal part, if present, is dropped (``"1,500.50"`` -> ``1500``)
    rather than being reported as a second, spurious price.

    Args:
        text: Message text to scan.

    Returns:
        A list of ints, one per number found inside PRICE entities, in the
        order the pipeline reports them. Empty when *text* is not a string,
        is blank, or contains no PRICE entity.
    """
    # Nothing to tag: skip the (comparatively expensive) model call.
    if not isinstance(text, str) or not text.strip():
        return []

    prices = []
    for entity in ner_pipeline(text):
        if entity["entity_group"] != "PRICE":
            continue
        # Commas are treated as thousands separators: "1,500" -> "1500".
        cleaned = entity["word"].replace(",", "")
        prices.extend(int(n) for n in _PRICE_NUMBER_RE.findall(cleaned))
    return prices


# Vendor-Level Analytics Engine

In [None]:
from collections import defaultdict

# One summary dict per vendor channel; turned into the scorecard table later.
vendor_stats = []

# "Views" is treated as an optional column; the check is the same for every
# group, so do it once up front.
has_views = "Views" in df.columns

# Group data by vendor channel
for channel, group in df.groupby("Channel Username"):
    group = group.sort_values("Date")

    # Activity & Consistency: posts per week over the channel's active span.
    post_dates = pd.to_datetime(group["Date"])
    span_days = (post_dates.max() - post_dates.min()).days
    # A zero-day span (single post, or all posts on one day) counts as one
    # week to avoid dividing by zero. The explicit comparison also covers an
    # undefined span (all dates missing -> NaN), which a bare `or 1` would
    # let through because NaN is truthy.
    weeks = span_days / 7 if span_days > 0 else 1
    posting_freq = len(group) / weeks

    # Views: ignore posts without a view count. A channel with no usable
    # view counts is handled like a dataset without a "Views" column instead
    # of crashing in idxmax() or leaking NaN into the score.
    views = group["Views"].dropna() if has_views else None
    if views is not None and not views.empty:
        avg_views = views.mean()
        top_post = group.loc[views.idxmax()]
    else:
        avg_views = 0
        top_post = None

    # NER: Extract prices from every textual message (non-string cells,
    # e.g. missing values, are skipped).
    all_prices = [
        price
        for msg in group["Message"]
        if isinstance(msg, str)
        for price in extract_prices(msg)
    ]
    avg_price = sum(all_prices) / len(all_prices) if all_prices else 0

    # Lending Score (you can customize weights): equal-weight blend of
    # average views per post and posts per week.
    score = (avg_views * 0.5) + (posting_freq * 0.5)

    vendor_stats.append({
        "Channel": channel,
        "Posts/Week": round(posting_freq, 2),
        "Avg. Views/Post": round(avg_views, 2),
        "Avg. Price (ETB)": round(avg_price, 2),
        "Lending Score": round(score, 2),
        "Top Product": top_post["Message"] if top_post is not None else "N/A",
        "Top Views": top_post["Views"] if top_post is not None else "N/A",
    })


# Save Scorecard Table

In [None]:
# Build the scorecard from the per-vendor summaries and rank it from the
# highest to the lowest lending score.
scorecard_df = pd.DataFrame(vendor_stats).sort_values(
    by="Lending Score",
    ascending=False,
)

# Persist the ranked table; the row index is not written.
scorecard_df.to_csv("../data/vendor_scorecard.csv", index=False)

# Preview the headline metrics (last expression of the cell, so the notebook
# renders it).
preview_columns = [
    "Channel",
    "Avg. Views/Post",
    "Posts/Week",
    "Avg. Price (ETB)",
    "Lending Score",
]
scorecard_df[preview_columns]
