In [3]:
# Mount Google Drive
# Makes Drive contents available under /content/drive; the NER model loaded
# later in this notebook is read from /content/drive/MyDrive/...
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [16]:
import re
from datetime import datetime, timedelta

import pandas as pd
from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification

# ✅ Load your Telegram data CSV
# Columns (per the df.head() preview in this notebook): an unnamed index column,
# Channel, Username, Message ID, Text, Date, Media Path. There is no views column.
df = pd.read_csv('telegram_data.csv')

In [17]:
# Preview the first rows to check the column names and sample values.
df.head()

Unnamed: 0,Channel,Username,Message ID,Text,Date,Media Path
0,Sheger online-store,@Shageronlinestore,7394,,2025-06-20 11:47:53+00:00,../data/telegram_media\@Shageronlinestore_7394...
1,Sheger online-store,@Shageronlinestore,7393,💥 1L Water Bottle\n\n 💯High Quality\n\n⚡...,2025-06-20 11:47:53+00:00,../data/telegram_media\@Shageronlinestore_7393...
2,Sheger online-store,@Shageronlinestore,7392,,2025-06-20 09:03:23+00:00,../data/telegram_media\@Shageronlinestore_7392...
3,Sheger online-store,@Shageronlinestore,7391,💥 Sonifer Steam Iron \n\n የልብስ መቶከሻ\n\n💯 ...,2025-06-20 09:03:23+00:00,../data/telegram_media\@Shageronlinestore_7391...
4,Sheger online-store,@Shageronlinestore,7390,💥Sayona multifunctional juicer and extractor\n...,2025-06-20 06:48:11+00:00,../data/telegram_media\@Shageronlinestore_7390...


In [18]:
# ✅ Convert date
# Parse the 'Date' strings (e.g. "2025-06-20 11:47:53+00:00") into pandas
# datetimes so the per-vendor max/min subtraction in the scoring loop works.
df['Date'] = pd.to_datetime(df['Date'])

In [19]:
# ✅ Load NER pipeline
# The tokenizer and token-classification model are read from a directory on the
# mounted Drive. local_files_only=True restricts from_pretrained to files that
# are already on disk instead of fetching them from the Hugging Face Hub.
# NOTE(review): confirm this checkpoint's label set actually contains PRICE;
# extract_price keeps only entities whose group is 'PRICE'.
model_path = "/content/drive/MyDrive/models/xlm-roberta-base-ner"
tokenizer = AutoTokenizer.from_pretrained(model_path, local_files_only=True)
model = AutoModelForTokenClassification.from_pretrained(model_path, local_files_only=True)

In [20]:

# Build the token-classification pipeline from the model and tokenizer loaded above.
# aggregation_strategy="simple" groups token-level predictions into entity
# spans; each result carries the 'entity_group' and 'word' fields that
# extract_price reads.
ner_pipeline = pipeline(
    "token-classification",
    model=model,
    tokenizer=tokenizer,
    aggregation_strategy="simple"
)

Device set to use cpu


In [23]:
# ✅ Function to extract price from text
# A number inside a PRICE span: digits with optional thousands commas and an
# optional decimal part, e.g. "1500", "1,500" or "1,500.50".
_PRICE_NUMBER_RE = re.compile(r'\d+(?:,\d{3})*(?:\.\d+)?')


def extract_price(text, ner=None):
    """Return every numeric price found in the PRICE entities of ``text``.

    Args:
        text: Message text to analyse. Anything that is not a non-blank string
            (None, NaN, "", whitespace) yields an empty list without calling
            the tagger.
        ner: Optional callable that maps a string to a list of dicts with
            'entity_group' and 'word' keys. Defaults to the notebook's global
            ``ner_pipeline``.

    Returns:
        list[float]: One value per number found inside PRICE spans, in order
        of appearance. Empty when no PRICE entity contains a number.
    """
    if not isinstance(text, str) or not text.strip():
        return []

    tagger = ner_pipeline if ner is None else ner

    prices = []
    for entity in tagger(text):
        if entity['entity_group'] != 'PRICE':
            continue
        # Parse each number separately. Gluing all digit characters together
        # would turn "12.50" into 1250 and "500 - 600" into 500600.
        for number in _PRICE_NUMBER_RE.findall(entity['word']):
            prices.append(float(number.replace(',', '')))
    return prices

# ✅ Initialize list to store results
# The vendor scoring loop appends one dict per vendor to this list.
vendor_scores = []

In [26]:
# ✅ Process each vendor (channel)
# Relative weights of the two signals in the example lending score.
POSTS_WEIGHT = 0.7
PRICE_WEIGHT = 0.3

# Start from an empty list every time this cell runs; otherwise re-running the
# cell appends a second copy of every vendor row to the scorecard.
vendor_scores = []

# groupby(sort=False) yields vendors in first-appearance order (the same order
# as unique()) but splits the frame once instead of re-filtering it per vendor.
for vendor, vendor_data in df.groupby('Channel', sort=False):
    # Posts per week: whole days between the first and last post, converted to
    # weeks and floored at 1 so a span under a week never divides by less than
    # one (or by zero when every post falls on the same day).
    total_days = (vendor_data['Date'].max() - vendor_data['Date'].min()).days
    weeks = max(total_days / 7, 1)
    posts_per_week = len(vendor_data) / weeks

    # Average price across all posts; rows whose Text is NaN are skipped.
    all_prices = []
    for text in vendor_data['Text'].dropna():
        all_prices.extend(extract_price(text))
    # A vendor with no extracted prices gets 0, which is indistinguishable from
    # a genuine zero average. If every vendor shows 0, check that the NER model
    # emits a PRICE label at all.
    avg_price = sum(all_prices) / len(all_prices) if all_prices else 0

    # Lending Score (Example formula)
    # NOTE(review): posts/week and average price are combined without
    # normalisation, so whichever has the larger numeric scale dominates the
    # score regardless of the weights. Consider scaling both before weighting.
    lending_score = (posts_per_week * POSTS_WEIGHT) + (avg_price * PRICE_WEIGHT)

    # Append to results
    vendor_scores.append({
        'Vendor': vendor,
        'Posts/Week': round(posts_per_week, 2),
        'Avg Price (ETB)': round(avg_price, 2),
        'Lending Score': round(lending_score, 2),
    })

# ✅ Create Scorecard DataFrame
scorecard = pd.DataFrame(vendor_scores)
print(scorecard)

# ✅ Save to CSV
scorecard.to_csv('vendor_scorecard.csv', index=False)

                Vendor  Posts/Week  Avg Price (ETB)  Lending Score
0  Sheger online-store       52.24                0          36.57
1       Zemen Express®       43.48                0          30.43
2       NEVA COMPUTER®        9.55                0           6.68
3                 መነሻዬ        6.40                0           4.48
4          EthioBrand®       10.61                0           7.42
5                ልዩ እቃ       42.17                0          29.52
6         HellooMarket       13.83                0           9.68
