<a href="https://colab.research.google.com/github/FPM22/farcaster-profile-classifier/blob/main/BuildersDataEnrichment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install --upgrade openai gspread oauth2client

from openai import OpenAI
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import pandas as pd
from google.colab import files
import json

# Prompt for a file upload in Colab; presumably this is the service-account
# key that is read below as 'credenciales.json' — confirm the uploaded name.
uploaded = files.upload()

# === CONFIGURATION ===

# Target spreadsheet (ID) and the worksheet tab to read and write.
sheet_id = '1ZyvpG5s8ASUVLnkwczniv3Mtxqt1ZIE-tYbC0esgtNE'
sheet_name = 'builders_list'  # make sure the name is correct

# OpenAI client used by the classification function.
# NOTE(review): the API key is hard-coded in source; consider loading it from
# a secret store or environment variable before sharing this notebook.
client = OpenAI(api_key='sk-proj-...')

# Service-account credentials for Google Sheets, limited to these OAuth scopes.
oauth_scopes = [
    'https://spreadsheets.google.com/feeds',
    'https://www.googleapis.com/auth/drive',
]
credentials = ServiceAccountCredentials.from_json_keyfile_name(
    'credenciales.json', oauth_scopes
)

# === LOAD DATA ===

# Authorize gspread, open the worksheet and load every record into a DataFrame.
gc = gspread.authorize(credentials)
sheet = gc.open_by_key(sheet_id)
worksheet = sheet.worksheet(sheet_name)
records = worksheet.get_all_records()
data = pd.DataFrame(records)

# A row needs classification when either tag column is an empty string.
missing_category = data['category'] == ''
missing_commercial = data['commercial_category'] == ''
df_to_categorize = data[missing_category | missing_commercial]

# === GPT FUNCTIONS ===

# Closed label sets offered to the model in the prompt; replies are normalized
# against them so only these exact spellings are ever returned.
_CATEGORY_LABELS = ("Founder", "Employee", "Active Community Member", "N/A")
_COMMERCIAL_LABELS = ("Business", "Community", "N/A")


def _normalize_label(value, allowed):
    """Return the entry of *allowed* equal to *value* ignoring case and
    surrounding whitespace, or "N/A" when there is no such entry."""
    if isinstance(value, str):
        wanted = value.strip().casefold()
        for label in allowed:
            if label.casefold() == wanted:
                return label
    return "N/A"


def _parse_classification(content):
    """Extract ``(category, commercial_category)`` from the model's reply text.

    The reply is expected to contain one flat JSON object, but models often
    wrap it in Markdown code fences or add prose around it, so only the span
    from the first ``{`` to the last ``}`` is decoded.

    Raises:
        ValueError: if the reply has no ``{...}`` span or it is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
        KeyError: if either expected key is missing from the object.
    """
    start = content.find("{")
    end = content.rfind("}")
    if start == -1 or end < start:
        raise ValueError(f"no JSON object in model reply: {content!r}")
    result = json.loads(content[start:end + 1])
    return (
        _normalize_label(result['category'], _CATEGORY_LABELS),
        _normalize_label(result['commercial_category'], _COMMERCIAL_LABELS),
    )


def classify_bio_with_gpt(bio, *, api_client=None, model="gpt-4"):
    """Classify a Farcaster profile bio into two tags using a chat model.

    Args:
        bio: Profile text to classify. ``None`` or blank text is answered
            with ``("N/A", "N/A")`` without calling the API.
        api_client: Object exposing ``chat.completions.create(...)``. When
            omitted, the module-level ``client`` is used.
        model: Chat model name sent with the request.

    Returns:
        A ``(category, commercial_category)`` tuple. ``category`` is one of
        Founder, Employee, Active Community Member or N/A;
        ``commercial_category`` is one of Business, Community or N/A.
        Any failure (API error, unparsable reply, missing key) is printed and
        reported as ``("N/A", "N/A")`` so a batch run keeps going.
    """
    # Nothing to classify: skip the API round-trip entirely.
    if bio is None or not str(bio).strip():
        return "N/A", "N/A"

    prompt = f"""
Given this Farcaster user profile:

"{bio}"

Assign two tags:

- category: choose one from Founder, Employee, Active Community Member, or N/A.
- commercial_category: choose one from Business, Community, or N/A.

Respond in JSON like this: {{"category": "...", "commercial_category": "..."}}
    """
    try:
        # Resolved inside the try so a missing/misconfigured global client is
        # reported like any other per-row failure.
        if api_client is None:
            api_client = client
        response = api_client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            temperature=0.2
        )
        content = response.choices[0].message.content
        return _parse_classification(content)
    except Exception as e:
        # Deliberate best-effort boundary: one bad row must not abort the run.
        print(f"⚠️ Error with bio: {bio}\n{e}")
        return "N/A", "N/A"

# === AUTOMATIC CLASSIFICATION ===

# Classify each selected row and write both tags back into the full frame,
# addressing rows by the index labels shared with `data`.
# NOTE(review): rows are selected when either tag is blank, yet both tags are
# overwritten — confirm that replacing an already-filled tag is intended.
for row_label in df_to_categorize.index:
    print(f"🔍 Classifying row {row_label}...")
    bio_text = df_to_categorize.at[row_label, 'bio']
    category_tag, commercial_tag = classify_bio_with_gpt(bio_text)
    data.at[row_label, 'category'] = category_tag
    data.at[row_label, 'commercial_category'] = commercial_tag

# === OPTIONAL LOCAL BACKUP ===
# Save the complete table locally before the sheet is rewritten.
data.to_csv("backup_builders_list.csv", index=False)

# === UPDATE SHEET ===
# Send the header row followed by every data row in a single update call.
header_row = data.columns.values.tolist()
body_rows = data.values.tolist()
worksheet.update([header_row] + body_rows)
print("✅ Classification completed and sheet updated.")
