In [4]:
import pandas as pd
import numpy as np

# Rank tweet authors by a combined "volume + engagement" score and save the
# ranking to CSV.
#
# Pipeline: load the posts CSV -> aggregate per author -> score each author
# relative to the whole data set -> print the ranking -> write it to disk.

filename = "/content/top_posts_50_likes_3_replies_5_rt.csv"
output_filename = "author_scores.csv"

# Weight applied to each per-post metric when computing a post's engagement.
ENGAGEMENT_WEIGHTS = {
    'viewCount': 1,
    'quoteCount': 1000,
    'retweetCount': 500,
    'replyCount': 200,
    'likeCount': 100,
}


def aggregate_authors(posts, weights=None):
    """Collapse a posts table into per-author tweet counts and engagement.

    Args:
        posts: DataFrame with 'author.userName', 'author.id' and one column
            per key of ``weights``. The metric columns are expected to be
            numeric with no missing values (the script cleans them first).
        weights: Mapping of metric column name to its multiplier. Defaults
            to ``ENGAGEMENT_WEIGHTS``.

    Returns:
        Dict keyed by user name, in order of first appearance, with values
        ``{'id': <author.id of the first post>, 'tweets': n, 'engagement': e}``.
        Rows whose user name is missing are ignored.
    """
    if weights is None:
        weights = ENGAGEMENT_WEIGHTS

    # One column-wise pass instead of recomputing the formula row by row.
    post_scores = sum(posts[column] * weight for column, weight in weights.items())

    authors = {}
    rows = zip(posts['author.userName'], posts['author.id'], post_scores)
    for username, author_id, score in rows:
        if pd.isna(username):
            continue
        entry = authors.get(username)
        if entry is None:
            authors[username] = {'id': author_id, 'tweets': 1, 'engagement': score}
        else:
            entry['tweets'] += 1
            entry['engagement'] += score
    return authors


def score_authors(author_data, total_tweets, total_engagement):
    """Compute each author's final score from the aggregated data.

    The score is
    ``0.5 * N * tweets / total_tweets + 0.5 * N * engagement / total_engagement``
    where ``N`` is the number of authors, so the mean score over all authors
    is 1.0 when the totals are the sums over ``author_data``.

    Args:
        author_data: Mapping as returned by ``aggregate_authors``.
        total_tweets: Sum of 'tweets' over all authors; must be non-zero.
        total_engagement: Sum of 'engagement' over all authors; must be
            non-zero.

    Returns:
        List of dicts with keys 'author', 'tweets', 'engagement' and
        'final_score', in the iteration order of ``author_data``.
    """
    num_authors = len(author_data)
    results = []
    for author_name, data in author_data.items():
        tweet_component = 0.5 * num_authors * data['tweets'] / total_tweets
        engagement_component = 0.5 * num_authors * data['engagement'] / total_engagement
        results.append({
            'author': author_name,
            'tweets': data['tweets'],
            'engagement': data['engagement'],
            'final_score': tweet_component + engagement_component,
        })
    return results


# Start every run from empty results. Without this, a failure inside the
# try block leaves `sorted_results` undefined (NameError in the save step)
# or, when re-running in an interactive session, pointing at results from a
# previous run that would then be written out as if they were current.
author_data_dict = {}
final_results = []
sorted_results = []

try:
    # 1. Load the data and validate its schema.
    print(f"🔄 Loading data from {filename}...")
    df = pd.read_csv(filename)

    required_cols = ['author.userName', 'author.id', 'viewCount', 'quoteCount', 'retweetCount', 'replyCount', 'likeCount']
    missing = [col for col in required_cols if col not in df.columns]
    if missing:
        raise ValueError(f"Missing required columns: {', '.join(missing)}")

    # Missing counts are treated as 0. The width is explicit so large
    # weighted sums do not depend on the platform's default integer size.
    engagement_cols = ['viewCount', 'quoteCount', 'retweetCount', 'replyCount', 'likeCount']
    df[engagement_cols] = df[engagement_cols].fillna(0).astype('int64')

    print("⚙️  Processing initial data...")
    author_data_dict = aggregate_authors(df)

    # 2. Totals used to normalise each author's share.
    num_authors = len(author_data_dict)
    total_tweets = sum(data['tweets'] for data in author_data_dict.values())
    total_engagement = sum(data['engagement'] for data in author_data_dict.values())

    print("📊 Calculating final scores based on your formula...")

    # Both totals are divisors in the formula.
    if total_tweets == 0 or total_engagement == 0:
        raise ValueError("Total tweets or total engagement is zero, cannot calculate scores.")

    # 3. Score every author, then 4. rank by score (highest first).
    final_results = score_authors(author_data_dict, total_tweets, total_engagement)
    sorted_results = sorted(final_results, key=lambda x: x['final_score'], reverse=True)

    # 5. Print the final ranked list.
    print("\n--- 🏆 Authors Ranked by Final Score (Average = 1.0) ---")
    for result in sorted_results:
        print(f"{result['author']} - Final Score: {result['final_score']:.2f} "
              f"({result['tweets']} tweets, {result['engagement']:,} engagement)")

except FileNotFoundError:
    print(f"❌ Error: The file '{filename}' was not found.")
except Exception as e:
    # Top-level boundary of the script: report the problem and fall through
    # with empty results so the save step below is skipped.
    print(f"An unexpected error occurred: {e}")

# --- 6. Prepare and Save Final Results to CSV ---
# Add the author ID to the results and rename 'final_score' to 'weight'.
final_data_for_csv = [
    {
        'author': result['author'],
        'id': author_data_dict[result['author']]['id'],
        'num_tweets': result['tweets'],
        'engagement': result['engagement'],
        'weight': result['final_score'],
    }
    for result in sorted_results
]

if final_data_for_csv:
    print(f"\n💾 Saving results to {output_filename}...")

    # Passing `columns` fixes the column set and order in one step.
    output_df = pd.DataFrame(
        final_data_for_csv,
        columns=['author', 'id', 'num_tweets', 'engagement', 'weight'],
    )

    # Save the DataFrame to a CSV file, without the index column.
    output_df.to_csv(output_filename, index=False)

    print(f"✅ Successfully saved the ranked results to {output_filename}")
else:
    # Nothing was computed (the load or scoring step failed above), so do
    # not create or overwrite the output file.
    print(f"\n⚠️ No results to save; skipping {output_filename}.")

🔄 Loading data from /content/top_posts_50_likes_3_replies_5_rt.csv...
⚙️  Processing initial data...
📊 Calculating final scores based on your formula...

--- 🏆 Authors Ranked by Final Score (Average = 1.0) ---
DreadBong0 - Final Score: 9.53 (17 tweets, 1,724,229 engagement)
JosephJacks_ - Final Score: 7.31 (19 tweets, 898,793 engagement)
BarrySilbert - Final Score: 5.39 (7 tweets, 1,162,231 engagement)
markjeffrey - Final Score: 4.47 (12 tweets, 523,187 engagement)
AltcoinDaily - Final Score: 3.44 (2 tweets, 915,599 engagement)
Carloxir - Final Score: 3.31 (8 tweets, 450,446 engagement)
opentensor - Final Score: 3.14 (7 tweets, 470,124 engagement)
DrocksAlex2 - Final Score: 2.64 (7 tweets, 314,648 engagement)
Defi0xJeff - Final Score: 2.55 (5 tweets, 430,124 engagement)
0xPrismatic - Final Score: 2.33 (5 tweets, 360,675 engagement)
SiamKidd - Final Score: 2.19 (6 tweets, 247,735 engagement)
coingecko - Final Score: 2.05 (2 tweets, 488,068 engagement)
Grayscale - Final Score: 2.03 (4 tw