In [13]:
import json
import pandas as pd
import os

In [14]:
def returnValueFromData(data, key):
    """Look up a nested value in ``data`` by following a dot-separated path.

    Args:
        data: Nested structure to search, normally a dict of dicts.
        key: Dot-separated path of dictionary keys, e.g. ``"legacy.full_text"``.

    Returns:
        The value stored at the end of the path, or ``None`` when any path
        segment is missing or a value along the way is not a dict (including
        when ``data`` itself is not a dict).
    """
    # The walk only reads, so no copy of ``data`` is needed; rebinding the
    # cursor never mutates the caller's structure.
    current = data
    for part in key.split('.'):
        if not isinstance(current, dict) or part not in current:
            return None
        current = current[part]
    return current

def analyze_tweets(df):
    """Flag tweets whose engagement exceeds the mean of the given DataFrame.

    For each of the ``favorite_count``, ``retweet_count`` and ``reply_count``
    columns, a boolean ``above_avg_<metric>`` column is added to ``df`` in
    place, followed by ``above_average_engagement``, which is True when any of
    the three flags is True.

    Returns:
        A tuple ``(df, avg_favorite, avg_retweet, avg_reply)`` where ``df`` is
        the same (now augmented) DataFrame object that was passed in.
    """
    metrics = ("favorite", "retweet", "reply")

    # Compute every mean up front, before any column is added to the frame.
    means = {metric: df[f"{metric}_count"].mean() for metric in metrics}

    # One strictly-greater-than-mean flag column per metric.
    flag_columns = []
    for metric in metrics:
        flag_name = f"above_avg_{metric}"
        df[flag_name] = df[f"{metric}_count"] > means[metric]
        flag_columns.append(flag_name)

    # A tweet counts as above average if it beats the mean on any metric.
    any_above = df[flag_columns[0]]
    for flag_name in flag_columns[1:]:
        any_above = any_above | df[flag_name]
    df['above_average_engagement'] = any_above

    return df, means["favorite"], means["retweet"], means["reply"]


# --- Load Tweets Data ---
json_filename = "JeffBezos_first_10_tweets.json"  # Store the filename
# Decode explicitly as UTF-8 so that reading the tweet text does not depend
# on the platform's default locale encoding.
with open(json_filename, "r", encoding="utf-8") as json_file:
    data_tweets = json.load(json_file)

# --- Tweet Data Extraction ---
# Maps each output column name to the dot-separated path of its value inside
# a raw tweet record.
tweet_key_to_key_mapping = {
    "favorite_count": "legacy.favorite_count",
    "full_text": "legacy.full_text",
    "quote_count": "legacy.quote_count",
    "reply_count": "legacy.reply_count",
    "retweet_count": "legacy.retweet_count",
    "name": "core.user_results.result.legacy.name",
    "followers_count": "core.user_results.result.legacy.followers_count"
}

# Build one flat row per tweet, with one entry per mapped column.
tweets_list = [
    {
        column: returnValueFromData(raw_tweet, path)
        for column, path in tweet_key_to_key_mapping.items()
    }
    for raw_tweet in data_tweets
]

tweets_df = pd.DataFrame(tweets_list)

# --- Analyze Tweets (Highlight Above Average) ---
# analyze_tweets returns the frame plus the three averages it computed.
tweets_df, avg_favorite, avg_retweet, avg_reply = analyze_tweets(tweets_df)


# --- Display Results ---
# Columns shown in both the full listing and the above-average listing.
summary_columns = [
    "name", "followers_count", "full_text", "favorite_count", "retweet_count", "reply_count", "quote_count"
]

print("All Tweets:")
print(tweets_df[summary_columns])

print("\nTweets with Above-Average Engagement:")
# Select the flagged rows and the summary columns in a single step.
print(tweets_df.loc[tweets_df['above_average_engagement'], summary_columns])


# --- Display Average Values ---
print("\n--- Average Engagement Metrics ---")
print(f"Average Favorite Count: {avg_favorite:.2f}")
print(f"Average Retweet Count: {avg_retweet:.2f}")
print(f"Average Reply Count: {avg_reply:.2f}")

# --- Optional:  Display the boolean columns for analysis ---
flag_columns = [
    'above_avg_favorite', 'above_avg_retweet', 'above_avg_reply', 'above_average_engagement'
]
print("\nAnalysis Columns (Optional):")
print(tweets_df[flag_columns])

# --- Save to CSV (Including Averages) ---
# 1. Add averages to the DataFrame; each scalar is repeated on every row.
for column_name, average in (
    ('average_favorite_count', avg_favorite),
    ('average_retweet_count', avg_retweet),
    ('average_reply_count', avg_reply),
):
    tweets_df[column_name] = average

# 2. Generate CSV filename from the JSON filename with its extension removed.
base_filename = os.path.splitext(json_filename)[0]
csv_filename = f"{base_filename}_analyzed.csv"

# 3. Save to CSV without writing the row index as a column.
tweets_df.to_csv(csv_filename, index=False)

All Tweets:
         name  followers_count  \
0  Jeff Bezos          6793553   
1  Jeff Bezos          6793553   
2  Jeff Bezos          6793553   
3  Jeff Bezos          6793553   
4  Jeff Bezos          6793553   
5  Jeff Bezos          6793553   
6  Jeff Bezos          6793553   
7  Jeff Bezos          6793553   
8  Jeff Bezos          6793553   
9  Jeff Bezos          6793553   

                                           full_text  favorite_count  \
0  Big congratulations to our 45th and now 47th P...          280039   
1  Leo, come over here, I want to show you someth...          241790   
2  Our former President showed tremendous grace a...          214830   
3  My first job. And still the same great burger....          190743   
4  Ouch. Inflation is far too important a problem...          176425   
5  The newly created Disinformation Board should ...          151704   
6    @elonmusk @realDonaldTrump Nope. 100% not true.          138248   
7  Congratulations to @ElonMusk and t