In [4]:
import happybase

# User Analysis
---

## Top 3 followed users

In [21]:
# Report the three users with the highest follower counts stored in HBase.

# HBase connection settings. happybase talks to HBase through its Thrift
# server, so host/port must point at that Thrift endpoint.
hbase_host = 'localhost'  # Replace with your HBase host
hbase_port = 9090  # Default HBase Thrift server port

# Open the connection outside the try block: if this fails there is
# nothing to close.
connection = happybase.Connection(host=hbase_host, port=hbase_port)

# Maps user ID (decoded row key) -> follower count
user_followers = {}

try:
    table = connection.table('user_table')

    # Restrict the scan to the follower-count column only
    scan_result = table.scan(columns=['data:followers'])

    # Rows are consumed here, while the connection is still open
    for key, data in scan_result:
        user_id = key.decode('utf-8')
        followers = int(data[b'data:followers'].decode('utf-8'))
        user_followers[user_id] = followers
finally:
    # Always release the connection, even if the scan or parsing fails
    connection.close()

# Sort users by follower count (descending) and keep the first three
top_users = sorted(user_followers.items(), key=lambda item: item[1], reverse=True)[:3]

# Print the top users and their followers
for user_id, followers in top_users:
    print(f"User ID: {user_id}, Followers: {followers}")


User ID: 109999724950289416, Followers: 3340001
User ID: 109999725002952105, Followers: 2410000
User ID: 109718180885037597, Followers: 2330004


## Users with highest engagement rate

In [27]:
# Report the three users with the highest stored engagement rate.

# HBase connection settings. happybase talks to HBase through its Thrift
# server, so host/port must point at that Thrift endpoint.
hbase_host = 'localhost'  # Replace with your HBase host
hbase_port = 9090  # Default HBase Thrift server port

# Open the connection outside the try block: if this fails there is
# nothing to close.
connection = happybase.Connection(host=hbase_host, port=hbase_port)

# Maps user ID (decoded row key) -> engagement rate as a float
user_engagement = {}

try:
    # Select the table
    table = connection.table('user_table')

    # Restrict the scan to the engagement-rate column only
    scan_result = table.scan(columns=['data:engagement_rate'])

    # Rows are consumed here, while the connection is still open
    for key, data in scan_result:
        user_id = key.decode('utf-8')
        engagement_rate = float(data[b'data:engagement_rate'].decode('utf-8'))
        user_engagement[user_id] = engagement_rate
finally:
    # Always release the connection, even if the scan or parsing fails
    connection.close()

# Sort users by engagement rate (descending) and keep the first three
top_users = sorted(user_engagement.items(), key=lambda item: item[1], reverse=True)[:3]

# Print the top users and their engagement rates
for user_id, engagement_rate in top_users:
    # Values were already parsed to float above; scale by 100 for display as a percentage
    eng = engagement_rate * 100
    print(f"User ID: {user_id}, Engagement Rate: {eng:.2f} %")

User ID: 110643001753379072, Engagement Rate: 19.05 %
User ID: 111243750378556518, Engagement Rate: 7.14 %
User ID: 110657506981999016, Engagement Rate: 5.62 %


## User growth over time (top 3 months by user count)

In [28]:
# Report the three months with the highest user counts in 'croissance_table'.

# HBase connection settings. happybase talks to HBase through its Thrift
# server, so host/port must point at that Thrift endpoint.
hbase_host = 'localhost'  # Replace with your HBase host
hbase_port = 9090  # Default HBase Thrift server port

# Open the connection outside the try block: if this fails there is
# nothing to close.
connection = happybase.Connection(host=hbase_host, port=hbase_port)

# Maps month (decoded row key) -> user count
month_user_counts = {}

try:
    # Select the table
    table = connection.table('croissance_table')

    # Restrict the scan to the count column only
    scan_result = table.scan(columns=['data:count'])

    # Rows are consumed here, while the connection is still open
    for key, data in scan_result:
        month = key.decode('utf-8')
        count = int(data[b'data:count'].decode('utf-8'))
        month_user_counts[month] = count
finally:
    # Always release the connection, even if the scan or parsing fails
    connection.close()

# Sort months by user count (descending) and keep the first three
top_months = sorted(month_user_counts.items(), key=lambda item: item[1], reverse=True)[:3]

# Print the top 3 months and their user counts
for month, count in top_months:
    print(f"Month: {month}, User Count: {count}")

Month: 2022-11, User Count: 45
Month: 2022-12, User Count: 14
Month: 2022-04, User Count: 13


# Content analysis
---

## Top 3 websites

In [29]:
# Report the three most-mentioned websites stored in 'url_table'.

# HBase connection settings. happybase talks to HBase through its Thrift
# server, so host/port must point at that Thrift endpoint.
hbase_host = 'localhost'  # Replace with your HBase host
hbase_port = 9090  # Default HBase Thrift server port

# Open the connection outside the try block: if this fails there is
# nothing to close.
connection = happybase.Connection(host=hbase_host, port=hbase_port)

# Maps website (decoded row key) -> mention count
website_counts = {}

try:
    # Select the table
    table = connection.table('url_table')

    # Restrict the scan to the count column only
    scan_result = table.scan(columns=['data:count'])

    # Rows are consumed here, while the connection is still open
    for key, data in scan_result:
        website = key.decode('utf-8')
        count = int(data[b'data:count'].decode('utf-8'))
        website_counts[website] = count
finally:
    # Always release the connection, even if the scan or parsing fails
    connection.close()

# Sort websites by mention count (descending) and keep the first three
top_websites = sorted(website_counts.items(), key=lambda item: item[1], reverse=True)[:3]

# Print the top 3 websites and their mention counts
for website, count in top_websites:
    print(f"Website: {website}, Mention Count: {count}")

Website: mastodon.social, Mention Count: 137
Website: www.telam.com.ar, Mention Count: 70
Website: twitter.com, Mention Count: 58


# Language analysis
---

## Most used languages

In [30]:
# Report the three most-used languages stored in 'language_table'.

# HBase connection settings. happybase talks to HBase through its Thrift
# server, so host/port must point at that Thrift endpoint.
hbase_host = 'localhost'  # Replace with your HBase host
hbase_port = 9090  # Default HBase Thrift server port

# Open the connection outside the try block: if this fails there is
# nothing to close.
connection = happybase.Connection(host=hbase_host, port=hbase_port)

# Maps language (decoded row key) -> count
language_counts = {}

try:
    # Select the table
    table = connection.table('language_table')

    # Restrict the scan to the count column only
    scan_result = table.scan(columns=['data:count'])

    # Rows are consumed here, while the connection is still open
    for key, data in scan_result:
        language = key.decode('utf-8')
        count = int(data[b'data:count'].decode('utf-8'))
        language_counts[language] = count
finally:
    # Always release the connection, even if the scan or parsing fails
    connection.close()

# Sort languages by count (descending) and keep the first three
top_languages = sorted(language_counts.items(), key=lambda item: item[1], reverse=True)[:3]

# Print the top 3 languages and their counts
for language, count in top_languages:
    print(f"Language: {language}, Count: {count}")

Language: en, Count: 1677
Language: de, Count: 313
Language: es, Count: 115


# Media engagement
---

## Count of posts with media

In [35]:
# Fetch the single 'toot_with_media' row and print every cell value it holds.

# HBase connection settings. happybase talks to HBase through its Thrift
# server, so host/port must point at that Thrift endpoint.
hbase_host = 'localhost'  # Replace with your HBase host
hbase_port = 9090  # Default HBase Thrift server port

# Open the connection outside the try block: if this fails there is
# nothing to close.
connection = happybase.Connection(host=hbase_host, port=hbase_port)

# Row key to retrieve (HBase row keys are bytes)
row_key = b'toot_with_media'

try:
    # Select the table
    table = connection.table('toot_with_media_table')

    # Fetch the whole row in one call
    row_data = table.row(row_key)
finally:
    # Always release the connection, even if the lookup fails
    connection.close()

# An empty result is treated as "row not found"
if row_data:
    # Only the cell values are needed; column names are not displayed
    for cell in row_data.values():
        print(f"There is {cell.decode()} Posts with media")
else:
    print(f"Row with key '{row_key.decode('utf-8')}' not found in the table.")

There is 619 Posts with media


# Tags and mentions analysis
---

## Most used tags

In [38]:
# Report the three most-used tags stored in 'tag_table'.

# HBase connection settings. happybase talks to HBase through its Thrift
# server, so host/port must point at that Thrift endpoint.
hbase_host = 'localhost'  # Replace with your HBase host
hbase_port = 9090  # Default HBase Thrift server port

# Open the connection outside the try block: if this fails there is
# nothing to close.
connection = happybase.Connection(host=hbase_host, port=hbase_port)

# Maps tag (decoded row key) -> count
tag_counts = {}

try:
    # Select the table
    table = connection.table('tag_table')

    # Restrict the scan to the count column only
    scan_result = table.scan(columns=['data:count'])

    # Rows are consumed here, while the connection is still open
    for key, data in scan_result:
        tag = key.decode('utf-8')
        count = int(data[b'data:count'].decode('utf-8'))
        tag_counts[tag] = count
finally:
    # Always release the connection, even if the scan or parsing fails
    connection.close()

# Sort tags by count (descending) and keep the first three
top_tags = sorted(tag_counts.items(), key=lambda item: item[1], reverse=True)[:3]

# Print the top 3 tags and their counts
for tag, count in top_tags:
    print(f"Tag: {tag}, Count: {count}")

Tag: michigan, Count: 56
Tag: press, Count: 46
Tag: genocide, Count: 42
