In [None]:
import pandas as pd
import json
import matplotlib

#### Step 1: Load in the JSON files ####

In [12]:
# Load the two Instagram export files.
# JSON text is UTF-8, so the encoding is passed explicitly instead of relying
# on the platform's default text encoding, which is not UTF-8 on every system
# and can fail (or silently garble) non-ASCII display names.
with open("followers_and_following/followers_1.json", "r", encoding="utf-8") as f:
    followers_data = json.load(f)

with open("followers_and_following/following.json", "r", encoding="utf-8") as f:
    following_data = json.load(f)

# Print sample data to understand the structure.
# The followers file is sliced directly (a top-level list of records), while the
# following file keeps its records under the "relationships_following" key.
print("Sample followers data:")
print(followers_data[:2])  # Print the first two records for followers

print("\nSample following data:")
print(following_data["relationships_following"][:2])  # Print the first two records for following



Sample followers data:
[{'title': '', 'media_list_data': [], 'string_list_data': [{'href': 'https://www.instagram.com/savitha15_m', 'value': 'savitha15_m', 'timestamp': 1734506383}]}, {'title': '', 'media_list_data': [], 'string_list_data': [{'href': 'https://www.instagram.com/robfromhb', 'value': 'robfromhb', 'timestamp': 1734363104}]}]

Sample following data:
[{'title': '', 'media_list_data': [], 'string_list_data': [{'href': 'https://www.instagram.com/savitha15_m', 'value': 'savitha15_m', 'timestamp': 1734451845}]}, {'title': '', 'media_list_data': [], 'string_list_data': [{'href': 'https://www.instagram.com/simply.yamomma', 'value': 'simply.yamomma', 'timestamp': 1734064744}]}]


In [14]:
# Peek at the first two records of each export to confirm their layout:
# followers first, then the records stored under "relationships_following".
samples = (
    ("Followers Data Sample:", followers_data[:2]),
    ("\nFollowing Data Sample:", following_data["relationships_following"][:2]),
)
for heading, records in samples:
    print(heading)
    print(records)


Followers Data Sample:
[{'title': '', 'media_list_data': [], 'string_list_data': [{'href': 'https://www.instagram.com/savitha15_m', 'value': 'savitha15_m', 'timestamp': 1734506383}]}, {'title': '', 'media_list_data': [], 'string_list_data': [{'href': 'https://www.instagram.com/robfromhb', 'value': 'robfromhb', 'timestamp': 1734363104}]}]

Following Data Sample:
[{'title': '', 'media_list_data': [], 'string_list_data': [{'href': 'https://www.instagram.com/savitha15_m', 'value': 'savitha15_m', 'timestamp': 1734451845}]}, {'title': '', 'media_list_data': [], 'string_list_data': [{'href': 'https://www.instagram.com/simply.yamomma', 'value': 'simply.yamomma', 'timestamp': 1734064744}]}]


In [40]:
# Duplicate check for followers: compare how many usernames were extracted with
# how many distinct ones there are. (Only duplicates are measured here; records
# with an empty "string_list_data" are not detected by this cell.)
followers_usernames = [entry["string_list_data"][0]["value"] for entry in followers_data]
unique_followers = set(followers_usernames)
print(f"Unique followers: {len(unique_followers)}")
print(f"Duplicates in followers: {len(followers_usernames) - len(unique_followers)}")

# Same duplicate check for the accounts being followed.
following_usernames = [
    entry["string_list_data"][0]["value"]
    for entry in following_data["relationships_following"]
]
unique_following = set(following_usernames)
print(f"Unique following: {len(unique_following)}")
print(f"Duplicates in following: {len(following_usernames) - len(unique_following)}")


Unique followers: 119
Duplicates in followers: 0
Unique following: 141
Duplicates in following: 0


#### Step 2: Convert Data into Pandas Dataframe ####

In [16]:
# Pull the username out of every record: it is the "value" field of the first
# entry in each record's "string_list_data" list.
followers = [record["string_list_data"][0]["value"] for record in followers_data]
following = [
    record["string_list_data"][0]["value"]
    for record in following_data["relationships_following"]
]

# Wrap each list in a single-column DataFrame so the two sides can be compared.
followers_df = pd.DataFrame(followers, columns=["username"])
following_df = pd.DataFrame(following, columns=["username"])

# Preview the first rows of both frames.
print("Followers DataFrame:")
print(followers_df.head())

print("\nFollowing DataFrame:")
print(following_df.head())


Followers DataFrame:
         username
0     savitha15_m
1       robfromhb
2  simply.yamomma
3    korinazamora
4         erin_y_

Following DataFrame:
         username
0     savitha15_m
1  simply.yamomma
2         erin_y_
3     j.azmiiinee
4    korinazamora


(119, 1)

#### Step 3: Find the Differences ####

Goal:\
Identify users who follow you but whom you don't follow back.\
Identify users whom you follow but who don't follow you back.


In [23]:
# Membership masks: for each side, is the username also present on the other side?
follower_is_followed = followers_df["username"].isin(following_df["username"])
followed_is_follower = following_df["username"].isin(followers_df["username"])

# Followers whose username does not appear in the following list.
followers_not_following_back = followers_df[~follower_is_followed]

# Followed accounts whose username does not appear in the followers list.
following_not_followed_back = following_df[~followed_is_follower]

# Heading on stdout, then the frame itself as the cell's displayed value.
print("People who follow you but you don't follow back:")
followers_not_following_back


People who follow you but you don't follow back:


Unnamed: 0,username
1,robfromhb
33,ashleighdee_
72,lillysjuicebar
76,vanessaoliva_
84,graciebarracorona
85,mariaxfloresss
98,sandra_shankal
102,vzavala28
105,clonesyperifericos_usa
108,sunday.at.8


In [36]:
# Count of followers you don't follow back.
# DataFrame.count() reports the non-null entries per column, so the result is a
# Series keyed by column name ("username") rather than a bare integer.
followers_not_following_back.count()

username    10
dtype: int64

In [20]:
# Show the accounts you follow whose usernames are absent from your followers list.
print("\nPeople you follow but they don't follow you back:")
following_not_followed_back  # bare last expression: displayed as the cell's output


People you follow but they don't follow you back:


Unnamed: 0,username
5,manda_schoon
8,itz___grace
10,realtorbrittneyliz
35,lyssa_duran
53,feedthecatsucsd
58,sungodsoccerucsd
64,it_be_me_marc
76,garagem_bjj_bones
79,angiechavarriii
86,nhfintl


In [35]:
# Count of accounts you follow that don't follow you back.
# DataFrame.count() reports the non-null entries per column (here: "username").
following_not_followed_back.count()

username    32
dtype: int64

In [34]:
# Key metrics: row counts of the two full frames and of the two one-sided
# difference frames computed earlier.
total_followers, total_following = len(followers_df), len(following_df)
not_following_back_count = len(followers_not_following_back)
not_followed_back_count = len(following_not_followed_back)

# Emit the four summary lines in a single call, one metric per line.
print(
    f"Total Followers: {total_followers}",
    f"Total Following: {total_following}",
    f"People who follow you but you don't follow back: {not_following_back_count}",
    f"People you follow but they don't follow you back: {not_followed_back_count}",
    sep="\n",
)


Total Followers: 119
Total Following: 141
People who follow you but you don't follow back: 10
People you follow but they don't follow you back: 32
