In [1]:
import pandas as pd
import json
import requests

#### Step 1: Load in the JSON files ####

In [3]:
# Load the follower / following lists from the JSON export files.
# The encoding is passed explicitly: JSON interchange text is UTF-8, while
# open() without an encoding falls back to the platform's locale encoding,
# which can mis-decode or reject non-ASCII data on some systems.
with open("followers_1.json", "r", encoding="utf-8") as f:
    followers_data = json.load(f)

with open("following.json", "r", encoding="utf-8") as f:
    following_data = json.load(f)

# Print sample data to understand the structure.
# The two files are accessed differently below: followers_data is sliced
# directly, whereas the following records sit under "relationships_following".
print("Sample followers data:")
print(followers_data[:2])  # Print the first two records for followers

print("\nSample following data:")
print(following_data["relationships_following"][:2])  # Print the first two records for following



Sample followers data:
[{'title': '', 'media_list_data': [], 'string_list_data': [{'href': 'https://www.instagram.com/nxxelani', 'value': 'nxxelani', 'timestamp': 1735458644}]}, {'title': '', 'media_list_data': [], 'string_list_data': [{'href': 'https://www.instagram.com/chandni.jivrajani', 'value': 'chandni.jivrajani', 'timestamp': 1734870369}]}]

Sample following data:
[{'title': '', 'media_list_data': [], 'string_list_data': [{'href': 'https://www.instagram.com/nxxelani', 'value': 'nxxelani', 'timestamp': 1735453444}]}, {'title': '', 'media_list_data': [], 'string_list_data': [{'href': 'https://www.instagram.com/chandni.jivrajani', 'value': 'chandni.jivrajani', 'timestamp': 1734870663}]}]


In [4]:
# Peek at the first two records of each export to confirm their layout.
# followers_data is sliced directly; the following records are read from the
# "relationships_following" key before slicing.
print("Followers Data Sample:", followers_data[:2], sep="\n")
print("\nFollowing Data Sample:", following_data["relationships_following"][:2], sep="\n")


Followers Data Sample:
[{'title': '', 'media_list_data': [], 'string_list_data': [{'href': 'https://www.instagram.com/nxxelani', 'value': 'nxxelani', 'timestamp': 1735458644}]}, {'title': '', 'media_list_data': [], 'string_list_data': [{'href': 'https://www.instagram.com/chandni.jivrajani', 'value': 'chandni.jivrajani', 'timestamp': 1734870369}]}]

Following Data Sample:
[{'title': '', 'media_list_data': [], 'string_list_data': [{'href': 'https://www.instagram.com/nxxelani', 'value': 'nxxelani', 'timestamp': 1735453444}]}, {'title': '', 'media_list_data': [], 'string_list_data': [{'href': 'https://www.instagram.com/chandni.jivrajani', 'value': 'chandni.jivrajani', 'timestamp': 1734870663}]}]


In [5]:
# Tally distinct usernames and repeats in the followers list.
# Each record's username is read from its first "string_list_data" entry.
followers_usernames = [entry["string_list_data"][0]["value"] for entry in followers_data]
unique_follower_count = len(set(followers_usernames))
print(f"Unique followers: {unique_follower_count}")
print(f"Duplicates in followers: {len(followers_usernames) - unique_follower_count}")

# Same tally for the accounts being followed.
following_usernames = [
    entry["string_list_data"][0]["value"]
    for entry in following_data["relationships_following"]
]
unique_following_count = len(set(following_usernames))
print(f"Unique following: {unique_following_count}")
print(f"Duplicates in following: {len(following_usernames) - unique_following_count}")


Unique followers: 1074
Duplicates in followers: 0
Unique following: 1026
Duplicates in following: 0


#### Step 2: Convert Data into Pandas Dataframe ####

In [6]:
# Pull the username out of every export record (first "string_list_data"
# entry, "value" field), once for followers and once for following.
followers = [record["string_list_data"][0]["value"] for record in followers_data]
following = [
    record["string_list_data"][0]["value"]
    for record in following_data["relationships_following"]
]

# Wrap each list in a single-column DataFrame named "username".
followers_df = pd.DataFrame(followers, columns=["username"])
following_df = pd.DataFrame(following, columns=["username"])

# Preview the first rows of both frames as plain text.
print("Followers DataFrame:", followers_df.head(), sep="\n")
print("\nFollowing DataFrame:", following_df.head(), sep="\n")


Followers DataFrame:
            username
0           nxxelani
1  chandni.jivrajani
2      hargun.kkohli
3        savitha15_m
4          robfromhb

Following DataFrame:
            username
0           nxxelani
1  chandni.jivrajani
2      hargun.kkohli
3        savitha15_m
4     simply.yamomma


#### Step 3: Find the Differences ####

Goal:\
Identify users who follow you but whom you don't follow back.\
Identify users you follow who don't follow you back.


In [7]:
# Followers whose username is absent from the following list,
# i.e. people who follow you but whom you don't follow back.
follower_is_followed = followers_df["username"].isin(following_df["username"])
followers_not_following_back = followers_df.loc[~follower_is_followed]

# Followed accounts whose username is absent from the followers list,
# i.e. people you follow who don't follow you back.
followed_is_follower = following_df["username"].isin(followers_df["username"])
following_not_followed_back = following_df.loc[~followed_is_follower]

# Heading first, then the frame as the cell's displayed value.
print("People who follow you but you don't follow back:")
followers_not_following_back


People who follow you but you don't follow back:


Unnamed: 0,username
75,lillysjuicebar
105,vzavala28
108,clonesyperifericos_usa
111,sunday.at.8
134,hamburglry
...,...
1041,alb1995
1043,eat_my_bagel
1046,tri_cera_top
1055,fakenews98765432


In [8]:
# Count of followers you don't follow back.
# DataFrame.count() tallies non-null values per column, so the displayed
# result is a Series with one entry for "username" rather than a plain int.
followers_not_following_back.count()

username    238
dtype: int64

In [9]:
# Show the accounts you follow whose usernames are not in your followers list.
# The bare expression on the last line is what makes the notebook render the
# DataFrame beneath the printed heading.
print("\nPeople you follow but they don't follow you back:")
following_not_followed_back


People you follow but they don't follow you back:


Unnamed: 0,username
8,manda_schoon
12,realtorbrittneyliz
37,lyssa_duran
55,feedthecatsucsd
60,sungodsoccerucsd
...,...
984,tiffanykalhor
992,aliviamaay
997,makennacarr
1019,_trregarro


In [10]:
# Count of accounts you follow that don't follow you back.
# DataFrame.count() tallies non-null values per column, so the displayed
# result is a Series with one entry for "username" rather than a plain int.
following_not_followed_back.count()

username    190
dtype: int64

In [11]:
# Headline numbers: the size of each list and of each one-sided relationship.
total_followers, total_following = len(followers_df), len(following_df)
not_following_back_count = len(followers_not_following_back)
not_followed_back_count = len(following_not_followed_back)

# Emit the four summary lines in a single call, one per line.
print(
    f"Total Followers: {total_followers}",
    f"Total Following: {total_following}",
    f"People who follow you but you don't follow back: {not_following_back_count}",
    f"People you follow but they don't follow you back: {not_followed_back_count}",
    sep="\n",
)


Total Followers: 1074
Total Following: 1026
People who follow you but you don't follow back: 238
People you follow but they don't follow you back: 190
