## Loading data and modules 

In [1]:
import pandas as pd
import numpy as np
from os import getcwd
import matplotlib.pyplot as plt
import igraph
from ast import literal_eval

In [2]:
# The data folder is resolved relative to the current working directory,
# i.e. the directory the notebook kernel was started from.
CURR_PATH = getcwd()
PATH_DATA = f"{CURR_PATH}/data/"

# The "id_followers" column is passed through literal_eval while the CSV is
# read, so each cell becomes a Python object (a list of ids, per the preview
# printed further down) instead of staying a raw string.
accounts_data = pd.read_csv(
    f"{PATH_DATA}instagram_accounts.csv",
    converters={"id_followers": literal_eval},
)
posts_data = pd.read_csv(f"{PATH_DATA}instagram_posts.csv")

## Visualising the data 

In [3]:
# Quick look at the accounts table: the first rows, then the follower-list
# column on its own.
for preview in (accounts_data.head(), accounts_data["id_followers"]):
    print(preview)

   Unnamed: 0  id_user  nb_followers  nb_following  nb_posts     sex  \
0           0   288877           167            58        48  female   
1           1   140311            67            72         8  female   
2           2   182096           142            95        14  female   
3           3   208875           249            99       150    male   
4           4   960092            96           114        19  female   

                                        id_followers  \
0  [738818, 134147, 314454, 977416, 926730, 82740...   
1  [380289, 341188, 775558, 998151, 246792, 17869...   
2  [524806, 968200, 241324, 233490, 188948, 15054...   
3  [776192, 164353, 989698, 134147, 305670, 65792...   
4  [858624, 896013, 138779, 817185, 854563, 88580...   

                       department                   email  \
0  ('64', 'Pyrénées-Atlantiques')  zacharieweber@live.com   
1                ('03', 'Allier')   hugues65@fontaine.com   
2                  ('27', 'Eure')       gilles1

In [4]:
# NOTE(review): everything in this cell is commented out, so nothing here runs.
# If re-enabled, the loop below would count, for every id_user, how many
# "id_followers" lists that id appears in (presumably the number of accounts
# the user follows -- confirm before relying on it). It would raise KeyError
# for any follower id that is not present in accounts_data["id_user"].
# dict_following = {key: 0 for key in accounts_data["id_user"].values}
# for idx in range(len(accounts_data)):
#     lst = accounts_data.at[idx, "id_followers"]
#     for ele in lst:
#         dict_following[ele] += 1

# NOTE(review): the statement below is unfinished (no right-hand side). It also
# uses chained indexing, which assigns to a temporary copy; if it is ever
# completed, write it as accounts_data.loc[mask, "nb_followers"] = ... instead.
# accounts_data[accounts_data["id_user"]==625666]["nb_followers"] = 

In [5]:
# Checking for duplicates - and we found one!
# Shape is printed BEFORE the clean-up so the row count can be compared with
# the de-duplicated table afterwards.
print(accounts_data.shape)

# Ids that occur more than once in "id_user" (one entry per repeated row).
duplicated_user_id = accounts_data.loc[
    accounts_data["id_user"].duplicated(), "id_user"
].values

# keep=False discards EVERY row that shares a duplicated id (not just the
# repeats), since we cannot tell which copy is the correct one.
# errors="ignore" keeps the cell re-runnable: on a second run the helper
# column "Unnamed: 0" is already gone and a plain drop would raise KeyError.
accounts_data = (
    accounts_data
    .drop_duplicates(subset=["id_user"], keep=False)
    .drop(columns=["Unnamed: 0"], errors="ignore")
    .reset_index(drop=True)
)
print(accounts_data.head())

# The removed accounts no longer exist in the table, so strip them from every
# follower list as well. Filtering (rather than list.remove) drops all
# occurrences of a removed id and does not mutate the original list objects.
removed_ids = set(duplicated_user_id.tolist())
accounts_data["id_followers"] = accounts_data["id_followers"].apply(
    lambda followers: [follower for follower in followers if follower not in removed_ids]
)

# Re-derive the follower count so it stays consistent with the cleaned lists.
accounts_data["nb_followers"] = accounts_data["id_followers"].apply(len)

(3047, 11)
   id_user  nb_followers  nb_following  nb_posts     sex  \
0   288877           167            58        48  female   
1   140311            67            72         8  female   
2   182096           142            95        14  female   
3   208875           249            99       150    male   
4   960092            96           114        19  female   

                                        id_followers  \
0  [738818, 134147, 314454, 977416, 926730, 82740...   
1  [380289, 341188, 775558, 998151, 246792, 17869...   
2  [524806, 968200, 241324, 233490, 188948, 15054...   
3  [776192, 164353, 989698, 134147, 305670, 65792...   
4  [858624, 896013, 138779, 817185, 854563, 88580...   

                       department                   email  \
0  ('64', 'Pyrénées-Atlantiques')  zacharieweber@live.com   
1                ('03', 'Allier')   hugues65@fontaine.com   
2                  ('27', 'Eure')       gilles11@live.com   
3         ('22', "Côtes-d'Armor")    manoncolin

In [6]:
# Graph vertices are the row positions 0..N-1 of accounts_data; these two
# dictionaries translate between a vertex index and the matching id_user.
mappingFrNodeToUserId = dict(enumerate(accounts_data["id_user"]))
mappingFrUserIdToNode = {user_id: node for node, user_id in mappingFrNodeToUserId.items()}
dict_followers = dict(zip(accounts_data["id_user"], accounts_data["id_followers"]))

# One directed edge per (account, follower) pair, pointing from the account to
# its follower, with both endpoints expressed as vertex indices.
# NOTE(review): a follower id that is missing from accounts_data["id_user"]
# raises KeyError here -- follower lists must only reference known accounts.
edges = [
    (mappingFrUserIdToNode[user_id], mappingFrUserIdToNode[follower_id])
    for user_id, follower_ids in dict_followers.items()
    for follower_id in follower_ids
]

# Pass the vertex count explicitly: without it igraph infers the count from the
# largest index in the edge list, so accounts at the end of the table that have
# no edges would be missing and the node <-> user mappings would be out of step.
g = igraph.Graph(n=len(accounts_data), edges=edges, directed=True)
g.vs["size"] = 1
# Last expression of the cell: the computed layout object is what gets displayed.
g.layout_lgl()
# igraph.plot(g)

<Layout with 3045 vertices and 2 dimensions>

In [7]:
# Split the directed graph into strongly connected components and print the
# resulting clustering (its summary line followed by the members of each cluster).
components = g.clusters(mode="strong")
print(components)

Clustering with 3045 elements and 1 clusters
[0] 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
    21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
    39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56,
    57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
    75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92,
    93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
    109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123,
    124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138,
    139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153,
    154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168,
    169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183,
    184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198,
    199, 200, 201, 202

In [12]:
# NOTE(review): the three expressions below are evaluated but their results are
# discarded -- none of them is the last expression of the cell, so nothing is shown.
g.is_directed()
g.is_bipartite()
(g.vcount(), g.ecount())

# Vertex indices 0..vcount-1 as a plain list.
vertices = list(range(g.vcount()))

# Per-vertex centralities computed with mode="out" (following outgoing edges);
# each call returns one value per vertex and the full list is printed.
out_closeness = g.closeness(mode="out")
print(out_closeness)
out_harmonic = g.harmonic_centrality(mode="out")
print(out_harmonic)

[0.5141023475764229, 0.49901639344262294, 0.5116826357370987, 0.5213221442027745, 0.5053959820687365, 0.50074025333114, 0.5140155352921311, 0.5077564637197665, 0.507079793436615, 0.5211436397877076, 0.513062531602899, 0.510053619302949, 0.507079793436615, 0.5174230834608193, 0.5155826558265583, 0.5143629604596147, 0.5160196643498898, 0.5135819132782183, 0.5205198358413132, 0.5196312734721747, 0.508859913072551, 0.5171593611960584, 0.5096266532730621, 0.5142760601452948, 0.5165450534532496, 0.5077564637197665, 0.5074179029838306, 0.5030573458932408, 0.5196312734721747, 0.5027250206440957, 0.5047255844801857, 0.5158447720725301, 0.5160196643498898, 0.5175990477809896, 0.5113388207626407, 0.5126305153250252, 0.5154953429297205, 0.5012349744771941, 0.5154953429297205, 0.5190995907230559, 0.5167204209811577, 0.5090301003344482, 0.5184806676886391, 0.5176870748299319, 0.5079259135658268, 0.5210544334132147, 0.5147108555968888, 0.507248791868022, 0.508859913072551, 0.5105669238510567, 0.50489