# Instagram Network Analysis

In [42]:
import networkx as nx
import pandas as pd

### Number of nodes and edges 

In [43]:
# Build a directed graph from the adjacency-list file, keeping node labels as strings.
# NOTE(review): outputs further down list labels such as "'fretin98'," and an empty
# label, which suggests some lines of adjList.txt are not plain whitespace-separated
# account names — verify the file format.
G = nx.read_adjlist(
    'adjList.txt',
    create_using=nx.DiGraph(),
    nodetype=str,
)
node_total, edge_total = G.number_of_nodes(), G.number_of_edges()
print(node_total, edge_total)

5481 5663


In [44]:
# Average clustering coefficient of the whole graph, as computed by networkx.
avg_clustering = nx.average_clustering(G)
print(avg_clustering)

0.004075055388929472


In [45]:
# Size of the dominating set returned by networkx.
# NOTE(review): presumably not guaranteed to be a minimum dominating set — confirm in the networkx docs.
dominating_nodes = nx.dominating_set(G)
print(len(dominating_nodes))

4310


In [46]:
# (account, in-degree) pairs; the in-degree is what the tables below call the follower count.
follower_count = list(G.in_degree)
# Largest in-degree first.
follower_sorted = sorted(follower_count, key=lambda pair: pair[1], reverse=True)

### Followers count

In [47]:
# Tabulate the ten accounts with the highest in-degree.
top_followed = follower_sorted[:10]
df = pd.DataFrame(top_followed, columns=['Account', 'Follower count'])
df

Unnamed: 0,Account,Follower count
0,jeanne_andreaa,51
1,whitehouse,51
2,the_sstan,25
3,donsonn,25
4,chenggao1999,25
5,"'fretin98',",4
6,,4
7,"'yifei_cc',",3
8,a_toi_omega_,3
9,"'page.soobin',",3


In [48]:
# (account, out-degree) pairs; the out-degree is what the tables below call the following count.
following_count = list(G.out_degree)
# Largest out-degree first.
following_sorted = sorted(following_count, key=lambda pair: pair[1], reverse=True)

### Following count 

In [49]:
# Tabulate the ten accounts with the highest out-degree.
top_following = following_sorted[:10]
df = pd.DataFrame(top_following, columns=['Account', 'Following count'])
df

Unnamed: 0,Account,Following count
0,fretin98,1864
1,khriswu8,201
2,as_cool_as_cucumber_amber,200
3,aaannie_wang,200
4,ivyyyl_,200
5,donsonn,200
6,yitingzhang,200
7,dawn_rz_,200
8,chenggao1999,200
9,ccclare_,199


In [50]:
# Per-account average neighbour degree, as a {account: value} mapping.
avg_neighbor_degrees = nx.average_neighbor_degree(G)
# Flatten to (account, value) pairs and order them from largest to smallest value.
avg_neighb_deg_list = list(avg_neighbor_degrees.items())
avg_neigh_d_sorted = sorted(avg_neighb_deg_list, key=lambda pair: pair[1], reverse=True)

In [51]:
# Show the twenty accounts with the largest average neighbour degree.
top_avg_neighbor_degree = avg_neigh_d_sorted[:20]
print(top_avg_neighbor_degree)

[('user1', 932.0), ('just_johnny94', 200.0), ('sethutilak', 200.0), ('jacob_fusek', 200.0), ('zoeyzoey_z', 200.0), ('_lucyhe_', 200.0), ('coconini_z', 200.0), ('yyyuqi1', 200.0), ('fluffypun', 200.0), ('dannyothello', 200.0), ('drashttiii27', 200.0), ('maggieeshumm', 200.0), ('arnaud.biebuyck', 200.0), ('alishapeng24601', 200.0), ('shaun_ngtc', 200.0), ('jjjjjason_zheng', 200.0), ('hrkfric', 200.0), ('chuanhweeeee', 200.0), ('dsaha02', 200.0), ('judyy_moon', 200.0)]


In [52]:
# Accounts with no outgoing edges, each paired with its in-degree.
accounts_with_zero_following = [
    (account, G.in_degree(account))
    for account, n_following in G.out_degree
    if n_following == 0
]

# Largest in-degree first.
accounts_with_0_following_sorted = sorted(
    accounts_with_zero_following,
    key=lambda pair: pair[1],
    reverse=True,
)

### Accounts with 0 following and their follower count within the network

In [53]:
# Tabulate every account that has no outgoing edges, with its in-degree.
zero_following_columns = ['Account', 'Follower count']
df = pd.DataFrame(accounts_with_0_following_sorted, columns=zero_following_columns)
df

Unnamed: 0,Account,Follower count
0,"'fretin98',",4
1,,4
2,"'yifei_cc',",3
3,a_toi_omega_,3
4,"'page.soobin',",3
...,...,...
5280,officemagazinenyc,1
5281,yqu53,1
5282,william_weisicheng,1
5283,lily_dead_fish,1


In [54]:
# PageRank score per account ({account: score}), with networkx defaults.
page_ranks = nx.pagerank(G)
# Account names only, ordered from highest to lowest score.
page_ranks_sorted = sorted(page_ranks, key=page_ranks.get, reverse=True)

### PageRank

In [55]:
# Tabulate the twenty accounts with the highest PageRank score (names only).
top_page_ranked = page_ranks_sorted[:20]
df = pd.DataFrame(top_page_ranked, columns=['Account'])
df

Unnamed: 0,Account
0,jeanne_andreaa
1,whitehouse
2,donsonn
3,the_sstan
4,chenggao1999
5,realdonaldtrump
6,flotus
7,borderpatrol
8,potus
9,vp


In [56]:
# nx.hits returns a pair of {account: score} dictionaries; only the first one is ranked here.
# NOTE(review): per the networkx docs the first element holds hub scores — confirm that
# hubs (not authorities) are what this ranking is meant to show.
hits = nx.hits(G)
hub_scores = hits[0]
# Account names only, ordered from highest to lowest score.
hits_sorted = sorted(hub_scores, key=hub_scores.get, reverse=True)

  A = nx.adjacency_matrix(G, nodelist=list(G), dtype=float)


### HITS (accounts ranked by hub score)

In [57]:
# Tabulate the twenty accounts ranked highest by the HITS scores sorted above (names only).
top_hits = hits_sorted[:20]
df = pd.DataFrame(top_hits, columns=['Account'])
df

Unnamed: 0,Account
0,fretin98
1,the_sstan
2,25
3,chenggao1999
4,donsonn
5,plataspilarlopez
6,alwaysmirna
7,davidwi2024
8,mariannegnsr
9,jood_246812


In [58]:
# Centrality measures on the full graph; both are costly because they need
# shortest-path computations from every node. The two calls are independent.
betweenness = nx.betweenness_centrality(G)
closeness = nx.closeness_centrality(G)

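# Note: these two are expensive but not exponential — betweenness centrality (Brandes' algorithm) runs in roughly O(V·E) time on an unweighted graph — so on a large graph this cell can still take a long time (possibly hours) to compute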

## Graph with specific subset of nodes and edges between them

In [59]:
import essentialRoutines

# Read the raw adjacency-list lines. The context manager closes the file even if
# reading raises, which the previous manual open()/close() pair did not guarantee.
with open("adjList.txt", "r") as adjFile:
    adjList = adjFile.readlines()

# NOTE(review): adjList_to_dict is a project helper not visible here; later cells index
# the result by account name and use `in` / `.append` on the values, so it presumably
# maps each account to a list of account names — confirm in essentialRoutines.
allNodes = essentialRoutines.adjList_to_dict(adjList)

In [60]:
# Entry stored for the ego account in the adjacency dictionary.
ego_account = 'fretin98'
my_neighbors = allNodes[ego_account]

In [61]:
# Ego network around a single account: edges from the ego to every account in its
# adjacency entry, plus edges from every account whose entry contains the ego.
ego = 'fretin98'

G_small = nx.DiGraph()

# Work on a copy of the ego's entry. The previous version appended to the very list
# stored in allNodes, which altered the adjacency dictionary while it was being
# scanned and made the neighbour list grow on every re-run of this cell.
following = list(allNodes[ego])
for neighbor in following:
    G_small.add_edge(ego, neighbor)

# Accounts whose entry lists the ego. The previous test looked for 'arjun_siva__'
# while adding the edge to 'fretin98'; both must refer to the same account.
followers = [node for node, targets in allNodes.items() if ego in targets]
for node in followers:
    G_small.add_edge(node, ego)

# Every neighbour exactly once, in first-seen order; the next cell iterates over this.
my_neighbors = list(dict.fromkeys(following + followers))

In [62]:
# Add every edge that exists between two accounts of the ego neighbourhood:
# n2 -> n1 whenever n1 appears in n2's adjacency entry.
for n1 in my_neighbors:
    for n2 in my_neighbors:
        # Accounts without an adjacency entry contribute no outgoing edges.
        # .get() replaces a bare `except:` that silently hid every other error
        # (and KeyboardInterrupt) as well as the missing-key case.
        if n1 in allNodes.get(n2, ()):
            G_small.add_edge(n2, n1)

print(G_small.number_of_nodes(), G_small.number_of_edges())

2 1


In [63]:
# Betweenness centrality of every account in the subset graph ({account: value}).
b_c = nx.betweenness_centrality(G_small)
# (account, value) pairs, largest value first.
b_c_sorted = sorted(b_c.items(), key=lambda item: item[1], reverse=True)

### Betweenness centrality 

In [64]:
# Tabulate the twenty accounts with the highest betweenness centrality.
top_betweenness = b_c_sorted[:20]
df = pd.DataFrame(top_betweenness, columns=['Account', 'Value'])
df

Unnamed: 0,Account,Value
0,fretin98,0.0
1,eli.allegory,0.0


In [65]:
# Closeness centrality of every account in the subset graph ({account: value}).
c_c = nx.closeness_centrality(G_small)
# (account, value) pairs, largest value first.
c_c_sorted = sorted(c_c.items(), key=lambda item: item[1], reverse=True)

### Closeness centrality 

In [66]:
# Tabulate the twenty accounts with the highest closeness centrality.
top_closeness = c_c_sorted[:20]
df = pd.DataFrame(top_closeness, columns=['Account', 'Value'])
df

Unnamed: 0,Account,Value
0,eli.allegory,1.0
1,fretin98,0.0


In [67]:
# Undirected copy of the subset graph; the link-prediction cells below
# (preferential attachment, Jaccard coefficient) are run on this copy.
G_small_undir = G_small.to_undirected()

In [68]:
# Preferential-attachment scores as (account, account, score) triples. No pair list is
# passed, so networkx picks the pairs itself.
# NOTE(review): per the networkx docs that default is every pair not joined by an edge — confirm.
preds = nx.preferential_attachment(G_small_undir)
# Materialise the triples and order them from highest to lowest score.
preds_sorted = list(preds)
preds_sorted.sort(key=lambda triple: triple[2], reverse=True)

### Preferential attachment 

In [69]:
# Tabulate the twenty account pairs with the highest preferential-attachment score.
top_preds = preds_sorted[:20]
df = pd.DataFrame(top_preds, columns=['Account 1', 'Account 2', 'Value'])
df

Unnamed: 0,Account 1,Account 2,Value


In [70]:
def find_unconnected_nodes(selected_node, node_list, G):
    """Return the nodes of ``node_list`` that ``selected_node`` has no edge to.

    Args:
        selected_node: Node whose neighbourhood is excluded from the result.
        node_list: Iterable of candidate nodes; its order is kept in the result.
            Candidates must be hashable (graph nodes are).
        G: Graph-like object whose ``edges(node)`` yields ``(node, neighbour)`` pairs.

    Returns:
        list: The candidates that are neither ``selected_node`` itself nor the
        second element of any pair yielded by ``G.edges(selected_node)``.
    """
    # Build the exclusion set once. The previous version rebuilt the list
    # ``connected_nodes + [selected_node]`` and scanned it linearly for every candidate.
    excluded = {neighbor for _, neighbor in G.edges(selected_node)}
    excluded.add(selected_node)

    return [candidate for candidate in node_list if candidate not in excluded]

In [71]:
# Accounts of the undirected subset graph that share no edge with 'fretin98' ...
not_connected_list = find_unconnected_nodes('fretin98', G_small_undir.nodes(), G_small_undir)
# ... turned into ('fretin98', account) candidate pairs.
unconnected_nodes = [('fretin98', n) for n in not_connected_list]

In [72]:
# Jaccard coefficient for each candidate pair, as (account, account, score) triples.
j_coef = nx.jaccard_coefficient(G_small_undir, ebunch=unconnected_nodes)
# Materialise the triples and order them from highest to lowest score.
j_coef_sorted = list(j_coef)
j_coef_sorted.sort(key=lambda triple: triple[2], reverse=True)

### Jaccard Coefficient between a specific node and all nodes not connected to it 

In [73]:
# Tabulate the twenty candidate pairs with the highest Jaccard coefficient.
top_jaccard = j_coef_sorted[:20]
df = pd.DataFrame(top_jaccard, columns=['Account 1', 'Account 2', 'Value'])
df

Unnamed: 0,Account 1,Account 2,Value


### Exporting to gexf format for Gephi

In [74]:
# Write the full graph to a GEXF file in the working directory.
full_graph_path = "Full_graph.gexf"
nx.write_gexf(G, full_graph_path)

In [75]:
# Write the subset (ego) graph to a GEXF file in the working directory.
subset_graph_path = "Subset.gexf"
nx.write_gexf(G_small, subset_graph_path)