# Instagram Network Analysis

In [73]:
import networkx as nx
import pandas as pd

### Number of nodes and edges 

In [2]:
# Load the adjacency list as a directed graph whose node labels are strings.
G = nx.read_adjlist(
    'adjList.txt',
    nodetype=str,
    create_using=nx.DiGraph(),
)

# Report the size of the loaded network: node count, then edge count.
node_total = G.number_of_nodes()
edge_total = G.number_of_edges()
print(node_total, edge_total)

81815 235381


In [4]:
# Average clustering coefficient computed by networkx over the whole graph G.
print(nx.average_clustering(G))

0.10504529195908346


In [7]:
# Number of nodes in the dominating set that networkx finds for G.
# NOTE(review): presumably this is *a* dominating set, not necessarily a minimum one —
# confirm against the networkx docs before interpreting the size.
print(len(nx.dominating_set(G)))

80801


In [9]:
# (account, in-degree) pairs; in-degree is used here as the follower count.
follower_count = list(G.in_degree)
# Largest follower count first; the sort is stable, so ties keep graph order.
follower_sorted = sorted(follower_count, key=lambda pair: pair[1], reverse=True)

### Followers count

In [88]:
# Show the ten accounts with the highest follower count as a table.
df = pd.DataFrame(follower_sorted[:10], columns=['Account','Follower count'])
df

Unnamed: 0,Account,Follower count
0,nishanth.sekar,1478
1,afshu_afz,1384
2,____karikaalan,1206
3,rifqinsta,1110
4,hari_krishnan_18,1099
5,cadet_jp,1033
6,bala_ayyappa_,1005
7,harsha.baskar,989
8,venkat_37,985
9,uv._.vik,920


In [13]:
# (account, out-degree) pairs; out-degree is used here as the following count.
following_count = list(G.out_degree)
# Largest following count first; the sort is stable, so ties keep graph order.
following_sorted = sorted(following_count, key=lambda pair: pair[1], reverse=True)

### Following count 

In [89]:
# Show the ten accounts with the highest following count as a table.
df = pd.DataFrame(following_sorted[:10], columns=['Account','Following count'])
df

Unnamed: 0,Account,Following count
0,venkat_37,1523
1,varsha_a_rao,1478
2,vignesh5003,1456
3,logeshwaran_rajagopal_,1438
4,arvind_ranganath,1414
5,j_p_k_7,1356
6,_navamani_,1353
7,thivyaprakas,1320
8,afshu_afz,1279
9,rifqinsta,1171


In [22]:
# Per-node average neighbor degree, as returned by networkx (dict: node -> value).
avg_neighbor_degrees = nx.average_neighbor_degree(G)
# Flatten the dict into (account, value) pairs so it can be ranked.
avg_neighb_deg_list = list(avg_neighbor_degrees.items())
# Highest average neighbor degree first.
avg_neigh_d_sorted = sorted(avg_neighb_deg_list, key=lambda pair: pair[1], reverse=True)

In [23]:
# Print the 20 (account, value) pairs with the highest average neighbor degree.
print(avg_neigh_d_sorted[:20])

[('goosh_002', 1523.0), ('rokul_5766', 1523.0), ('_smile_killer_05', 1523.0), ('ugasrinamasivayam', 1523.0), ('__onewaystreet__', 1523.0), ('mala.__.vika', 1523.0), ('neelaran_architects', 1523.0), ('antondasphotography', 1523.0), ('venkat_anantharaman', 1523.0), ('jeffrey_o7', 1523.0), ('_subash14_', 1523.0), ('gatearchitecture2022', 1523.0), ('factory_bats', 1523.0), ('balaceani_03', 1523.0), ('shriraam_archistographer_', 1523.0), ('global._.shoppers', 1523.0), ('umaiyarl_fashion', 1523.0), ('vigneshwar728', 1523.0), ('shoppingparadiseonline', 1523.0), ('v.2_animation_creations', 1523.0)]


In [26]:
# Accounts whose out-degree is 0, paired with their in-degree (follower count
# inside this network).
accounts_with_zero_following = [
    (account, G.in_degree(account))
    for account, n_following in G.out_degree
    if n_following == 0
]

# Rank those accounts by follower count, largest first.
accounts_with_0_following_sorted = sorted(
    accounts_with_zero_following,
    key=lambda pair: pair[1],
    reverse=True,
)

### Accounts with 0 following and their follower count within the network

In [78]:
# Tabulate every zero-following account with its follower count (already sorted descending).
df = pd.DataFrame(accounts_with_0_following_sorted, columns=['Account','Follower count'])
df

Unnamed: 0,Account,Follower count
0,swethasathish31,478
1,mokkapostu,116
2,virat.kohli,90
3,mahi7781,90
4,actorvijaysethupathi,79
5,samantharuthprabhuoffl,78
6,hiphoptamizha,77
7,mokkaengineer,70
8,be.like.bro,70
9,anirudhofficial,67


In [32]:
# PageRank score per account (dict: node -> score).
page_ranks = nx.pagerank(G)
# Account names only, ordered from highest to lowest PageRank score.
page_ranks_sorted = sorted(page_ranks, key=page_ranks.get, reverse=True)

### Page rank 

In [80]:
# Show the 20 accounts with the highest PageRank (names only, best first).
df = pd.DataFrame(page_ranks_sorted[:20], columns=['Account'])
df

Unnamed: 0,Account
0,bala_ayyappa_
1,rifqinsta
2,afshu_afz
3,nishanth.sekar
4,cadet_jp
5,____karikaalan
6,hari_krishnan_18
7,harsha.baskar
8,yaznasai
9,raaaasz


In [39]:
# nx.hits returns a pair of dicts; index 0 holds the hub scores.
hits = nx.hits(G)
hub_scores = hits[0]
# Account names only, ordered from highest to lowest hub score.
hits_sorted = sorted(hub_scores, key=hub_scores.get, reverse=True)

### HITS (hub scores)

In [81]:
# Show the 20 accounts ranked highest by hits[0] (names only, best first).
df = pd.DataFrame(hits_sorted[:20], columns=['Account'])
df

Unnamed: 0,Account
0,_raga.t.r_
1,elamparithi_ravichandran_
2,atheethaa29
3,pilot_vivek
4,jegadheesan_
5,yashwanth_113
6,caevinjoyce_26
7,divyanth2002
8,_rajan_10
9,akshansheriff


In [72]:
# Closeness and betweenness centrality on the FULL graph.
# NOTE(review): neither result is read by any later cell in this notebook, and
# this cell dominates the run time of a Restart & Run All — consider skipping it
# or caching its output.
closeness = nx.closeness_centrality(G)
betweenness = nx.betweenness_centrality(G)

# These two are not exponential, but each needs a shortest-path search from every node (roughly nodes × edges work), so on the full graph they can take hours to compute

## Graph with specific subset of nodes and edges between them

In [41]:
import essentialRoutines

# Read the raw adjacency-list lines. The context manager closes the file even
# if reading raises, which the manual open()/close() pair did not guarantee.
with open("adjList.txt","r") as adjFile:
    adjList = adjFile.readlines()

# Project helper that turns the raw lines into a dict-like lookup.
# NOTE(review): presumably maps each account to the list of accounts it
# follows — confirm in essentialRoutines.
allNodes = essentialRoutines.adjList_to_dict(adjList)

In [42]:
# Take a COPY of the ego account's entry. A later cell appends to my_neighbors,
# and without the copy that append would also modify the list stored inside
# allNodes (both names would refer to the same list object).
my_neighbors = list(allNodes['arjun_siva__'])

In [44]:
# Build the ego network skeleton: every edge that touches the ego account.
ego = 'arjun_siva__'

# Read both directions straight from allNodes so this cell gives the same
# result no matter how often it is re-run.
following = list(allNodes[ego])                                  # ego -> account
followers = [node for node in allNodes if ego in allNodes[node]]  # account -> ego

G_small = nx.DiGraph()
G_small.add_edges_from((ego, account) for account in following)
G_small.add_edges_from((account, ego) for account in followers)

# Rebind instead of appending in place: appending would mutate the list held
# by allNodes when my_neighbors aliases it, and would keep growing on re-runs.
# dict.fromkeys drops accounts that appear in both directions while keeping
# first-seen order; the graph built from this list is unaffected by the dedupe.
my_neighbors = list(dict.fromkeys(following + followers))

In [45]:
# Add every edge that runs between two members of the ego's neighbourhood.
for n2 in my_neighbors:
    # Some accounts have no entry in allNodes; skip them explicitly instead of
    # hiding every possible error behind a bare `except`.
    if n2 not in allNodes:
        continue

    # Look the entry up once per source account; a set makes each membership
    # test below O(1). Assumes account names are hashable (they are strings
    # when read from the adjacency-list file).
    followed_by_n2 = set(allNodes[n2])
    for n1 in my_neighbors:
        if n1 in followed_by_n2:
            G_small.add_edge(n2, n1)

print(G_small.number_of_nodes(), G_small.number_of_edges())

340 17079


In [55]:
# Betweenness centrality of every node in the ego network (dict: node -> value).
b_c = nx.betweenness_centrality(G_small)
# (account, value) pairs ordered from highest to lowest centrality.
b_c_sorted = sorted(b_c.items(), key=lambda item: item[1], reverse=True)

### Betweenness centrality 

In [82]:
# Show the 20 accounts with the highest betweenness centrality in G_small.
df = pd.DataFrame(b_c_sorted[:20], columns=['Account','Value'])
df

Unnamed: 0,Account,Value
0,arjun_siva__,0.290514
1,aakash_ananth,0.031453
2,__lost__illusion__2.0,0.022766
3,manick._,0.01914
4,rajkishore_kv,0.018518
5,_surya07prasath_,0.015132
6,syringe_stabber,0.01306
7,halith_ja,0.012536
8,zee.jayyyy,0.012431
9,elamparithi_ravichandran_,0.01234


In [57]:
# Closeness centrality of every node in the ego network (dict: node -> value).
c_c = nx.closeness_centrality(G_small)
# (account, value) pairs ordered from highest to lowest centrality.
c_c_sorted = sorted(c_c.items(), key=lambda item: item[1], reverse=True)

### Closeness centrality 

In [83]:
# Show the 20 accounts with the highest closeness centrality in G_small.
df = pd.DataFrame(c_c_sorted[:20], columns=['Account','Value'])
df

Unnamed: 0,Account,Value
0,arjun_siva__,0.860806
1,elamparithi_ravichandran_,0.650311
2,syringe_stabber,0.633927
3,manick._,0.62664
4,__lost__illusion__2.0,0.62664
5,sanjay_amith,0.619519
6,halith_ja,0.619519
7,_thirukrishnan,0.617181
8,_surya07prasath_,0.617181
9,aakash_ananth,0.613708


In [59]:
# Undirected version of the ego network; the preferential-attachment and
# Jaccard cells in this notebook operate on this graph rather than on G_small.
G_small_undir = G_small.to_undirected()

In [61]:
# Iterator of (u, v, score) triples. No ebunch is passed, so networkx chooses
# which node pairs to score.
preds = nx.preferential_attachment(G_small_undir)
# Consume the iterator into a list ordered by score, highest first.
preds_sorted = sorted(preds, key=lambda triple: triple[2], reverse=True)

### Preferential attachment 

In [84]:
# Show the 20 account pairs with the highest preferential-attachment score.
df = pd.DataFrame(preds_sorted[:20], columns=['Account 1','Account 2','Value'])
df

Unnamed: 0,Account 1,Account 2,Value
0,__lost__illusion__2.0,halith_ja,24124
1,__lost__illusion__2.0,zee.jayyyy,21353
2,logeshwaran_rajagopal_,aakash_ananth,21306
3,elamparithi_ravichandran_,aburar_aleem,21042
4,aburar_aleem,syringe_stabber,21042
5,_thirukrishnan,halith_ja,20720
6,__lost__illusion__2.0,_lucifer_2330_,20538
7,__lost__illusion__2.0,aburar_aleem,20538
8,__lost__illusion__2.0,dineshvar_23,20212
9,sanjay_amith,11.pranavkumar,20010


In [63]:
def find_unconnected_nodes(selected_node, node_list, G):
    """Return the nodes in ``node_list`` that ``selected_node`` has no edge to.

    Args:
        selected_node: Node whose existing connections are excluded.
        node_list: Iterable of candidate nodes; its order is kept in the result.
        G: Graph-like object whose ``edges(node)`` yields ``(node, neighbor)``
            pairs. Nodes must be hashable.

    Returns:
        list: Every candidate that is neither ``selected_node`` itself nor the
        second element of a pair yielded by ``G.edges(selected_node)``.
    """
    # Build the exclusion set once. A set gives O(1) membership checks, whereas
    # concatenating and scanning a list for every candidate is quadratic.
    excluded = {n for _, n in G.edges(selected_node)}
    excluded.add(selected_node)

    return [candidate for candidate in node_list if candidate not in excluded]

In [65]:
# Candidate pairs for link prediction: the target account paired with every
# node of the undirected ego network it has no edge to.
target_account = 'aakash_ananth'
not_connected_list = find_unconnected_nodes(target_account, G_small_undir.nodes(), G_small_undir)
unconnected_nodes = [(target_account, other) for other in not_connected_list]

In [66]:
# Iterator of (u, v, coefficient) triples, restricted to the candidate pairs.
j_coef = nx.jaccard_coefficient(G_small_undir, ebunch=unconnected_nodes)
# Consume the iterator into a list ordered by coefficient, highest first.
j_coef_sorted = sorted(j_coef, key=lambda triple: triple[2], reverse=True)

### Jaccard Coefficient between a specific node and all nodes not connected to it 

In [87]:
# Show the 20 candidate pairs with the highest Jaccard coefficient.
df = pd.DataFrame(j_coef_sorted[:20], columns=['Account 1','Account 2','Value'])
df

Unnamed: 0,Account 1,Account 2,Value
0,aakash_ananth,logeshwaran_rajagopal_,0.388626
1,aakash_ananth,_.n.i.s.m.a._,0.35567
2,aakash_ananth,varsha_kmar,0.345361
3,aakash_ananth,deepan_101,0.345178
4,aakash_ananth,_navamani_,0.343284
5,aakash_ananth,chidhu9999,0.342105
6,aakash_ananth,sivarkrishnan5222,0.341709
7,aakash_ananth,ganapathy__g,0.339713
8,aakash_ananth,harsha.baskar,0.338235
9,aakash_ananth,___soulwritter_01____,0.336957


### Exporting to gexf format for Gephi

In [68]:
# Write the full directed graph to Full_graph.gexf in the working directory.
nx.write_gexf(G, "Full_graph.gexf")

In [69]:
# Write the directed ego-network subset to Subset.gexf in the working directory.
nx.write_gexf(G_small, "Subset.gexf")