# Network Analysis Assignment

### Due Apr 20, 2016
### By Jacob Metzger

Using assignment prompt/notebook

In [1]:
import networkx as nx
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Load the political-blogs network from its GML file.
M = nx.read_gml("polblogs.gml")

In [3]:
# Collapse repeated connections between the same pair of nodes in M into a
# single edge of the undirected graph G; the edge's 'weight' counts how many
# connections in M were folded into it.
G = nx.Graph()
# M.edges() is used rather than edges_iter(data=True): edges_iter is not
# available on every NetworkX release, and the per-edge attribute dict it
# was asked for was never read inside the loop.
for u, v in M.edges():
    if G.has_edge(u, v):
        # Pair already seen: bump its repeat count.
        G[u][v]['weight'] += 1
    else:
        # First connection between this pair.
        G.add_edge(u, v, weight=1)

### 1.  Create a nice looking graph of the political blog network

In [4]:
# Export the weighted graph G to a GEXF file so it can be opened and
# manipulated in Gephi.
nx.write_gexf(G, "polblogs.gexf")

In [5]:
# Show the picture that was exported from Gephi.
# The Gephi view was filtered to nodes with degree > 100. Node color runs
# from green (lower degree) to blue (higher degree), and line thickness
# indicates the weight of the connection.

from IPython.core.display import HTML
from IPython.display import Image

Image(url="networkAnalysisFromGephi.png")

A passing observation: it's interesting, though perhaps not unexpected, that nodes that appear to be associated with a particular political perspective tend to be placed nearer to each other by the graph layout algorithm.

### 2.  What are the 10 most influential blogs?

In [6]:
# Eigenvector centrality for every node of the combined graph G.
# The edge attribute is named explicitly so the repeat counts stored under
# 'weight' are what the computation uses, instead of depending on the
# library's default for the `weight` argument.
centrality = nx.eigenvector_centrality(G, weight='weight')

In [7]:
# (node, centrality score) pairs, ordered from lowest to highest score.
person_list = list(centrality.items())
# Keep the in-place sort: the ranking cells re-sort person_list with a
# stable sort, so the order of tied scores depends on this ordering.
person_list.sort(key=lambda x: x[1])

# list.sort() returns None, so assigning its result left this name set to
# None; take a copy of the now-sorted list instead.
sorted_person_list = list(person_list)

In [8]:
# Ten highest-scoring (node, score) pairs, highest first.
sorted(person_list, key=lambda pair: pair[1], reverse=True)[:10]

[(u'atrios.blogspot.com', 0.19480797936637712),
 (u'dailykos.com', 0.16089740205234834),
 (u'washingtonmonthly.com', 0.14653924259836884),
 (u'digbysblog.blogspot.com', 0.14264369221718315),
 (u'bodyandsoul.typepad.com', 0.14113489174330365),
 (u'liberaloasis.com', 0.13952371571107874),
 (u'tbogg.blogspot.com', 0.1370364169982426),
 (u'corrente.blogspot.com', 0.1294175927409615),
 (u'talkingpointsmemo.com', 0.12700519780790953),
 (u'dneiwert.blogspot.com', 0.12237564692546021)]

### 3.  What are the 10 least influential blogs?



In [9]:
# Ten lowest-scoring (node, score) pairs, lowest first.
sorted(person_list, key=lambda pair: pair[1])[:10]

[(u'thelonedem.com', 1.776325109755966e-72),
 (u'digital-democrat.blogspot.com', 1.776325109755966e-72),
 (u'americanworldview.tripod.com/weltansblog', 7.491540638916243e-09),
 (u'quimundus.modblog.com', 6.682929975006485e-08),
 (u'batr.org/commentary.html', 6.795517611554005e-07),
 (u'batr.net', 6.796325594929459e-07),
 (u'democratvoice.org', 1.0233388204113302e-06),
 (u'quimundus.squarespace.com', 6.061741218869533e-06),
 (u'massachusetts-liberal.com', 9.483365235734506e-06),
 (u'thingsasitis.net', 1.173624965065861e-05)]

### 4.  Answer this question for both left wing and right wing blogs separately.  (Optional 10 Extra Credit Points)

### Right wing

In [10]:
# Names of the nodes whose 'value' attribute equals 1 (the 'right wing' blogs).
rightWing = [name for name, attrs in M.nodes(data=True) if attrs['value'] == 1]

In [11]:
# Build the weighted graph restricted to right-wing blogs: a connection in M
# is kept only when both of its endpoints are listed in rightWing, and
# repeated connections between the same pair are folded into 'weight'.
R = nx.Graph()
# Both endpoints of every edge are tested for membership; a set makes each
# test a hash lookup instead of a scan through the rightWing list.
right_wing_nodes = set(rightWing)
# M.edges() is used rather than edges_iter(data=True): edges_iter is not
# available on every NetworkX release, and the per-edge attribute dict was
# never read inside the loop.
for u, v in M.edges():
    # Skip connections that leave the right-wing group.
    if u not in right_wing_nodes or v not in right_wing_nodes:
        continue
    if R.has_edge(u, v):
        R[u][v]['weight'] += 1
    else:
        R.add_edge(u, v, weight=1)

In [12]:
# Eigenvector centrality for every node of the right-wing graph R.
# The edge attribute is named explicitly so the repeat counts stored under
# 'weight' are what the computation uses, instead of depending on the
# library's default for the `weight` argument.
centrality = nx.eigenvector_centrality(R, weight='weight')

In [13]:
# (node, centrality score) pairs for the right-wing graph, ordered from
# lowest to highest score.
person_list = list(centrality.items())
# Keep the in-place sort: the ranking cells re-sort person_list with a
# stable sort, so the order of tied scores depends on this ordering.
person_list.sort(key=lambda x: x[1])

# list.sort() returns None, so assigning its result left this name set to
# None; take a copy of the now-sorted list instead.
sorted_person_list = list(person_list)

#### Most influential

In [14]:
# Ten highest-scoring right-wing (node, score) pairs, highest first.
sorted(person_list, key=lambda pair: pair[1], reverse=True)[:10]

[(u'instapundit.com', 0.21139123782218378),
 (u'blogsforbush.com', 0.20315985849713214),
 (u'hughhewitt.com', 0.1709179903989045),
 (u'lashawnbarber.com', 0.1588744333304724),
 (u'michellemalkin.com', 0.1474069323583247),
 (u'powerlineblog.com', 0.1462399641016771),
 (u'wizbangblog.com', 0.14567416434521388),
 (u'littlegreenfootballs.com/weblog', 0.13915289252981464),
 (u'vodkapundit.com', 0.13681297045170163),
 (u'outsidethebeltway.com', 0.12258374528315184)]

#### Least influential

In [15]:
# Ten lowest-scoring right-wing (node, score) pairs, lowest first.
sorted(person_list, key=lambda pair: pair[1])[:10]

[(u'poeticvalues.blogspot.com', 5.073078363712568e-28),
 (u'markheimonen.blogspot.com', 5.073078363712568e-28),
 (u'americanworldview.tripod.com/weltansblog', 5.687509322811977e-22),
 (u'neoconswatch.blogspot.com', 1.090755983979966e-21),
 (u'batr.org/commentary.html', 1.5164293652458691e-21),
 (u'batr.net', 1.6179756529287944e-21),
 (u'quimundus.modblog.com', 3.8876319746756727e-07),
 (u'imprescindibile.ilcannocchiale.it', 1.66559586485033e-05),
 (u'quimundus.squarespace.com', 2.8347828965574297e-05),
 (u'depressedlonelybored.typepad.com', 0.00014278027072298573)]

### Left wing

In [16]:
# Names of the nodes whose 'value' attribute equals 0 (the 'left wing' blogs).
leftWing = [name for name, attrs in M.nodes(data=True) if attrs['value'] == 0]

In [17]:
# Build the weighted graph restricted to left-wing blogs: a connection in M
# is kept only when both of its endpoints are listed in leftWing, and
# repeated connections between the same pair are folded into 'weight'.
L = nx.Graph()
# Both endpoints of every edge are tested for membership; a set makes each
# test a hash lookup instead of a scan through the leftWing list.
left_wing_nodes = set(leftWing)
# M.edges() is used rather than edges_iter(data=True): edges_iter is not
# available on every NetworkX release, and the per-edge attribute dict was
# never read inside the loop.
for u, v in M.edges():
    # Skip connections that leave the left-wing group.
    if u not in left_wing_nodes or v not in left_wing_nodes:
        continue
    if L.has_edge(u, v):
        L[u][v]['weight'] += 1
    else:
        L.add_edge(u, v, weight=1)

In [18]:
# Eigenvector centrality for every node of the left-wing graph L.
# The edge attribute is named explicitly so the repeat counts stored under
# 'weight' are what the computation uses, instead of depending on the
# library's default for the `weight` argument.
centrality = nx.eigenvector_centrality(L, weight='weight')

In [19]:
# (node, centrality score) pairs for the left-wing graph, ordered from
# lowest to highest score.
person_list = list(centrality.items())
# Keep the in-place sort: the ranking cells re-sort person_list with a
# stable sort, so the order of tied scores depends on this ordering.
person_list.sort(key=lambda x: x[1])

# list.sort() returns None, so assigning its result left this name set to
# None; take a copy of the now-sorted list instead.
sorted_person_list = list(person_list)

#### Most influential

In [20]:
# Ten highest-scoring left-wing (node, score) pairs, highest first.
sorted(person_list, key=lambda pair: pair[1], reverse=True)[:10]

[(u'atrios.blogspot.com', 0.20960069262288047),
 (u'dailykos.com', 0.16836966726258357),
 (u'bodyandsoul.typepad.com', 0.15679455118140725),
 (u'liberaloasis.com', 0.15611669689292246),
 (u'digbysblog.blogspot.com', 0.15543641689579624),
 (u'tbogg.blogspot.com', 0.1501036544219528),
 (u'corrente.blogspot.com', 0.14485933101755427),
 (u'washingtonmonthly.com', 0.1396814941740542),
 (u'dneiwert.blogspot.com', 0.13474938566339847),
 (u'pandagon.net', 0.13295188387217463)]

#### Least influential

In [21]:
# Ten lowest-scoring left-wing (node, score) pairs, lowest first.
sorted(person_list, key=lambda pair: pair[1])[:10]

[(u'thelonedem.com', 2.3485107549626825e-19),
 (u'digital-democrat.blogspot.com', 2.3485107549626825e-19),
 (u'democratvoice.org', 2.3485107549626825e-19),
 (u'enemykombatant.blogspot.com', 2.3485107549626825e-19),
 (u'vernsblog.thegillfamily.us:8180', 7.219003827223708e-06),
 (u'massachusetts-liberal.com', 1.1209610884941808e-05),
 (u'thingsasitis.net', 1.2208135676401794e-05),
 (u'loveamericahatebush.com', 2.252119420762977e-05),
 (u'eltcamerica.blogspot.com', 2.5858542294598836e-05),
 (u'usademocrazy.blogspot.com', 3.54472969651666e-05)]