In [1]:
import wikipedia
import igraph
import unicodedata

In [2]:
def check_result(search_query, search_result):
    print type(search_query)
    print type(search_result)
    if(search_query.lower() == search_result.lower()):
        return True
    else:
        return False

In [75]:
class WikipediaGraph(igraph.Graph):
    def __init__(self, start_page, num_iterations):
        igraph.Graph.__init__(self)
        self.start_page = start_page
        self.num_iterations = num_iterations
        self.unicode_errors = []
        
        self.add_page_to_graph(self.start_page)


###
# Instance Methods
###        
    
    def is_page_in_graph(self, page_name):
        """
            Checks whether a page named "page_name" is in the graph
        """
        page_vertex = None
        try:
            page_vertex = self.vs.find(name=page_name)
            return True
        except ValueError:
            return False
        
    def is_page_mapped(self, page_name):
        """
            Checks whether page "page_name" has been mapped
        """
        if self.is_page_in_graph(page_name) == False:
            return False
        if self.vs.find(name=page_name)["is_mapped"] == False:
            return False
        return True
    
    def add_page_to_graph(self, page_name):
        max_map = 10
        count = 1
        
        page = None
        try:
            page = wikipedia.page(page_name)
        except (wikipedia.DisambiguationError, wikipedia.PageError) as e:
            print page_name
            return
        print page_name
        page_links = map(lambda x: unicodedata.normalize('NFKD', x).encode('ascii','ignore'), page.links)
        
        # if page isn't in graph
        if self.is_page_in_graph(page_name) == False:
            self.add_vertex(name = page_name) # add the page to the graph
            self.vs.find(name = page_name)["is_mapped"] = True
            for link in page_links:
                if count > max_map:
                    break
                count += 1
                
                if self.is_page_in_graph(link) == False: # if the target page isn't already in the graph
                    link_vertex = self.add_vertex(name = link)
                    self.vs.find(name = link)["is_mapped"] = False
                    
                self.add_edge(page_name, link) # connectes the source and target pages
        
        # if page is in graph but hasn't been mapped
        if self.is_page_mapped(page_name) == False and self.is_page_in_graph(page_name) == True:
            self.vs.find(name = page_name)["is_mapped"] = True
            for link in page_links:
                if count > max_map:
                    break
                count += 1
                
                if self.is_page_in_graph(link) == False: # if the target page isn't already in the graph
                    link_vertex = self.add_vertex(name = link)
                    self.vs.find(name = link)["is_mapped"] = False
                    
                self.add_edge(page_name, link) # connectes the source and target pages
        
        if self.is_page_mapped(page_name) == True and self.is_page_in_graph(page_name) == True:
            return
    # class Foo(Bar):
    #   def baz(self, arg):
    #     return super(Foo, self).baz(arg)
    
    # '{} {}'.format(1, 2)
    
    def write(self):
        file_name = "{}_v_{}_e_{}.gml".format(self.start_page, self.vcount(), self.ecount())
        print file_name
        return super(WikipediaGraph, self).write(file_name, format="gml")

###
# Class Methods
###
        
    def check_result(search_query, search_result):
        print type(search_query)
        print type(search_result)
        if(search_query.lower() == search_result.lower()):
            return True
        else:
            return False

In [76]:
g = WikipediaGraph("Quantum Mechanics", 2)

Quantum Mechanics


In [77]:
print g.unicode_errors
print map(lambda x: unicodedata.normalize('NFKD', x).encode('ascii','ignore'), g.unicode_errors)

[]
[]


In [78]:
print g.summary()
print g.vcount()
print g.ecount()

IGRAPH UN-- 11 10 -- 
+ attr: is_mapped (v), name (v)
11
10


In [79]:
# len(wikipedia.page("Quantum Mechanics").links)

In [81]:
count = 0
current_vertices = igraph.VertexSeq(g).select(is_mapped=False)[:]
print current_vertices.indices
for v in current_vertices:
    print current_vertices.indices
    print count
    g.add_page_to_graph(v["name"])
    count += 1
print g.summary()
g.write()

[11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103]
[11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103]
0
1-form
[11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 

In [73]:
g.write()

In [8]:
wikipedia.search("BohrEinstein debates")

[u'Bohr\u2013Einstein debates',
 u'Bohr',
 u'Quantum (book)',
 u'Niels Bohr',
 u'List of things named after Albert Einstein',
 u'List of things named after Niels Bohr',
 u'Complementarity (physics)',
 u'Albert Einstein',
 u'Photon entanglement',
 u'Hidden variable theory']