In [15]:
import os
import urllib3
import json
import joblib

import networkx as nx
import matplotlib.pyplot as plt

In [16]:
class ExtractData:
    
    stack_exchange_tags = []
    stack_exchange_url = ""
    tags_file_url = ""
    http = None
    
    def __init__(self, stack_exchange_url, tags_file_url):
        self.stack_exchange_url = stack_exchange_url
        self.tags_file_url = tags_file_url + "datascience_stackexchange_tag.joblib"
        self.http = urllib3.PoolManager()
    
    def extractTagsFromData(self, data):
        json_data = json.loads(data)
        for item in json_data["items"]:
            self.stack_exchange_tags.append(item["tags"])

    def getDataFromApi(self):
        for i in range(1, 101):
            url = self.stack_exchange_url.partition("=")
            url = url[0] + url[1] + str(i) + url[2][1:]
            response = self.http.request('GET', url)
            if(response.status == 200):
                json_data = self.extractTagsFromData(response.data)
            else:
                continue
        self.writeExtractedTagsInPickelFile()

    def writeExtractedTagsInPickelFile(self):
        joblib.dump(self.stack_exchange_tags, self.tags_file_url)
        
    def loadTagsFromFile(self):
        if(os.path.isfile(self.tags_file_url)):
            self.stack_exchange_tags = joblib.load(self.tags_file_url)
        
    def extractData(self):
        if(not os.path.isfile(self.tags_file_url)):
            print("File not present... downloading data...")
            self.getDataFromApi()
        else:
            print("Reading from the file.......\n")
            self.loadTagsFromFile()

In [17]:
class Graph:
    Tag_G = None
    
    def __init__(self):
        self.Tag_G = nx.Graph()
        
    def createGraph(self, stack_exchange_tags):
        for tag in stack_exchange_tags:
            prev = []
            for item in tag:
                if item not in self.Tag_G:
                    self.Tag_G.add_node(item)
                if(len(prev)!=0):
                    for node in prev:
                        if(self.Tag_G.has_edge(item, node)):
                            wt = self.Tag_G[item][node]["weight"]
                            self.Tag_G[item][node]["weight"] = wt+1
                        else:
                            self.Tag_G.add_edge(item, node, weight=1)
                prev.append(item)
            
    def generateEdgeList(self):
        print("Edge List:")
        for ed in nx.generate_edgelist(self.Tag_G):
            print(ed)
            
    def findNeighborsOfaTag(self, tag):
        neighbors_list = list(self.Tag_G.neighbors(tag))
        edge_with_weights = []
        for neighbor in neighbors_list:
            wt = self.Tag_G.get_edge_data(tag, neighbor)
            edge_with_weights.append((neighbor, wt["weight"]))
        edge_with_weights = tuple(sorted(edge_with_weights,key = lambda x: x[1], reverse=True))
        associated_tags = self.generateAssociatedTags(tag, edge_with_weights)
        return associated_tags
        
    def generateAssociatedTags(self, tag, edge_with_weights):
        associatedTags = [tag]
        for edge in edge_with_weights:
            associatedTags.append(edge[0])
        return associatedTags
    
    def plotGraph(self):
        #set figure size
        plt.figure(figsize=(50, 50)) 
        e_list = [(u, v) for (u, v, d) in Tag_G.edges(data=True)]

        pos = nx.spring_layout(Tag_G)  # positions for all nodes

        # nodes
        nx.draw_networkx_nodes(Tag_G, pos, node_size=700)

        # edges
        nx.draw_networkx_edges(Tag_G, pos, edgelist=e_list, width=1)

        # labels
        nx.draw_networkx_labels(Tag_G, pos, font_size=8, font_family='sans-serif')
        plt.axis('off')
        plt.show()

In [18]:
def main():
    api_url = "https://api.stackexchange.com/2.2/questions?page=1&pagesize=100&order=desc&sort=activity&site=datascience"
    file_url = "E://Ureka//"
    extract = ExtractData(api_url, file_url)
    graph = Graph()
    extract.extractData()
    graph.createGraph(extract.stack_exchange_tags)
    query = input("Enter the search query: ")
    associated_tags = graph.findNeighborsOfaTag(query)
    print(f"Searching for {query}....\n")
    print("Result is :")
    print(associated_tags)

In [20]:
if __name__=="__main__":
    main()

Reading from the file.......



Enter the search query:  svd


NetworkXError: The node svd is not in the graph.