In [1]:
import snap
import numpy as np
import pandas as pd
import wget
import os
import gzip

In [2]:
def gz_data_loader(data_url):
    file_to_load = wget.download(data_url)
    filename = os.path.splitext(os.path.basename(data_url))[0]
    os.replace(file_to_load, os.path.join('input', file_to_load))

    with gzip.open(os.path.join('input', file_to_load), 'rb') as f:
        f_in = f.read()
        with open(os.path.join('input', filename), 'wb') as f_out:
            f_out.write(f_in)
            print('\n{} loaded and unpacked'.format(os.path.basename(data_url)))

1 Analyzing the Wikipedia voters network [27 points]

Download the Wikipedia voting network wiki-Vote.txt.gz: [ссылка](http://snap.stanford.edu/data/wiki-Vote.html)

![images/001.png](images/001.png)

![images/002.png](images/002.png)

2 Further Analyzing the Wikipedia voters network [33 points]

For this problem, we use the Wikipedia voters network. If you are using Python, you might
want to use NumPy, SciPy, and/or Matplotlib libraries.

![images/003.png](images/003.png)

3 Finding Experts on the Java Programming Language on StackOverow [40 points]

Download the StackOverow network stackoverflow-Java.txt.gz: [ссылка](http://snap.stanford.edu/class/cs224w-data/hw0/stackoverflow-Java.txt.gz). An edge (a, b) in the network
means that person a endorsed an answer from person b on a Java-related question.

![images/004.png](images/004.png)

![images/005.png](images/005.png)

You can find more details about this exercise on the Snap.py tutorial page: [тут](http://snap.stanford.edu/proj/snap-icwsm/). As an extra exercise, extend the tutorial to find experts in other program-
ming languages or topics.

# Analyzing the Wikipedia voters network

- [x] The number of nodes in the network.
- [x] The number of nodes with a self-edge (self-loop)
- [x] The number of directed edges in the network
- [x] The number of undirected edges in the network
- [x] The number of reciprocated edges in the network
- [x] The number of nodes of zero out-degree
- [x] The number of nodes of zero in-degree
- [x] The number of nodes with more than 10 outgoing edges (out-degree > 10)
- [x] The number of nodes with fewer than 10 incoming edges (in-degree < 10)

In [3]:
gz_data_loader('http://snap.stanford.edu/data/wiki-Vote.txt.gz')

100% [............................................................................] 290339 / 290339
wiki-Vote.txt.gz loaded and unpacked


In [4]:
G_wiki_vote = snap.LoadEdgeList(snap.PNGraph, "input/wiki-Vote.txt", 0, 1)

In [5]:
snap.PrintInfo(G_wiki_vote, "Standart statistcs")

In [6]:
# The number of nodes in the network (with degree greater than 0)
snap.CntNonZNodes(G_wiki_vote)

7115

In [7]:
# by iteratuon
node_summary = 0
for node in G_wiki_vote.Nodes():
    node_summary += 1
print(node_summary)

7115


In [8]:
# The number of nodes with a self-edge (self-loop)
snap.CntSelfEdges(G_wiki_vote)

0

In [9]:
# The number of directed edges in the network (unic!!!)
snap.CntUniqDirEdges(G_wiki_vote)

103689

In [10]:
# The number of undirected edges in the network (unic!!!)
snap.CntUniqUndirEdges(G_wiki_vote)

100762

In [11]:
# The number of reciprocated edges in the network (unic bidirectinal!!!)
snap.CntUniqBiDirEdges(G_wiki_vote)

2927

In [12]:
# The number of nodes of zero out-degree
snap.CntOutDegNodes(G_wiki_vote, 0)

1005

In [13]:
# The number of nodes of zero in-degree
snap.CntInDegNodes(G_wiki_vote, 0)

4734

In [14]:
# The number of nodes with more than 10 outgoing edges (out-degree > 10)
# The number of nodes with fewer than 10 incoming edges (in-degree < 10)
node_more_10_out = 0
node_less_10_in = 0
for node in G_wiki_vote.Nodes():
    if node.GetOutDeg() > 10:
        node_more_10_out += 1
    elif node.GetInDeg() < 10:
        node_less_10_in += 1
print('Out-degree > 10 has {} nodes, in-degree < 10 has {} nodes'.format(node_more_10_out, node_less_10_in))

Out-degree > 10 has 1612 nodes, in-degree < 10 has 4346 nodes


# Further Analyzing the Wikipedia voters network

# Finding Experts on the Java Programming Language on StackOverow

In [15]:
gz_data_loader('http://snap.stanford.edu/class/cs224w-data/hw0/stackoverflow-Java.txt.gz')

100% [..........................................................................] 2144812 / 2144812
stackoverflow-Java.txt.gz loaded and unpacked
