<b>Map Plot</b><br>
<a href="#Folium">Folium</a><br>
<b>Networks</b><br>
<a href="#NetworkX">NetworkX</a><br>
<b>Scraping</b><br>
<a href="#BeautfulSoup">BeautifulSoup</a><br>
<a href="#DataReader">DataReader</a><br>
<b>APIs</b><br>
<a href="#Tweepy">Tweepy</a><br>

<a name="Folium"></a>
<h3>FOLIUM</h3>

In [None]:
import folium

In [5]:
# Start from an empty folium map
phone_map = folium.Map()

# Locations and popup labels for the top three smartphone companies
# by market share in 2016.
companies = [
    dict(loc=[37.4970, 127.0266], label='Samsung: ...%'),
    dict(loc=[37.3318, -122.0311], label='Apple: ...%'),
    dict(loc=[22.5431, 114.0579], label='Huawei: ...%'),
]

# Drop one marker per company onto the map.
for company in companies:
    marker = folium.Marker(popup=company['label'], location=company['loc'])
    marker.add_to(phone_map)

# A bare expression on the last line of a cell is rendered by the notebook.
phone_map

<a name="NetworkX"></a>
<h3>NetworkX</h3>

In [2]:
import networkx as nx
import matplotlib.pyplot as plt

In [None]:
from datetime import date                      # `date` is used by the edge filter below

# Build a tiny undirected graph by hand.
G = nx.Graph()
G.add_nodes_from([1, 2, 3])
G.nodes[1]['label'] = 'blue'                   # per-node attribute dict (`G.node[...]` is no longer available in NetworkX)
G.nodes(data=True)
G.add_edge(1, 2)
G.edges()

# NOTE(review): `T` is not created anywhere in this notebook — it must be loaded
# before this cell is run. The filters below read an 'occupation' attribute on its
# nodes and a 'date' attribute on its edges.
print('Total nodes: {} | Total edges: {}'.format(len(T.nodes()), len(T.edges())))
nx.draw(G, with_labels=True)
plt.show()

noi = [n for n, d in T.nodes(data=True) if d['occupation'] == 'scientist']             # Nodes of interest
eoi = [(u, v) for u, v, d in T.edges(data=True) if d['date'] < date(2009, 1, 1)]       # Edges of interest

# List comprehension
# [ output expression for iterator variable in iterable if predicate expression ]

In [None]:
# Types of Graphs
G = nx.Graph()           # Undirected graph  --
D = nx.DiGraph()         # Directed graph    -->
M = nx.MultiGraph()      # Multi-edge graph
MD = nx.MultiDiGraph()   # Multi-edge directed graph
B = nx.barbell_graph(m1=5, m2=1)

nx.draw(G, with_labels=True)
# Weights on Edges
# NOTE(review): `T` is not created in this notebook — load it before running this cell.
T.edges[1, 10]['weight'] = 2     # Set 'weight' of the edge between node 1 and 10 of T to be equal to 2
                                 # (`T.edge[1][10]` is no longer available in NetworkX; use `T.edges[u, v]`)

# Self-loops

In [None]:
# The number of neighbors that a node has is called its "degree"

### DEGREE CENTRALITY            =  num_neighbours / num_all_possible_neighbours
# If self-loops are possible, add 1 to the number of possible neighbours.
G.neighbors(5)                   # neighbours of node 5 (NOT "nodes that have 5 neighbours")
nx.degree_centrality(G)          # degree centrality of the nodes of G

### BETWEENNESS CENTRALITY       = num_shortest paths through node / all possible shortest paths
# Captures bottlenecks
nx.betweenness_centrality(G)


In [None]:
### Checking whether there are self-loops in the graph

def find_selfloop_nodes(G):
    """Return the nodes of graph `G` that have an edge to themselves.

    Every edge yielded by `G.edges()` whose two endpoints are equal contributes
    its node to the result, in iteration order.
    """
    return [src for src, dst in G.edges() if src == dst]

# Check whether number of self loops equals the number of nodes in self loops.
# The module-level function is used because the `Graph.number_of_selfloops()`
# method is no longer available in NetworkX.
assert nx.number_of_selfloops(T) == len(find_selfloop_nodes(T))

###########################################################################################################################
# Define nodes_with_m_nbrs()
def nodes_with_m_nbrs(G, m):
    """Return the set of nodes in graph `G` that have exactly `m` neighbors.

    `G.neighbors(n)` is materialised with `list()` before counting, so this
    works whether it returns a list or a one-shot iterator (on which `len()`
    would fail).
    """
    return {n for n in G.nodes() if len(list(G.neighbors(n))) == m}

# Compute and print all nodes in T that have 6 neighbors
six_nbrs = nodes_with_m_nbrs(T, 6)
print(six_nbrs)

# Compute the degree of every node.
# `list()` is required because `T.neighbors(n)` may be an iterator without len().
degrees = [len(list(T.neighbors(n))) for n in T.nodes()]
print(degrees)

###########################################################################################################################
### Breadth-first search Algorithm --- Find paths
# Define path_exists()
def path_exists(G, node1, node2):
    """Check with a breadth-first search whether `node2` can be reached from `node1` in graph `G`.

    Prints a message describing the outcome and returns True as soon as `node2`
    shows up among the neighbors of a visited node, or False once every node
    reachable from `node1` has been explored without finding it.
    """
    visited_nodes = {node1}                    # nodes already queued, so each is expanded only once
    queue = [node1]
    for node in queue:                         # the list grows while iterating: it is the BFS frontier
        neighbors = list(G.neighbors(node))    # materialise: may be a one-shot iterator
        if node2 in neighbors:
            print('Path exists between nodes {0} and {1}'.format(node1, node2))
            return True
        for nbr in neighbors:
            if nbr not in visited_nodes:
                visited_nodes.add(nbr)
                queue.append(nbr)

    # The frontier is exhausted: nothing reachable from node1 is adjacent to node2.
    print('Path does not exist between nodes {0} and {1}'.format(node1, node2))
    return False

###########################################################################################################################
# Define find_nodes_with_highest_deg_cent()
def find_nodes_with_highest_deg_cent(G):
    """Return the set of nodes of `G` whose degree centrality equals the maximum.

    Several nodes are returned when they tie for the highest score.
    (Swapping in `nx.betweenness_centrality` gives the betweenness variant.)
    """
    centrality = nx.degree_centrality(G)       # node -> degree centrality
    highest = max(centrality.values())
    return {node for node, score in centrality.items() if score == highest}
    
top_dc = find_nodes_with_highest_deg_cent(T)              # Find the node(s) that has the highest degree centrality in T
print(top_dc)

# Compute the centralities once instead of twice per loop iteration.
deg_cent_T = nx.degree_centrality(T)
max_deg_cent_T = max(deg_cent_T.values())
for node in top_dc:                                       # Every returned node must hold the maximum score
    assert deg_cent_T[node] == max_deg_cent_T
    

###########################################################################################################################    
from itertools import combinations                       #### COOL ####
# Define is_in_triangle() 
def is_in_triangle(G, n):
    """Tell whether node `n` of graph `G` belongs to at least one triangle.

    Returns True when some pair of `n`'s neighbors is itself joined by an edge,
    otherwise False.
    """
    neighbor_pairs = combinations(G.neighbors(n), 2)
    return any(G.has_edge(a, b) for a, b in neighbor_pairs)

###########################################################################################################################
# Define node_in_open_triangle()
from itertools import combinations
def node_in_open_triangle(G, n):
    """Tell whether node `n` of graph `G` is the centre of at least one open triangle.

    Returns True when some pair of `n`'s neighbors is NOT joined by an edge,
    otherwise False.
    """
    neighbor_pairs = combinations(G.neighbors(n), 2)
    return any(not G.has_edge(a, b) for a, b in neighbor_pairs)

# Count the nodes of T that sit at the centre of at least one open triangle
num_open_triangles = sum(1 for n in T.nodes() if node_in_open_triangle(T, n))
print(num_open_triangles)

###########################################################################################################################
# Define maximal_cliques()
def maximal_cliques(G, size):
    """Return the cliques yielded by `nx.find_cliques(G)` that have exactly `size` members."""
    return [members for members in nx.find_cliques(G) if len(members) == size]

###########################################################################################################################
nodes_of_interest = [29, 38, 42]

# Define get_nodes_and_nbrs()
def get_nodes_and_nbrs(G, nodes_of_interest):
    """Build the subgraph of `G` made of `nodes_of_interest` plus all of their neighbors.

    The node list handed to `G.subgraph` holds each node of interest followed
    by its neighbors, in iteration order.
    """
    nodes_to_draw = []
    for centre in nodes_of_interest:
        nodes_to_draw += [centre, *G.neighbors(centre)]   # the node itself, then its neighbors
    return G.subgraph(nodes_to_draw)

# Pull out the neighbourhood of the nodes of interest and render it
T_draw = get_nodes_and_nbrs(G=T, nodes_of_interest=nodes_of_interest)
nx.draw(T_draw, with_labels=True)
plt.show()


###########################################################################################################################

# Nodes of interest: every node of T whose 'occupation' attribute is 'celebrity'
nodes = [n for n, d in T.nodes(data=True) if d['occupation'] == 'celebrity']

# Grow the node set with the neighbors of each node of interest
nodeset = set(nodes)
for n in nodes:
    nbrs = T.neighbors(n)
    nodeset.update(nbrs)

# Induced subgraph on those nodes, drawn to the screen
T_sub = T.subgraph(nodeset)
nx.draw(T_sub, with_labels=True)
plt.show()

In [None]:
########## CASE STUDY ##############
# Github user collaboration network
# Nodes: users
# Edges: collaboration on same GitHub repository
# Goal : Create recommendation system

# Import necessary modules
import networkx as nx 
import matplotlib.pyplot as plt
from nxviz import MatrixPlot
from nxviz.plots import ArcPlot
from nxviz import CircosPlot
from itertools import combinations
from collections import defaultdict

# Centrality scores of every node of G
deg_cent = nx.degree_centrality(G)
bet_cent = nx.betweenness_centrality(G)

### Characterizing the network
# One histogram per measure: degree centrality first, then betweenness centrality
for centrality in (deg_cent, bet_cent):
    plt.hist(list(centrality.values()))
    plt.show()


### VISUALIZATION
# Calculate the largest connected component subgraph.
# `nx.connected_component_subgraphs` is no longer available in NetworkX, so the
# components (node sets) are sorted by size and the last one is turned into an
# independent subgraph copy.
largest_component_nodes = sorted(nx.connected_components(G), key=len)[-1]
largest_ccs = G.subgraph(largest_component_nodes).copy()

h = MatrixPlot(graph=largest_ccs, node_grouping='grouping')                # Create the customized MatrixPlot object
h.draw(); plt.show()                                                       # Draw the MatrixPlot to the screen

# Arc Plot
for n, d in G.nodes(data=True):
    G.nodes[n]['degree'] = nx.degree(G, n)                                 # Store each node's degree as a node attribute
                                                                           # (`G.node[...]` is no longer available in NetworkX)

a = ArcPlot(graph=G, node_order='degree')                                  # Create the ArcPlot object
a.draw(); plt.show()                                                       # Draw the ArcPlot to the screen

# Circos Plot
c = CircosPlot(graph=G, node_order='degree', node_grouping='grouping', node_color='grouping')
c.draw(); plt.show()

### CLIQUES
# Finding cliques
# `nx.find_cliques` returns a generator; materialise it once so that it can be
# both counted and sorted (a generator is empty after the first pass).
cliques = list(nx.find_cliques(G))                                          # Calculate the maximal cliques in G
print(len(cliques))                                                         # Count and print the number of maximal cliques in G

largest_clique = sorted(cliques, key=lambda x:len(x))[-1]                   # Author(s) part of the largest maximal clique

G_lc = G.subgraph(largest_clique)                                           # Create the subgraph of the largest_clique
c = CircosPlot(G_lc)                                                        # Create the CircosPlot object
c.draw(); plt.show()                                                        # Draw the CircosPlot to the screen

# Find important collaborators (degree centrality)
max_dc = max(deg_cent.values())                                             # Highest degree centrality in G
# Keep every user whose score ties with that maximum
prolific_collaborators = list(filter(lambda user: deg_cent[user] == max_dc, deg_cent))
print(prolific_collaborators)                                               # Print the most prolific collaborator(s)


# Find largest communities of collaborators (maximum clique)
largest_max_clique = set(sorted(nx.find_cliques(G), key=lambda x: len(x))[-1])  # Identify the largest maximal clique
G_lmc = G.subgraph(largest_max_clique).copy()                               # Copy: a subgraph view cannot be modified

for node in list(G_lmc.nodes()):                                            # Go out 1 degree of separation
    # Snapshot of the clique nodes above: the graph grows inside this loop.
    nbrs = list(G.neighbors(node))                                          # Materialise: may be a one-shot iterator
    G_lmc.add_nodes_from(nbrs)
    G_lmc.add_edges_from((node, nbr) for nbr in nbrs)                       # One (node, neighbor) edge per neighbor

lmc_deg_cent = nx.degree_centrality(G_lmc)                                  # Computed once, not once per node
for n in G_lmc.nodes():                                                     # Record each node's degree centrality score
    G_lmc.nodes[n]['degree centrality'] = lmc_deg_cent[n]

a = ArcPlot(G_lmc, node_order='degree centrality')                          # Create the ArcPlot object
a.draw(); plt.show()                                                        # Draw the ArcPlot to the screen


# Build a collaboration recommendation system (open triangles)
recommended = defaultdict(int)                                              # (n1, n2) -> number of shared neighbors seen

for n in G.nodes():                                                         # Iterate over all the nodes in G
    for n1, n2 in combinations(G.neighbors(n), 2):                          # Every pair of neighbors of n
        if not G.has_edge(n1, n2):                                          # n1 and n2 are not connected yet
            recommended[(n1, n2)] += 1                                      # n is one more connection they share

all_counts = sorted(recommended.values())
if len(all_counts) >= 10:
    # Keep the pairs whose count is strictly above the 10th-highest count.
    threshold = all_counts[-10]
    top10_pairs = [pair for pair, count in recommended.items() if count > threshold]
else:
    # Fewer than 10 candidate pairs: there is no 10th count to compare with, keep them all.
    top10_pairs = list(recommended)

print(top10_pairs)


<a name="BeautfulSoup"></a>
<h3>BeautifulSoup</h3>

In [None]:
from bs4 import BeautifulSoup          # the class is spelled `BeautifulSoup`
import requests

# Download the page and keep its HTML as text
url = 'http://....'
r = requests.get(url)
html_doc = r.text

# Name the parser explicitly so the result does not depend on which parsers are installed
soup = BeautifulSoup(html_doc, 'html.parser')

print(soup.prettify())
print(soup.title)
print(soup.get_text())

for link in soup.find_all('a'):       # Every <a> tag in the document
    print(link.get('href'))           # Its href attribute

<a name="DataReader"></a>
<h3>DataReader - Pandas</h3>

In [None]:
### The DataReader: Access financial data online
import pandas as pd                                    # needed for pd.concat at the end of this cell
from pandas_datareader.data import DataReader
from datetime import date

# NOTE(review): the 'google' data source may no longer be available in newer
# pandas-datareader releases — confirm it is supported by the installed version.
start = date(2015, 1, 1)
end = date(2016, 12, 31)
ticker = 'GOOG'
data_source = 'google'    # Google Finance
stock_data = DataReader(ticker, data_source, start, end)


start = date(1962, 1, 1)
series_code = 'DGS10'   # 10-year Treasury Rate
data_source = 'fred'    # FED Economic Data Service
data = DataReader(series_code, data_source, start)


start = date(2000, 1, 1)
series_code = 'DCOILWTICO'   # West Texas Intermediate Oil Price
oil = DataReader(series_code, 'fred', start)
ticker = 'XOM'    # Exxon Mobil Corp
stock = DataReader(ticker, 'google', start)
data = pd.concat([stock[['Close']], oil], axis=1)  # Concat by columns


<a name="Tweepy"></a>
<h3>Tweepy</h3>

In [None]:
import json
import os

import tweepy

# OAuth credentials are read from the environment so that no secret is stored
# in the notebook. The '...' placeholder is used when a variable is not set.
access_token = os.environ.get('TWITTER_ACCESS_TOKEN', '...')
access_token_secret = os.environ.get('TWITTER_ACCESS_TOKEN_SECRET', '...')
consumer_key = os.environ.get('TWITTER_CONSUMER_KEY', '...')
consumer_secret = os.environ.get('TWITTER_CONSUMER_SECRET', '...')

# Build the OAuth handler used by the stream below
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

# Define stream listener class
class MyStreamListener(tweepy.StreamListener):
    """Stream listener that stores the first 100 statuses it receives.

    Each status is written as one JSON line to "tweets.txt" and also kept in
    `self.tweet_list`. After 100 statuses the file is closed and `on_status`
    returns False.
    """

    def __init__(self, api=None):
        super(MyStreamListener, self).__init__(api)    # forward `api` instead of dropping it
        self.num_tweets = 0
        self.tweet_list = []                           # statuses received so far
        self.file = open("tweets.txt", "w")            # creates a file

    def on_status(self, status):
        """Record one status; return True to keep streaming, False to stop."""
        tweet = status._json                           # raw JSON dict of the status
        self.file.write(json.dumps(tweet) + '\n')      # write to file
        self.tweet_list.append(status)
        self.num_tweets += 1
        if self.num_tweets < 100:                      # Only 100 tweets
            return True
        self.file.close()                              # Close the file before stopping
        return False

# Instantiate the listener and attach it to an authenticated stream
l = MyStreamListener()
stream = tweepy.Stream(auth=auth, listener=l)

# Start streaming, keeping only the tweets that match these keywords
stream.filter(track=['apples', 'oranges'])