In [161]:
# Install python-louvain (Louvain community detection; provides the `community` module imported below)
!pip3 install python-louvain
# Install progressbar2 (provides the `progressbar` module imported below)
!pip3 install progressbar2

Collecting progressbar2
  Downloading progressbar2-3.55.0-py2.py3-none-any.whl (26 kB)
Collecting python-utils>=2.3.0
  Downloading python_utils-2.5.6-py2.py3-none-any.whl (12 kB)
Installing collected packages: python-utils, progressbar2
Successfully installed progressbar2-3.55.0 python-utils-2.5.6


In [1]:
import pandas as pd
import numpy as np
import math as m
import time 
from pycowview.data import csv_read_FA
from pycowview.manipulate import unique_cows
from pycowview.metrics import interaction_time
import networkx as nx
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib as mpl
import itertools
import os
import community
from collections import defaultdict
import progressbar
import random

In [2]:
# Lazily yield the path of every csv file found underneath a folder
def findAllFile(base):
    """Walk ``base`` recursively and yield the path of each ``.csv`` file.

    The suffix check is case-sensitive (only names ending in ``.csv`` match).
    Paths are built by joining the directory reported by ``os.walk`` with the
    file name. Files are yielded in ``os.walk`` order, so callers that need a
    stable order must sort the result themselves.
    """
    for dirpath, _subdirs, filenames in os.walk(base):
        yield from (
            os.path.join(dirpath, filename)
            for filename in filenames
            if filename.endswith('.csv')
        )

# Input is the folders where the time matrices and cow lists are saved
# Output is a list with one dictionary per (time matrix, cow list) file pair
# The structure of each dictionary: CL, TM, AM_binary, Graph
def time_matrix_to_graph(tm_folder,cl_folder,epsilon=1800):
    """Load every time matrix / cow list pair and build an unweighted graph for each.

    The csv files of both folders are sorted by path and paired positionally,
    so the i-th time matrix is matched with the i-th cow list.

    Parameters
    ----------
    tm_folder : str
        Folder searched recursively for time-matrix csv files.
    cl_folder : str
        Folder searched recursively for cow-list csv files.
    epsilon : float, optional
        Threshold applied to the time matrix: an edge is created between two
        cows when their entry is ``>= epsilon``. Defaults to 1800
        (30 minutes expressed in seconds).

    Returns
    -------
    list of dict
        One dictionary per file pair with the keys ``CL`` (cow list as an int
        array), ``TM`` (time matrix as loaded), ``AM_binary`` (0/1 adjacency
        matrix) and ``Graph`` (undirected ``nx.Graph`` whose nodes are
        labelled with the entries of the cow list).

    Raises
    ------
    ValueError
        If the two folders contain a different number of csv files, or if a
        cow list does not have one entry per row of its time matrix.
    """
    tmlist = sorted(findAllFile(tm_folder))
    cllist = sorted(findAllFile(cl_folder))
    # zip() would silently drop the unmatched files and could pair the wrong days
    if len(tmlist) != len(cllist):
        raise ValueError(
            'Found %d time matrices in %r but %d cow lists in %r'
            % (len(tmlist), tm_folder, len(cllist), cl_folder)
        )

    dict_list = []
    for i, (tm, cl) in enumerate(zip(tmlist, cllist)):
        # get cowlist
        cowlist = np.loadtxt(cl,delimiter=",").astype(int)
        # load original time matrix from csv
        OM = np.asmatrix(np.loadtxt(tm,delimiter=","))

        # Unweighted (binary) adjacency matrix: 1 where the entry reaches the
        # threshold; every other entry keeps the initial 0
        AM = np.zeros(OM.shape)
        AM[OM >= epsilon] = 1

        # Get graph from AM, undirected and unweighted.
        # from_numpy_array replaces from_numpy_matrix, which was removed in networkx 3.0
        G_AM_temp = nx.from_numpy_array(AM,parallel_edges=False,create_using = nx.Graph())
        # Make sure the order of cowlist is the same as the row name!
        print('Shape of matrix:',AM.shape)
        print('number of nodes in graph',len(G_AM_temp),'length of cowlist',len(cowlist))
        # A size mismatch would leave some nodes unlabelled or mislabelled
        if len(G_AM_temp) != len(cowlist):
            raise ValueError(
                'Cow list %r has %d entries but time matrix %r has %d rows'
                % (cl, len(cowlist), tm, len(G_AM_temp))
            )
        # Rename the nodes from matrix row indices to cow ids
        mapping = dict(zip(G_AM_temp, cowlist))
        G_AM = nx.relabel_nodes(G_AM_temp, mapping)

        # Get the dict of the collection(CL,TM,AM_binary,Graph)
        data_dict = dict(CL=cowlist,TM=OM,AM_binary=AM,Graph=G_AM)
        print('Document No.',i)
        print('TM path:',tm,'CL path:',cl)
        dict_list.append(data_dict)

    print('The length of the list: ',len(dict_list))
    return dict_list

In [3]:
# This part is used to process the time matrices
# Both folders are given relative to the current working directory
tm_folder = './time_matrix'
cl_folder = './cow_list'
# One dictionary per time matrix / cow list file pair (keys: CL, TM, AM_binary, Graph);
# the call prints the matrix shape and the file paths of every pair it loads
data_dict_list = time_matrix_to_graph(tm_folder,cl_folder)

Shape of matrix: (213, 213)
number of nodes in graph 213 length of cowlist 213
Document No. 0
TM path: ./time_matrix\Time_FA_20201016T000000UTC.csv CL path: ./cow_list\Cow_list_20201016T000000UTC.csv
Shape of matrix: (212, 212)
number of nodes in graph 212 length of cowlist 212
Document No. 1
TM path: ./time_matrix\Time_FA_20201017T000000UTC.csv CL path: ./cow_list\Cow_list_20201017T000000UTC.csv
Shape of matrix: (219, 219)
number of nodes in graph 219 length of cowlist 219
Document No. 2
TM path: ./time_matrix\Time_FA_20201018T000000UTC.csv CL path: ./cow_list\Cow_list_20201018T000000UTC.csv
Shape of matrix: (208, 208)
number of nodes in graph 208 length of cowlist 208
Document No. 3
TM path: ./time_matrix\Time_FA_20201019T000000UTC.csv CL path: ./cow_list\Cow_list_20201019T000000UTC.csv
Shape of matrix: (209, 209)
number of nodes in graph 209 length of cowlist 209
Document No. 4
TM path: ./time_matrix\Time_FA_20201020T000000UTC.csv CL path: ./cow_list\Cow_list_20201020T000000UTC.csv


In [9]:
# Girvan-Newman community detection
def community_detection_GN(i,pos,G_AM,k=20):
    """Detect communities with Girvan-Newman, store them on the graph and plot them.

    NOTE: ``G_AM`` is modified in place. Nodes of degree zero are removed from
    it, and every remaining node receives a ``community_GN`` attribute holding
    a one-element list with its community number (numbering starts from 1).

    Parameters
    ----------
    i : int
        Number used in the name of the saved figure
        (``./community/GN/Day<i>.png``).
    pos : dict
        Node positions used for drawing. It may contain positions for nodes
        that are removed here because they are isolated.
    G_AM : nx.Graph
        Graph to analyse (mutated, see the note above).
    k : int, optional
        Maximum number of communities. The finest Girvan-Newman partition
        with at most ``k`` communities is selected. Defaults to 20.

    Returns
    -------
    tuple of set
        The selected partition, one set of nodes per community.

    Raises
    ------
    ValueError
        If even the first Girvan-Newman partition has more than ``k``
        communities, so that no partition satisfies the limit.
    """
    # Remove the nodes whose degree is zero
    nodes_removed = [node for node,degree in G_AM.degree() if degree == 0]
    G_AM.remove_nodes_from(nodes_removed)

    # Start GN algorithm. Partitions are produced lazily and grow by splitting,
    # so keep the last one with at most k communities and stop as soon as k is
    # reached: computing one more partition is expensive and would be discarded.
    communities_GN = None
    for partition in nx.algorithms.community.girvan_newman(G_AM):
        if len(partition) > k:
            break
        communities_GN = partition
        if len(partition) == k:
            break
    if communities_GN is None:
        raise ValueError(
            'No Girvan-Newman partition with at most %d communities exists for graph %d'
            % (k, i)
        )

    print(nx.algorithms.community.quality.modularity(G_AM,communities_GN))

    # Give the nodes in the graph an attribute:community_GN
    # The serial number of communities starts from 1
    community_dict_GN = defaultdict(list)
    for community_num_GN, community_GN in enumerate(communities_GN, start=1):
        for character_GN in community_GN:
            community_dict_GN[character_GN].append(community_num_GN)
    # Write the attribute once, after the whole mapping has been built
    nx.set_node_attributes(G_AM, community_dict_GN, 'community_GN')

    # Part for plotting and saving figures (the layout `pos` is supplied by the caller)

    # image size
    fig = plt.figure(figsize=(30, 30))
    nx.draw_networkx_nodes(G_AM, pos, node_size = 100,node_color = 'black',alpha = 1)
    nx.draw_networkx_edges(G_AM, pos, alpha=0.2)
    nx.draw_networkx_labels(G_AM, pos, alpha=0.5)
    # Colormap for plotting; the index wraps around so k may exceed the palette size
    color_map_GN = ['red', 'blue', 'yellow', 'purple',  'pink', 'green', 'pink','brown','cyan','gold','red', 'blue', 'yellow', 'purple',  'pink', 'green', 'pink','brown','cyan','gold']
    for color_GN, community_GN in enumerate(communities_GN):
        nx.draw_networkx_nodes(G_AM, pos , nodelist = community_GN, node_size = 100, node_color = color_map_GN[color_GN % len(color_map_GN)])
        # Edges are redrawn once per community, exactly as before, so the
        # rendered edge opacity of the figure is unchanged
        nx.draw_networkx_edges(G_AM, pos ,alpha = 0.02)

    out_path = './community/GN/Day%d.png'%i
    # savefig does not create missing folders
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    plt.savefig(out_path)
    #plt.show()
    plt.close(fig)

    return communities_GN

In [10]:
# Run Girvan-Newman on the graph of every day; the modularity of each
# selected partition will be printed
bar = progressbar.ProgressBar()
for i in bar(range(1,len(data_dict_list)+1)):
    day_graph = data_dict_list[i-1].get('Graph')
    # Fixed seed so that the layout, and hence the saved figure, is the same on every run
    pos = nx.spring_layout(day_graph, seed=7)
    community_detection_GN(i,pos,day_graph)

  0% (0 of 14) |                         | Elapsed Time: 0:00:00 ETA:  --:--:--

0.0295638872527732


  7% (1 of 14) |#                        | Elapsed Time: 0:00:36 ETA:   0:07:54

0.0138355092838132


 14% (2 of 14) |###                      | Elapsed Time: 0:01:09 ETA:   0:06:34

0.031347756384817155


 21% (3 of 14) |#####                    | Elapsed Time: 0:02:02 ETA:   0:09:42

0.013193032048450975


 28% (4 of 14) |#######                  | Elapsed Time: 0:02:30 ETA:   0:04:43

0.05423599999999382


 35% (5 of 14) |########                 | Elapsed Time: 0:03:07 ETA:   0:05:34

0.05057953489051267


 42% (6 of 14) |##########               | Elapsed Time: 0:03:44 ETA:   0:04:55

0.059148085207132585


 50% (7 of 14) |############             | Elapsed Time: 0:04:20 ETA:   0:04:09

0.03400121495743141


 57% (8 of 14) |##############           | Elapsed Time: 0:04:54 ETA:   0:03:24

0.055606139438084415


 64% (9 of 14) |################         | Elapsed Time: 0:05:44 ETA:   0:04:11

0.02622277617039004


 71% (10 of 14) |#################       | Elapsed Time: 0:06:19 ETA:   0:02:18

0.01118757789927223


 78% (11 of 14) |##################      | Elapsed Time: 0:06:52 ETA:   0:01:39

0.027468715401200883


 85% (12 of 14) |####################    | Elapsed Time: 0:07:23 ETA:   0:01:01

0.04409670538977479


 92% (13 of 14) |######################  | Elapsed Time: 0:08:05 ETA:   0:00:41

0.014738894139886455


100% (14 of 14) |########################| Elapsed Time: 0:08:35 Time:  0:08:35


# Get the total cow list over the 14 days (every cow that appears at least once is included)

In [None]:
# Collect every cow id that occurs in at least one of the loaded cow lists,
# printing the running totals after each entry
cowlist_union = set()
for i, day_data in enumerate(data_dict_list, start=1):
    day_cows = day_data.get('CL')
    cowlist_union |= set(day_cows)
    print('cowlist',len(day_cows))
    print('shape of adjacency matrix',np.shape(day_data.get('TM')))
    print('cowlist_union',len(cowlist_union))
    print(i)

# Compute betweenness

In [None]:
# Compute the betweenness and store it in the graph
def compute_betweenness(i,G_AM):
    """Attach betweenness centrality to the nodes of ``G_AM``.

    The centrality returned by ``nx.betweenness_centrality`` is written to
    the node attribute ``betweenness``; the graph is modified in place and
    nothing is returned. The argument ``i`` is accepted but not used.
    """
    nx.set_node_attributes(G_AM, nx.betweenness_centrality(G_AM), 'betweenness')

In [None]:
# Store the betweenness on every loaded graph, printing the running number of each one
n_graphs = len(data_dict_list)
bar = progressbar.ProgressBar()
for i in bar(range(1, n_graphs + 1)):
    day_graph = data_dict_list[i - 1].get('Graph')
    compute_betweenness(i, day_graph)
    print(i)
    print(i)   