In [2]:
import pandas as pd
import numpy as np
import math as m
import time 
from pycowview.data import csv_read_FA
from pycowview.manipulate import unique_cows
from pycowview.metrics import interaction_time
from onmi import onmi
import networkx as nx
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib as mpl
import itertools
import os
import community
from collections import defaultdict
import progressbar
import random

In [3]:
# This function will get the path of each csv file
def findAllFile(base):
    """Yield the full path of every file under ``base`` whose name ends in '.csv'.

    ``base`` is walked recursively with ``os.walk``; each matching file name
    is joined onto the directory it was found in. Paths are produced lazily
    and in whatever order ``os.walk`` reports them.
    """
    for dirpath, _subdirs, filenames in os.walk(base):
        yield from (
            os.path.join(dirpath, filename)
            for filename in filenames
            if filename.endswith('.csv')
        )

# Input: the folders where the time matrices and the cow lists are saved
# Output: a list of dictionaries, one per (time matrix, cow list) file pair (14 for this dataset)
# Structure of each dictionary: Cowlist (key CL), TimeMatrix (key TM), AdjacencyMatrix_binary (key AM_binary), Unweighted_Graph (key Graph)
def time_matrix_to_graph(tm_folder,cl_folder,epsilon=1800):
    """Build one unweighted, undirected graph per (time matrix, cow list) CSV pair.

    The CSV files found under ``tm_folder`` and ``cl_folder`` are each sorted
    by path and then paired positionally, so both folders must sort into the
    same order (NOTE(review): presumably guaranteed by the shared date stamp
    in the file names -- confirm).

    Parameters
    ----------
    tm_folder : str
        Folder searched recursively for time-matrix CSV files.
    cl_folder : str
        Folder searched recursively for cow-list CSV files.
    epsilon : float, optional
        Threshold on the time matrix: an entry ``>= epsilon`` becomes an edge.
        Defaults to 1800, i.e. 30 minutes expressed in seconds.

    Returns
    -------
    list of dict
        One dictionary per file pair with the keys ``CL`` (cow list as an int
        array), ``TM`` (time matrix as loaded), ``AM_binary`` (0/1 adjacency
        matrix) and ``Graph`` (``networkx.Graph`` whose nodes are relabelled
        with the entries of the cow list).

    Raises
    ------
    ValueError
        If the two folders contain a different number of CSV files, or if a
        cow list does not have one entry per row of its time matrix.
    """
    tmlist = sorted(findAllFile(tm_folder))
    cllist = sorted(findAllFile(cl_folder))
    # zip() would silently drop the unmatched files and could pair a matrix
    # with the wrong cow list, so refuse to continue on a count mismatch.
    if len(tmlist) != len(cllist):
        raise ValueError(
            'Found %d time matrix files but %d cow list files'
            % (len(tmlist), len(cllist))
        )

    dict_list = []
    for i, (tm, cl) in enumerate(zip(tmlist, cllist)):
        # Cow list; atleast_1d keeps a single-cow file usable with len()/zip()
        cowlist = np.atleast_1d(np.loadtxt(cl,delimiter=",")).astype(int)
        # Original time matrix as stored in the csv
        OM = np.asmatrix(np.loadtxt(tm,delimiter=","))

        # Unweighted (binary) adjacency matrix: 1 where the time reaches the
        # threshold, 0 everywhere else
        AM = (np.asarray(OM) >= epsilon).astype(float)

        # Row k of the matrix is labelled with cowlist[k], so the sizes must agree
        if AM.shape[0] != len(cowlist):
            raise ValueError(
                'Time matrix %s has shape %s but cow list %s has %d entries'
                % (tm, AM.shape, cl, len(cowlist))
            )

        # Undirected, unweighted graph from AM. from_numpy_array is used
        # because from_numpy_matrix was removed in NetworkX 3.0.
        G_AM_temp = nx.from_numpy_array(AM,parallel_edges=False,create_using = nx.Graph())
        print('Shape of matrix:',AM.shape)
        print('number of nodes in graph',len(G_AM_temp),'length of cowlist',len(cowlist))

        # Rename node k (the matrix row index) to the k-th entry of the cow list
        mapping = dict(zip(G_AM_temp, cowlist))
        G_AM = nx.relabel_nodes(G_AM_temp, mapping)

        # Collect everything belonging to this file pair
        data_dict = dict(CL=cowlist,TM=OM,AM_binary=AM,Graph=G_AM)
        print('Document No.',i)
        print('TM path:',tm,'CL path:',cl)
        dict_list.append(data_dict)

    print('The length of the list: ',len(dict_list))
    return dict_list

In [4]:
# Load every saved time matrix / cow list pair and turn each into a graph dictionary
tm_folder, cl_folder = './time_matrix', './cow_list'
data_dict_list = time_matrix_to_graph(tm_folder=tm_folder, cl_folder=cl_folder)

Shape of matrix: (213, 213)
number of nodes in graph 213 length of cowlist 213
Document No. 0
TM path: ./time_matrix\Time_FA_20201016T000000UTC.csv CL path: ./cow_list\Cow_list_20201016T000000UTC.csv
Shape of matrix: (212, 212)
number of nodes in graph 212 length of cowlist 212
Document No. 1
TM path: ./time_matrix\Time_FA_20201017T000000UTC.csv CL path: ./cow_list\Cow_list_20201017T000000UTC.csv
Shape of matrix: (219, 219)
number of nodes in graph 219 length of cowlist 219
Document No. 2
TM path: ./time_matrix\Time_FA_20201018T000000UTC.csv CL path: ./cow_list\Cow_list_20201018T000000UTC.csv
Shape of matrix: (208, 208)
number of nodes in graph 208 length of cowlist 208
Document No. 3
TM path: ./time_matrix\Time_FA_20201019T000000UTC.csv CL path: ./cow_list\Cow_list_20201019T000000UTC.csv
Shape of matrix: (209, 209)
number of nodes in graph 209 length of cowlist 209
Document No. 4
TM path: ./time_matrix\Time_FA_20201020T000000UTC.csv CL path: ./cow_list\Cow_list_20201020T000000UTC.csv


In [5]:
# Clique Percolation algorithm
def community_detection_PC(i,pos,G_AM):
    """Run 3-clique percolation on ``G_AM`` and save a picture of the communities.

    Side effects:
      * ``G_AM`` is modified in place: nodes of degree zero are removed, and
        every node that belongs to at least one community receives a
        ``'community_PC'`` attribute holding the list of its community
        numbers (numbering starts at 1).
      * The number of communities is printed.
      * The figure is written to ``./community/PC/Day<i>.png`` (the folder is
        created when missing).

    Parameters
    ----------
    i : int
        Day number; only used in the name of the saved figure.
    pos : any
        Ignored. The layout is always recomputed below with a fixed seed, so
        the argument is kept only so existing callers keep working.
    G_AM : networkx.Graph
        Graph to analyse.

    Returns
    -------
    list of set
        One set of node labels per detected community.
    """
    # Remove the nodes whose degree is zero
    nodes_removed = [node for node,degree in dict(G_AM.degree()).items() if degree == 0]
    G_AM.remove_nodes_from(nodes_removed)

    # Clique percolation with k = 3; convert the frozensets to plain sets
    communities_PC = [set(members) for members in nx.algorithms.community.k_clique_communities(G_AM,3)]
    len_PC = len(communities_PC)
    print(len_PC)

    # A node can sit in several overlapping communities, hence a list per node
    community_dict_PC = defaultdict(list)
    for community_num_PC, community_PC in enumerate(communities_PC, start=1):
        for character_PC in community_PC:
            community_dict_PC[character_PC].append(community_num_PC)
    # Attach the attribute once, after the mapping is complete, instead of
    # once per community member
    nx.set_node_attributes(G_AM, community_dict_PC, 'community_PC')

    # Part for plotting and saving figures

    # Graph layout; the fixed seed makes the picture reproducible. This
    # deliberately replaces whatever was passed in as `pos`.
    pos = nx.spring_layout(G_AM, k=0.2, pos=None, fixed=None, iterations=50, threshold=0.0001, weight='weight', scale=1, center=None, dim=2, seed=7)

    # One colour per community, never black (black is the background node
    # colour). A private generator seeded with 7 picks the same colours as
    # seeding the global one would, without disturbing the global random state.
    total_colors = list(mpl.colors.get_named_colors_mapping())
    total_colors.remove('black')
    color_map_PC = random.Random(7).sample(total_colors,len_PC)

    # image size
    fig = plt.figure(figsize=(30, 30))
    try:
        # Faint background: every node, edge and label of the graph
        nx.draw_networkx_nodes(G_AM, pos, node_size = 100,node_color = 'black',alpha = 0.1)
        nx.draw_networkx_edges(G_AM, pos, alpha=0.01)
        nx.draw_networkx_labels(G_AM, pos, alpha= 0.5, font_color='grey')

        for community_PC, community_color in zip(communities_PC, color_map_PC):
            # Every edge incident to a member of the community, including
            # edges that leave the community
            community_edges = list(itertools.chain.from_iterable([list(G_AM.edges(node)) for node in community_PC]))
            nx.draw_networkx_nodes(G_AM, pos , nodelist = community_PC, node_size = 200, node_color = community_color)
            nx.draw_networkx_edges(G_AM, pos ,edgelist = community_edges ,edge_color = community_color, alpha = 0.5)

        # savefig does not create missing folders
        os.makedirs('./community/PC', exist_ok=True)
        plt.savefig('./community/PC/Day%d.png'%i)
    finally:
        # Release the (large) figure even when drawing or saving failed
        plt.close(fig)

    return communities_PC

In [6]:
# Clique percolation algorithm: detect the communities of every day and save
# one figure per day (days are numbered from 1).
bar = progressbar.ProgressBar()
communities_PC_14 = []
for i, day_data in enumerate(bar(data_dict_list), start=1):
    # community_detection_PC computes its own seeded layout and ignores the
    # `pos` argument, so no layout is computed here; None is passed only to
    # fill the positional slot.
    communities_PC_14.append(community_detection_PC(i, None, day_data.get('Graph')))

  0% (0 of 14) |                         | Elapsed Time: 0:00:00 ETA:  --:--:--

28


  7% (1 of 14) |#                        | Elapsed Time: 0:00:01 ETA:   0:00:16

29


 14% (2 of 14) |###                      | Elapsed Time: 0:00:02 ETA:   0:00:15

28


 21% (3 of 14) |#####                    | Elapsed Time: 0:00:04 ETA:   0:00:15

23


 28% (4 of 14) |#######                  | Elapsed Time: 0:00:05 ETA:   0:00:14

32


 35% (5 of 14) |########                 | Elapsed Time: 0:00:06 ETA:   0:00:12

32


 42% (6 of 14) |##########               | Elapsed Time: 0:00:08 ETA:   0:00:10

35


 50% (7 of 14) |############             | Elapsed Time: 0:00:09 ETA:   0:00:09

33


 57% (8 of 14) |##############           | Elapsed Time: 0:00:10 ETA:   0:00:08

29


 64% (9 of 14) |################         | Elapsed Time: 0:00:12 ETA:   0:00:07

33


 71% (10 of 14) |#################       | Elapsed Time: 0:00:13 ETA:   0:00:05

42


 78% (11 of 14) |##################      | Elapsed Time: 0:00:15 ETA:   0:00:04

29


 85% (12 of 14) |####################    | Elapsed Time: 0:00:16 ETA:   0:00:02

31


 92% (13 of 14) |######################  | Elapsed Time: 0:00:17 ETA:   0:00:01

25


100% (14 of 14) |########################| Elapsed Time: 0:00:18 Time:  0:00:18


In [7]:
# One LaTeX table row per pair of consecutive days: the `onmi` score between
# the communities of day i and those of day i+1
for i in range(1, len(data_dict_list)):
    communities_today = communities_PC_14[i-1]
    communities_next = communities_PC_14[i]
    row_label = 'Day %d and Day %d'%(i,i+1)
    print(row_label,'&',onmi(communities_today,communities_next),'\\\\')
    

Day 1 and Day 2 & 0.04254915503710066 \\
Day 2 and Day 3 & 0.04304134032465179 \\
Day 3 and Day 4 & 0.027661635106150828 \\
Day 4 and Day 5 & 0.05096484421811753 \\
Day 5 and Day 6 & 0.04075323021885868 \\
Day 6 and Day 7 & 0.04740644066162636 \\
Day 7 and Day 8 & 0.05610520130384222 \\
Day 8 and Day 9 & 0.04510778294743001 \\
Day 9 and Day 10 & 0.046967654253012414 \\
Day 10 and Day 11 & 0.0525254200613251 \\
Day 11 and Day 12 & 0.045129212039993316 \\
Day 12 and Day 13 & 0.043908593976574206 \\
Day 13 and Day 14 & 0.046449961556535246 \\
