In [1]:
import pandas as pd
import numpy as np
import math as m
import time 
from pycowview.data import csv_read_FA
from pycowview.manipulate import unique_cows
from pycowview.metrics import interaction_time
import networkx as nx
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib as mpl
import itertools
import os
import community
from collections import defaultdict,Counter
import progressbar
import random
import itertools 
from sklearn.metrics.cluster import normalized_mutual_info_score
from sklearn.metrics.cluster import adjusted_mutual_info_score
from sklearn.metrics import f1_score

In [2]:
# Weighted version!
# This function will get the path of each csv file
def findAllFile(base):
    """Lazily yield the full path of every file under ``base`` ending in '.csv'.

    The directory tree rooted at ``base`` is traversed with ``os.walk``. Paths
    are produced in whatever order the walk visits them, so callers that need
    a stable order have to sort the result themselves.
    """
    for dirpath, _subdirs, filenames in os.walk(base):
        # Keep only the csv files of the directory currently being visited.
        csv_names = (name for name in filenames if name.endswith('.csv'))
        for name in csv_names:
            yield os.path.join(dirpath, name)

# Input: the folders where the time matrices and the cow lists are saved
# Output: a list with one dictionary per (time matrix, cow list) file pair
# Structure of each dictionary: CL (cow list), TM (time matrix), AM_weighted (weighted adjacency matrix), Graph (weighted undirected graph)
def time_matrix_to_graph(tm_folder,cl_folder,epsilon=1800):
    """Build one weighted, undirected graph per time-matrix csv file.

    The csv files found under ``tm_folder`` and ``cl_folder`` are sorted by
    path and paired position by position, so both folders must contain the
    same number of files and their names must sort into the same order.

    For every pair the time matrix is turned into a weighted adjacency matrix:
    entries less than or equal to ``epsilon`` become 0, every other entry ``t``
    becomes ``(t - epsilon) / (max - epsilon)`` where ``max`` is the largest
    entry of that matrix (the diagonal is included when taking the maximum),
    and finally the diagonal is set to 0 so the graph has no self-loops.

    Parameters
    ----------
    tm_folder : str
        Folder searched (recursively) for the time-matrix csv files.
    cl_folder : str
        Folder searched (recursively) for the cow-list csv files.
    epsilon : float, optional
        Threshold below or at which two cows are not connected. Defaults to
        1800, which the notebook describes as 30 minutes expressed in seconds
        (NOTE(review): assumes the matrices store seconds - confirm).

    Returns
    -------
    list of dict
        One dictionary per file pair, in sorted-path order, with the keys
        ``CL`` (cow ids as an int array), ``TM`` (the loaded time matrix),
        ``AM_weighted`` (the weighted adjacency matrix) and ``Graph`` (the
        ``networkx.Graph`` whose nodes are relabelled with the cow ids).

    Raises
    ------
    ValueError
        If the two folders hold a different number of csv files, or if a cow
        list does not have exactly one entry per row of its time matrix.
    """
    tmlist = sorted(findAllFile(tm_folder))
    cllist = sorted(findAllFile(cl_folder))
    # zip() would silently drop the surplus files and could pair the wrong days.
    if len(tmlist) != len(cllist):
        raise ValueError(
            'Found %d time matrices in %r but %d cow lists in %r'
            % (len(tmlist), tm_folder, len(cllist), cl_folder)
        )

    dict_list = []
    for i,(tm,cl) in enumerate(zip(tmlist,cllist)):
        # atleast_1d: a file holding a single id is loaded as a 0-d array,
        # which has no len() and cannot be zipped with the graph nodes.
        cowlist = np.atleast_1d(np.loadtxt(cl,delimiter=",")).astype(int)
        # Keep the matrix type for the 'TM' entry so existing consumers see
        # the same object type as before.
        OM = np.asmatrix(np.loadtxt(tm,delimiter=","))
        times = np.asarray(OM)

        # Weighted adjacency matrix: 0 up to the threshold, then rescaled so
        # that the largest entry of the matrix maps to 1.
        span = np.amax(times) - epsilon
        if span > 0:
            AM = np.where(times <= epsilon,0,(times - epsilon)/span)
        else:
            # No entry exceeds the threshold: every weight is 0. Handling it
            # here avoids dividing by zero when the maximum equals epsilon.
            AM = np.zeros(times.shape)
        np.fill_diagonal(AM,0)

        # Undirected weighted graph; zero entries produce no edge.
        # from_numpy_array replaces from_numpy_matrix, removed in networkx 3.0.
        G_AM_temp = nx.from_numpy_array(AM,parallel_edges=False,create_using = nx.Graph())

        # Make sure the order of cowlist is the same as the row name!
        print('Shape of matrix:',AM.shape)
        print('number of nodes in graph',len(G_AM_temp),'length of cowlist',len(cowlist))
        if len(G_AM_temp) != len(cowlist):
            raise ValueError(
                'Cow list %r has %d entries but time matrix %r has %d rows'
                % (cl, len(cowlist), tm, len(G_AM_temp))
            )
        # Node k of the temporary graph is row k of the matrix, i.e. cowlist[k].
        mapping = dict(zip(G_AM_temp, cowlist))
        G_AM = nx.relabel_nodes(G_AM_temp, mapping)

        # Get the dict of the collection(CL,TM,AM_weighted,Graph)
        data_dict = dict(CL=cowlist,TM=OM,AM_weighted=AM,Graph=G_AM)
        print('Document No.',i)
        print('TM path:',tm,'CL path:',cl)
        dict_list.append(data_dict)

    print('The length of the list: ',len(dict_list))
    return dict_list

In [3]:
# Load every time matrix / cow list pair and build the per-file graph dictionaries.
# Both folders are given relative to the notebook's working directory;
# the resulting list is stored in data_dict_list.
tm_folder = './time_matrix'
cl_folder = './cow_list'
data_dict_list = time_matrix_to_graph(tm_folder,cl_folder)

Shape of matrix: (213, 213)
number of nodes in graph 213 length of cowlist 213
Document No. 0
TM path: ./time_matrix\Time_FA_20201016T000000UTC.csv CL path: ./cow_list\Cow_list_20201016T000000UTC.csv
Shape of matrix: (212, 212)
number of nodes in graph 212 length of cowlist 212
Document No. 1
TM path: ./time_matrix\Time_FA_20201017T000000UTC.csv CL path: ./cow_list\Cow_list_20201017T000000UTC.csv
Shape of matrix: (219, 219)
number of nodes in graph 219 length of cowlist 219
Document No. 2
TM path: ./time_matrix\Time_FA_20201018T000000UTC.csv CL path: ./cow_list\Cow_list_20201018T000000UTC.csv
Shape of matrix: (208, 208)
number of nodes in graph 208 length of cowlist 208
Document No. 3
TM path: ./time_matrix\Time_FA_20201019T000000UTC.csv CL path: ./cow_list\Cow_list_20201019T000000UTC.csv
Shape of matrix: (209, 209)
number of nodes in graph 209 length of cowlist 209
Document No. 4
TM path: ./time_matrix\Time_FA_20201020T000000UTC.csv CL path: ./cow_list\Cow_list_20201020T000000UTC.csv


# Statistics of graphs
This part reports the density and the number of nodes and edges of each graph.

In [4]:
# Per-graph statistics, collected in the same order as data_dict_list:
# number of nodes, number of edges and density.
no_nodes = []
no_edges = []
density = []
bar = progressbar.ProgressBar()
for day_record in bar(data_dict_list):
    graph = day_record.get('Graph')
    no_nodes.append(nx.number_of_nodes(graph))
    no_edges.append(nx.number_of_edges(graph))
    density.append(nx.density(graph))
print(no_nodes)
print(no_edges)
print(density)

100% (14 of 14) |########################| Elapsed Time: 0:00:00 Time:  0:00:00


[213, 212, 219, 208, 209, 208, 210, 210, 210, 209, 205, 210, 209, 205]
[1009, 1099, 1249, 1108, 992, 975, 1004, 976, 1106, 1006, 980, 1045, 1046, 1095]
[0.04468952077243334, 0.04913708307252079, 0.05232290226634829, 0.051467855815681904, 0.045638571954361426, 0.04528985507246377, 0.04575074048758259, 0.04447482342219184, 0.05039872408293461, 0.046282664703717336, 0.0468675274988044, 0.047619047619047616, 0.04812292970187707, 0.05236728837876614]


In [10]:
# Print one LaTeX table row per graph: index & nodes & edges & density \\
bar = progressbar.ProgressBar()
for day_idx in bar(range(len(data_dict_list))):
    row_cells = (
        day_idx + 1, '&',
        no_nodes[day_idx], '&',
        no_edges[day_idx], '&',
        '%.3f' % density[day_idx],
        '\\\\',
    )
    print(*row_cells)

100% (14 of 14) |########################| Elapsed Time: 0:00:00 Time:  0:00:00


1 & 213 & 1009 & 0.045 \\
2 & 212 & 1099 & 0.049 \\
3 & 219 & 1249 & 0.052 \\
4 & 208 & 1108 & 0.051 \\
5 & 209 & 992 & 0.046 \\
6 & 208 & 975 & 0.045 \\
7 & 210 & 1004 & 0.046 \\
8 & 210 & 976 & 0.044 \\
9 & 210 & 1106 & 0.050 \\
10 & 209 & 1006 & 0.046 \\
11 & 205 & 980 & 0.047 \\
12 & 210 & 1045 & 0.048 \\
13 & 209 & 1046 & 0.048 \\
14 & 205 & 1095 & 0.052 \\
