In [12]:
import snap
from datetime import datetime
import matplotlib.pyplot as plt
import collections

edgeFile = 'brightkite/loc-brightkite_edges.txt'
checkinFile = 'brightkite/loc-brightkite_totalCheckins.txt'

# Build the undirected user-user layer: one node per user ID that appears
# in the edge list, one edge per "a b" row.
userGraph = snap.TUNGraph.New()
with open(edgeFile) as edgeStream:
    for row in edgeStream:
        srcTok, dstTok = row.split()
        a, b = int(srcTok), int(dstTok)
        # Nodes must exist before an edge can reference them
        for nid in (a, b):
            if not userGraph.IsNode(nid):
                userGraph.AddNode(nid)
        userGraph.AddEdge(a, b)

In [2]:
# Parse the check-in log.
#   checkins[user][timestamp] -> location name
#   coordinates[location name] -> (float(column 3), float(column 4)) of the
#                                 first row in which that location appears
checkins = {}
coordinates = {}
locNames = set()
userIds = set()
with open(checkinFile) as checkinStream:
    for record in checkinStream:
        fields = record.split()
        if len(fields) != 5:
            # Rows without exactly five whitespace-separated fields are skipped
            continue
        user = int(fields[0])
        # The timestamp is kept as the raw string from the file (not parsed)
        time = fields[1]
        loc = fields[4]
        if loc not in locNames:
            coordinates[loc] = (float(fields[2]), float(fields[3]))
            locNames.add(loc)
        if user not in userIds:
            checkins[user] = {}
            userIds.add(user)
        # A later row with the same user and timestamp overwrites the earlier one
        checkins[user][time] = loc

In [3]:
# Total number of stored (user, timestamp) check-ins
print(sum(len(userVisits) for userVisits in checkins.values()))

4702067


In [4]:
# Replace location names by compact negative integer IDs: the location at
# index i of `locations` gets ID -(i + 2), i.e. -2, -3, -4, ...
# NOTE: this rewrites `checkins` in place. Running the cell a second time
# raises KeyError, because the stored values are then already integer IDs.
locations = list(locNames)
reverseLoc = dict((name, -(idx + 2)) for idx, name in enumerate(locations))
for userVisits in checkins.values():
    for stamp, name in list(userVisits.items()):
        userVisits[stamp] = reverseLoc[name]

In [6]:
# Peek at the first ten (location name, coordinate pair) entries
list(coordinates.items())[:10]

[('d329e41cec6d11dda8e5003048c0801e', (-40.336368, -72.956429)),
 ('df1dae2c0f85e02461682bf460fdd24eabe163c6', (47.668481, -122.104427)),
 ('2d4920e7273c755704c06f2201832d89', (43.709601, -79.398934)),
 ('bd525286848a11deba23003048c10834', (36.56207, 136.662419)),
 ('a4ef963e84f83133484227465e2113e9', (32.762293, -79.988538)),
 ('ee69c2f6455f11deb1d7003048c10834', (40.720458, -74.042443)),
 ('93e02a0cdc5ad9369fe926019c6973e7', (36.090551, 140.109732)),
 ('ca71a82e677311de9323003048c10834', (42.209783, -72.629789)),
 ('474f93a6585111dea018003048c10834', (39.358233, -76.705081)),
 ('413754d668b411de9a19003048c0801e', (39.162128, -77.220604))]

In [7]:
# Construct the place-place layer.
# Nodes are the integer location IDs. For every user, check-ins are ordered
# by timestamp string and each consecutive pair (a, b) contributes 1 to the
# integer edge attribute "w" of the edge a -> b (the edge is created on first
# use). Two consecutive check-ins at the same place produce a self-loop.
geoGraph = snap.TNEANet.New()
geoGraph.AddIntAttrE("w")
cood = {}
for idx, name in enumerate(locations):
    placeId = -(idx + 2)
    geoGraph.AddNode(placeId)
    cood[placeId] = coordinates[name]
for cIn in checkins.values():
    # Timestamps are unique keys, so sorting the items sorts by timestamp
    timeline = sorted(cIn.items())
    visited = [place for stamp, place in timeline]
    for a, b in zip(visited, visited[1:]):
        if not geoGraph.IsEdge(a, b):
            geoGraph.AddEdge(a, b)
            weight = 1
        else:
            weight = geoGraph.GetIntAttrDatE(geoGraph.GetEI(a, b), 'w') + 1
        EI = geoGraph.GetEI(a, b)
        geoGraph.AddIntAttrDatE(EI, weight, 'w')

In [9]:
# Peek at the first ten (user, {timestamp: location ID}) entries
list(checkins.items())[:10]

[(0,
  {'2009-10-19T19:39:07Z': -268645,
   '2009-10-02T03:35:25Z': -99121,
   '2010-05-16T03:23:23Z': -222808,
   '2009-12-18T02:47:01Z': -540844,
   '2010-04-07T22:57:07Z': -511225,
   '2009-12-15T19:16:02Z': -770049,
   '2010-04-29T03:56:23Z': -225667,
   '2009-07-17T06:40:37Z': -268645,
   '2009-06-24T20:23:51Z': -174969,
   '2009-09-27T06:09:28Z': -643502,
   '2010-10-13T20:05:43Z': -639349,
   '2010-03-16T03:07:58Z': -607171,
   '2010-01-25T17:02:16Z': -78394,
   '2010-03-19T13:57:05Z': -384440,
   '2010-03-15T15:44:27Z': -120252,
   '2009-09-17T19:45:20Z': -511254,
   '2010-04-21T02:32:36Z': -20793,
   '2010-01-01T02:26:47Z': -268447,
   '2010-09-13T18:34:13Z': -623567,
   '2010-03-16T17:13:47Z': -235431,
   '2009-12-02T20:21:28Z': -109751,
   '2010-01-16T14:38:07Z': -220821,
   '2010-07-03T00:35:31Z': -442748,
   '2009-06-05T02:49:38Z': -472856,
   '2009-05-26T16:28:10Z': -173623,
   '2009-07-09T08:12:05Z': -173623,
   '2010-05-02T20:18:47Z': -757611,
   '2010-02-19T20:41:12Z':

In [10]:
# Construct visitGraph, the layer linking people to places.
# Nodes: every user that has check-ins plus every location ID.
# Edges: user -> place, with integer attribute "w" counting how many of the
# user's stored check-ins are at that place.
visitGraph = snap.TNEANet.New()
for user in checkins:
    visitGraph.AddNode(user)
for idx, name in enumerate(locations):
    visitGraph.AddNode(-(idx + 2))

visitGraph.AddIntAttrE('w')
for user, userVisits in checkins.items():
    for loc in userVisits.values():
        if not visitGraph.IsEdge(user, loc):
            visitGraph.AddEdge(user, loc)
            weight = 1
        else:
            weight = visitGraph.GetIntAttrDatE(visitGraph.GetEI(user, loc), 'w') + 1
        EI = visitGraph.GetEI(user, loc)
        visitGraph.AddIntAttrDatE(EI, weight, 'w')

In [23]:
# `timeline` is whatever the geoGraph construction loop left behind, i.e. the
# sorted check-ins of the last user it processed; that cell must run first.
print(timeline[:5])

[('2009-05-25T20:56:10Z', -3756), ('2009-05-25T21:35:28Z', -17019), ('2009-05-25T21:37:44Z', -17019), ('2009-05-25T21:42:47Z', -17019), ('2009-05-25T22:13:23Z', -27069)]


In [11]:
# Degree distribution of userGraph, saved as a log-log plot
userNum = userGraph.GetNodes()
DegToCntV = snap.TIntPrV()
snap.GetDegCnt(userGraph, DegToCntV)
# Each entry of DegToCntV is a (degree, number of nodes) pair
degCntPairs = [(pair.GetVal1(), pair.GetVal2()) for pair in DegToCntV]
degrees = [deg for deg, cnt in degCntPairs]
# Normalize the counts by the number of nodes in userGraph
counts = [cnt / (1.0 * userNum) for deg, cnt in degCntPairs]

plt.plot(degrees, counts, color = "#fa8072")
for setScale in (plt.xscale, plt.yscale):
    setScale('log')
plt.title('Degree Distribution for User-User Network')
plt.xlabel('degree')
plt.ylabel('frequency')
plt.savefig('degree-user-user.pdf')
plt.close()

In [12]:
# Degree distribution of geoGraph, saved as a log-log plot
locNum = geoGraph.GetNodes()
degreesLoc = list()
countsLoc = list()
DegToCntV = snap.TIntPrV()
snap.GetDegCnt(geoGraph, DegToCntV)
# Each entry of DegToCntV is a (degree, number of nodes) pair
for item in DegToCntV:
    degreesLoc.append(item.GetVal1())
    countsLoc.append(item.GetVal2())
# Normalize the place counts by the number of nodes in geoGraph.
# (This must operate on countsLoc: normalizing `counts` would leave the
# plotted series unnormalized and clobber the user-graph frequencies.)
countsLoc = [value / (1.0 * locNum) for value in countsLoc]

plt.plot(degreesLoc, countsLoc, color = "#fa8072")
plt.xscale('log')
plt.yscale('log')
plt.title('Degree Distribution for Place-Place Network')
plt.xlabel('degree')
plt.ylabel('frequency')
plt.savefig('degree-loc-loc.pdf')
plt.close()

In [11]:
# Degree distribution of geoGraph where a node's degree is taken as the
# larger of its in-degree and out-degree.
locNum = geoGraph.GetNodes()
degreeTally = collections.Counter(
    max(node.GetInDeg(), node.GetOutDeg()) for node in geoGraph.Nodes()
)
# Degrees are unique keys, so sorting the items orders them by degree
degreeLoc = sorted(degreeTally.items())
degrees = [deg for deg, cnt in degreeLoc]
# Fraction of all place nodes having each degree
counts = [1.0 * cnt / locNum for deg, cnt in degreeLoc]
plt.plot(degrees, counts, color = "#fa8072")
for setScale in (plt.xscale, plt.yscale):
    setScale('log')
plt.title('Degree Distribution for Place-Place Network')
plt.xlabel('degree')
plt.ylabel('frequency')
plt.savefig('degree-loc-loc.pdf')
plt.close()

In [14]:
# Degree distribution of visitGraph, tallied separately for the two node
# kinds: IDs >= 0 are counted as users, negative IDs as places.
degreeUser = collections.Counter()
degreeLoc = collections.Counter()
for node in visitGraph.Nodes():
    tally = degreeUser if node.GetId() >= 0 else degreeLoc
    tally[node.GetDeg()] += 1

# Degrees are unique keys, so sorting the items orders them by degree
dU = sorted(degreeUser.items())
dL = sorted(degreeLoc.items())
degreesUser = [deg for deg, cnt in dU]
degreesLoc = [deg for deg, cnt in dL]

# Normalize each series by the number of nodes of its own kind
numUser = sum(cnt for deg, cnt in dU)
numLoc = sum(cnt for deg, cnt in dL)
countsUser = [cnt / (1.0 * numUser) for deg, cnt in dU]
countsLoc = [cnt / (1.0 * numLoc) for deg, cnt in dL]

In [15]:
def saveLogLogPlot(xs, ys, title, outPath):
    """Plot ys against xs on log-log axes, save the figure to outPath, close it."""
    plt.plot(xs, ys, color = "#fa8072")
    plt.xscale('log')
    plt.yscale('log')
    plt.title(title)
    plt.xlabel('degree')
    plt.ylabel('frequency')
    plt.savefig(outPath)
    plt.close()

# User side, then place side, of the user-place network
saveLogLogPlot(degreesUser, countsUser,
               "User's Degree Distribution for User-Place Network",
               'degree-user-loc.pdf')
saveLogLogPlot(degreesLoc, countsLoc,
               "Place's Degree Distribution for User-Place Network",
               'degree-loc-user.pdf')

In [23]:
# Fraction of nodes inside the largest weakly connected component,
# printed for the user, place and visit graphs in that order
for layer in (userGraph, geoGraph, visitGraph):
    print(1.0 * snap.GetMxWcc(layer).GetNodes() / layer.GetNodes())

0.974428110188
0.970454069131
0.96313921385


In [28]:
# Fraction of edges inside the largest weakly connected component,
# printed for the user, place and visit graphs in that order
for layer in (userGraph, geoGraph, visitGraph):
    print(1.0 * snap.GetMxWcc(layer).GetEdges() / layer.GetEdges())

0.994707536505
0.984528583796
0.978627248168


In [29]:
# Clustering coefficient of userGraph as reported by snap.GetClustCf
snap.GetClustCf(userGraph)

0.17232592744613365

In [33]:
# Per-node clustering coefficient for every node of geoGraph
locCC = [snap.GetNodeClustCf(geoGraph, node.GetId()) for node in geoGraph.Nodes()]

In [34]:
# Mean of the per-node clustering coefficients collected in locCC
meanLocCC = sum(locCC) / len(locCC)
print(meanLocCC)

0.151397514428


In [35]:
# Per-node clustering coefficient for every node of visitGraph
visitCC = [snap.GetNodeClustCf(visitGraph, node.GetId()) for node in visitGraph.Nodes()]

In [36]:
# Mean of the per-node clustering coefficients collected in visitCC.
# visitGraph only has user -> place edges, so (assuming user IDs never
# collide with the negative place IDs) it contains no triangles and a
# mean of 0 is the expected result.
meanVisitCC = sum(visitCC) / len(visitCC)
print(meanVisitCC)

0.0


In [None]:
# Degree distribution of geoGraph, saved as a log-log plot
locNum = geoGraph.GetNodes()
degreesLoc = list()
countsLoc = list()
DegToCntV = snap.TIntPrV()
snap.GetDegCnt(geoGraph, DegToCntV)
# Each entry of DegToCntV is a (degree, number of nodes) pair
for item in DegToCntV:
    degreesLoc.append(item.GetVal1())
    countsLoc.append(item.GetVal2())
# Normalize the place counts by the number of nodes in geoGraph
# (the plotted series is countsLoc, so that is the list to normalize)
countsLoc = [value / (1.0 * locNum) for value in countsLoc]

# Weighted degree per place: sum of the "w" attribute over all incident
# edges. A self-loop contributes its weight twice (once as source, once as
# destination).
# NOTE(review): the earlier draft of this loop called geoGraph() (not
# callable), summed neighbour IDs and discarded the result; weighted degree
# is assumed to be the intent -- confirm before relying on weightedLoc.
weightedLoc = {}
for node in geoGraph.Nodes():
    weightedLoc[node.GetId()] = 0
for edge in geoGraph.Edges():
    w = geoGraph.GetIntAttrDatE(edge, 'w')
    weightedLoc[edge.GetSrcNId()] += w
    weightedLoc[edge.GetDstNId()] += w

plt.plot(degreesLoc, countsLoc, color = "#fa8072")
plt.xscale('log')
plt.yscale('log')
plt.title('Degree Distribution for Place-Place Network')
plt.xlabel('degree')
plt.ylabel('frequency')
plt.savefig('degree-loc-loc.pdf')
plt.close()