In [1]:
import collections
from datetime import datetime

import matplotlib.pyplot as plt
import snap

import util

# Input files, relative to the notebook's working directory.
edgeFile = 'brightkite/loc-brightkite_edges.txt'
checkinFile = 'brightkite/loc-brightkite_totalCheckins.txt'

# Build the graphs and lookup tables used by every later cell.
# Later cells use `checkins` as {user id: {sortable key: place id}} and pass
# `coordinates` to the util geometry helpers; the exact schemas live in util
# (not visible here) -- TODO confirm against util.buildGraphs.
userGraph, geoGraph, visitGraph, coordinates, checkins = util.buildGraphs(edgeFile, checkinFile)

# Degree distributions, one helper per graph (presumably each one plots or
# saves the distribution -- see util for what is actually produced).
util.userDeg(userGraph)
util.geoDeg(geoGraph)
util.visitDeg(visitGraph)



In [68]:
# Per-user mobility indexes, restricted to users that have check-ins.
#   placeNumI[j] -- number of distinct places in the j-th such user's history
#   newRateI[j]  -- share of the later half of the (sorted) history that was
#                   spent at places absent from the earlier half
userNum = userGraph.GetNodes()
placeNumI = []
newRateI = []
for node in userGraph.Nodes():
    NId = node.GetId()
    # Plain dict membership is a hash lookup; `in checkins.keys()` builds and
    # scans a list for every user under Python 2.
    if NId not in checkins:
        continue
    checkinfo = checkins[NId]
    # Order the check-ins by their key (presumably a timestamp -- confirm in util).
    timeline = sorted(checkinfo.items(), key = lambda x:x[0])
    places = [item[1] for item in timeline]
    placeNumI.append(len(set(places)))
    if len(places) < 2:
        # With fewer than two check-ins there is no earlier half to compare
        # against. Report 0.0, as the later index cells in this notebook do,
        # instead of 1.0 (one check-in) or a ZeroDivisionError (none).
        newRateI.append(0.0)
        continue
    # Floor division keeps the slice index an int regardless of the division mode.
    oldIndex = len(places) // 2
    old = set(places[:oldIndex])
    recent = places[oldIndex:]
    newVisit = sum(1 for place in recent if place not in old)
    newRateI.append(1.0 * newVisit / len(recent))

In [23]:
# Scatter of distinct-place count (x) against new-place rate (y), one tiny
# marker per list entry; the two lists must have equal length.
# The figure is written to disk and then closed instead of being shown.
plt.scatter(placeNumI, newRateI, s = 0.5, edgecolors = 'face')
plt.savefig('test.pdf')
plt.close()

In [72]:
# Frequency of every distinct new-rate value, drawn on log-log axes.
new = dict(collections.Counter(newRateI))
# The keys are unique, so ordering the (value, count) pairs in descending
# order is the same as ordering them by value alone.
sortednew = sorted(new.items(), reverse = True)
x = []
y = []
for rate, count in sortednew:
    x.append(rate)
    y.append(count)
plt.plot(x, y)
plt.xscale('log')
plt.yscale('log')
plt.xlim(xmax = 10)
plt.savefig('newrate_dist.pdf')
plt.close()

In [57]:
import collections
# Dominance index per checked-in user, with a 5% tolerance.
# For each user, find the smallest k for which the radius measured around
# util.massCenterK(places, coordinates, k) is at most 1.05 times the radius
# measured around the overall mass center.
#   dominant     -- that k
#   dominantRate -- k as a percentage of the user's distinct places
# A user for whom no k qualifies contributes no entry to either list.
dominant = []
dominantRate = []
timeStart = datetime.now()
for node in userGraph.Nodes():
    NId = node.GetId()
    # Dict membership is O(1); `in checkins.keys()` scans a list under Python 2.
    if NId not in checkins:
        continue
    checkinfo = checkins[NId]
    timeline = sorted(checkinfo.items(), key = lambda x:x[0])
    places = [item[1] for item in timeline]
    Pmc = util.massCenter(places, coordinates)
    placeNum = len(set(places))
    R = util.radius(places, coordinates, Pmc)
    # Loop-invariant: compute the acceptance threshold once per user.
    threshold = 1.05 * R
    for k in xrange(1, placeNum + 1):
        centerK = util.massCenterK(places, coordinates, k)
        if util.radius(places, coordinates, centerK) <= threshold:
            dominantRate.append(100.0 * k / placeNum)
            dominant.append(k)
            break

In [74]:
# Test delta: repeat the dominance-index computation for several tolerances
# and overlay the resulting frequency curves on one log-log plot.
# After the loop `dominant` holds the result for the last tolerance and
# `dominantRate` is left empty (rates are not collected in this sweep).
deltas = [1.005, 1.01, 1.02, 1.03, 1.04, 1.05]
plots = [None] * len(deltas)
for i, delta in enumerate(deltas):
    dominant = []
    dominantRate = []
    for node in userGraph.Nodes():
        NId = node.GetId()
        # Dict membership is O(1); `in checkins.keys()` scans a list under Python 2.
        if NId not in checkins:
            continue
        checkinfo = checkins[NId]
        timeline = sorted(checkinfo.items(), key = lambda x:x[0])
        places = [item[1] for item in timeline]
        Pmc = util.massCenter(places, coordinates)
        placeNum = len(set(places))
        R = util.radius(places, coordinates, Pmc)
        threshold = delta * R
        # Smallest k whose k-place mass center yields a radius within tolerance.
        for k in xrange(1, placeNum + 1):
            centerK = util.massCenterK(places, coordinates, k)
            if util.radius(places, coordinates, centerK) <= threshold:
                dominant.append(k)
                break
    domi = dict(collections.Counter(dominant))
    sorteddomi = sorted(domi.items(), key = lambda x:x[0], reverse = True)
    x = [item[0] for item in sorteddomi]
    y = [item[1] for item in sorteddomi]
    # Draw each curve exactly once and keep its handle for the legend. Plotting
    # it twice stacked a duplicate line and consumed two colours per curve.
    plots[i] ,= plt.plot(x, y)
plt.xscale('log')
plt.yscale('log')
plt.legend(tuple(plots), tuple(deltas))
plt.xlabel('Dominance Index')
plt.ylabel('Frequency')
plt.savefig('dominant_dist.pdf')

In [64]:
# Frequency of every distinct dominance-index value, drawn on log-log axes.
domi = dict(collections.Counter(dominant))
# Unique keys: descending order of the (value, count) pairs equals descending
# order by value.
sorteddomi = sorted(domi.items(), reverse = True)
x = []
y = []
for index, count in sorteddomi:
    x.append(index)
    y.append(count)
plt.plot(x, y)
plt.xscale('log')
plt.yscale('log')
plt.savefig('dominant_dist.pdf')
plt.close()

In [66]:
# Scatter of new-place rate (x) against dominance index (y). Both lists must
# have the same length and describe users in the same order -- that depends on
# which cells last populated them.
plt.scatter(newRateI, dominant, s = 0.5, edgecolors = 'face')
plt.savefig('test2.pdf')
plt.close()

In [77]:
# Get indexes for users (users who haven't checked in are considered 0).
# Position j of each list describes node id j.
# NOTE(review): this assumes node ids are exactly 0..userNum-1 -- confirm
# against how util.buildGraphs numbers the nodes.
userNum = userGraph.GetNodes()
placeNumI = []
newRateI = []
for NId in xrange(userNum):
    # Dict membership is O(1); `in checkins.keys()` scans a list under Python 2.
    if NId not in checkins:
        # Keep the distinct-place count an int, like every other entry, so the
        # dumped column can be parsed back with int().
        placeNumI.append(0)
        newRateI.append(0.0)
        continue
    checkinfo = checkins[NId]
    timeline = sorted(checkinfo.items(), key = lambda x:x[0])
    places = [item[1] for item in timeline]
    placeNumI.append(len(set(places)))
    if len(places) < 2:
        # No earlier half to compare against (also guards an empty history,
        # which would otherwise divide by zero).
        newRateI.append(0.0)
        continue
    # Floor division keeps the slice index an int regardless of the division mode.
    oldIndex = len(places) // 2
    old = set(places[:oldIndex])
    recent = places[oldIndex:]
    newVisit = sum(1 for place in recent if place not in old)
    newRateI.append(1.0 * newVisit / len(recent))

# Plot newRateI: frequency of each distinct value on log-log axes.
new = dict(collections.Counter(newRateI))
# Unique keys: descending order of the (value, count) pairs equals descending
# order by value.
sortednew = sorted(new.items(), reverse = True)
x = []
y = []
for rate, count in sortednew:
    x.append(rate)
    y.append(count)
plt.plot(x, y, color = '#088da5')
plt.xscale('log')
plt.yscale('log')
plt.xlim(xmax = 1)
plt.xlabel('New Rate Index')
plt.ylabel('Frequency')
plt.savefig('newrate_dist.pdf')
plt.close()

In [79]:
# Sanity check: number of new-rate entries (compare with the node count below).
len(newRateI)

58228

In [81]:
# Number of nodes in the user graph.
userGraph.GetNodes()

58228

In [82]:
# Number of dominance-index entries; only users with check-ins contribute one,
# so this can be smaller than the node count.
len(dominant)

51406

In [84]:
# Dump the new-rate index, one value per line (no trailing newline).
with open('newRateIndex_full.txt', 'w') as f:
    f.write('\n'.join(map(str, newRateI)))

In [85]:
# Dump the distinct-place counts, one value per line (no trailing newline).
with open('degreeIndex_full.txt', 'w') as f:
    f.write('\n'.join(map(str, placeNumI)))

In [86]:
# Dominance index per checked-in user with a 1% tolerance, written to disk.
# For each user, find the smallest k for which the radius measured around
# util.massCenterK(places, coordinates, k) is at most 1.01 times the radius
# measured around the overall mass center. Users for whom no k qualifies
# contribute no entry.
dominant = []
for node in userGraph.Nodes():
    NId = node.GetId()
    # Dict membership is O(1); `in checkins.keys()` scans a list under Python 2.
    if NId not in checkins:
        continue
    checkinfo = checkins[NId]
    timeline = sorted(checkinfo.items(), key = lambda x:x[0])
    places = [item[1] for item in timeline]
    Pmc = util.massCenter(places, coordinates)
    placeNum = len(set(places))
    R = util.radius(places, coordinates, Pmc)
    # Loop-invariant: compute the acceptance threshold once per user.
    threshold = 1.01 * R
    for k in xrange(1, placeNum + 1):
        centerK = util.massCenterK(places, coordinates, k)
        if util.radius(places, coordinates, centerK) <= threshold:
            dominant.append(k)
            break

# One value per line, in graph node-iteration order (no trailing newline).
with open('dominanceIndex.txt', 'w') as f:
    f.write('\n'.join(str(value) for value in dominant))

In [87]:
# Indexes for users that have check-ins only (others are skipped, not zero
# filled), written to the *_part files.
# NOTE(review): rows are emitted in ascending node-id order over
# 0..userNum-1, while dominanceIndex.txt is written in userGraph.Nodes()
# iteration order. The files are later joined row by row, so confirm that both
# orders agree (and that node ids really are 0..userNum-1).
userNum = userGraph.GetNodes()
placeNumI = []
newRateI = []
for NId in xrange(userNum):
    # Dict membership is O(1); `in checkins.keys()` scans a list under Python 2.
    if NId not in checkins:
        continue
    checkinfo = checkins[NId]
    timeline = sorted(checkinfo.items(), key = lambda x:x[0])
    places = [item[1] for item in timeline]
    placeNumI.append(len(set(places)))
    if len(places) < 2:
        # No earlier half to compare against (also guards an empty history,
        # which would otherwise divide by zero).
        newRateI.append(0.0)
        continue
    # Floor division keeps the slice index an int regardless of the division mode.
    oldIndex = len(places) // 2
    old = set(places[:oldIndex])
    recent = places[oldIndex:]
    newVisit = sum(1 for place in recent if place not in old)
    newRateI.append(1.0 * newVisit / len(recent))
# One value per line, no trailing newline.
with open('newRateIndex_part.txt', 'w') as f:
    f.write('\n'.join(str(rate) for rate in newRateI))
with open('degreeIndex_part.txt', 'w') as f:
    f.write('\n'.join(str(count) for count in placeNumI))

In [5]:
# Reload the three per-user indexes from disk. Each file holds one value per
# line; only the first whitespace-separated token of a line is read.
with open('newRateIndex_part.txt') as f:
    newRateI = [float(line.split()[0]) for line in f]
with open('degreeIndex_part.txt') as f:
    placeNumI = [int(line.split()[0]) for line in f]
with open('dominanceIndex.txt') as f:
    dominant = [int(line.split()[0]) for line in f]
# scatter_matrix lives in pandas.plotting on current pandas; the old
# pandas.tools.plotting location is kept as a fallback for old installs.
try:
    from pandas.plotting import scatter_matrix
except ImportError:
    from pandas.tools.plotting import scatter_matrix
import numpy as np
# Natural logs of each index. Zero entries (e.g. a new rate of 0.0) become
# -inf and make numpy emit a divide-by-zero warning.
nI = np.log(newRateI)
pI = np.log(placeNumI)
dI = np.log(dominant)


In [8]:
import pandas as pd
# One row per user, one column per index; the three lists must be equally
# long and describe the users in the same order.
indexColumns = {}
indexColumns['Degree Index'] = placeNumI
indexColumns['New Rate Index'] = newRateI
indexColumns['Dominance Index'] = dominant
df = pd.DataFrame(indexColumns)

In [10]:
# Pairwise scatter plots of the index columns with a kernel-density estimate
# on the diagonal. scatter_matrix draws on a new current figure, which is the
# one savefig then writes out.
scatter_matrix(df, alpha=0.2, figsize=(10, 10), diagonal='kde')
import matplotlib.pyplot as plt
plt.savefig('user_scatter_matrix.pdf')
plt.close()

In [13]:
# Effective diameter of the largest weakly connected component of the user
# graph. SNAP's ANF routine is an approximation, so the value may vary
# slightly between runs.
snap.GetAnfEffDiam(snap.GetMxWcc(userGraph))

7.117436556200229

In [15]:
# Effective diameter of the largest weakly connected component of the geo
# graph. SNAP's ANF routine is an approximation, so the value may vary
# slightly between runs.
snap.GetAnfEffDiam(snap.GetMxWcc(geoGraph))

12.00716661920268

In [12]:
def dom(placeList, coordinates, delta):
    """Return the dominance index of one user's check-in history.

    Looks for the smallest k (1 .. number of distinct places) for which
    util.radiusK(placeList, coordinates, k) reaches at least `delta` times
    util.radius(placeList, coordinates).

    Args:
        placeList: place ids of the user's check-ins (repeats allowed).
        coordinates: lookup passed through unchanged to the util helpers.
        delta: fraction of the full radius that must be reached.

    Returns:
        (k, 100.0 * k / distinct_places) for the first qualifying k, or None
        when no k qualifies (including an empty placeList).
    """
    threshold = delta * util.radius(placeList, coordinates)
    distinct = len(set(placeList))
    k = 1
    while k <= distinct:
        if util.radiusK(placeList, coordinates, k) >= threshold:
            return k, 100.0 * k / distinct
        k += 1
    return None

In [25]:
# Test delta: compute the dominance index (via dom) for several radius
# fractions, then overlay the frequency curves on one log-log plot.
#   dominant[delta]     -- dominance index of every evaluated user
#   dominantRate[delta] -- the same index as a percentage of distinct places
import util
import collections
deltas = [0.6, 0.7, 0.8, 0.9, 0.95, 0.99]
dominant = {delta:[] for delta in deltas}
dominantRate = {delta:[] for delta in deltas}
plots = [None] * len(deltas)
labels = ['delta = ' + str(delta) for delta in deltas]

# Pass 1: walk the users once. The sorted timeline does not depend on delta,
# so it is built a single time and reused for every delta.
for node in userGraph.Nodes():
    NId = node.GetId()
    # Dict membership is O(1); `in checkins.keys()` scans a list under Python 2.
    if NId not in checkins:
        continue
    checkinfo = checkins[NId]
    timeline = sorted(checkinfo.items(), key = lambda x:x[0])
    places = [item[1] for item in timeline]
    for delta in deltas:
        result = dom(places, coordinates, delta)
        if result is None:
            # dom found no qualifying k for this user; leave the user out
            # instead of failing on the tuple unpacking below.
            continue
        a, b = result
        dominant[delta].append(a)
        dominantRate[delta].append(b)

# Pass 2: one frequency curve per delta.
for i, delta in enumerate(deltas):
    domi = dict(collections.Counter(dominant[delta]))
    sorteddomi = sorted(domi.items(), key = lambda x:x[0], reverse = True)
    x = [item[0] for item in sorteddomi]
    y = [item[1] for item in sorteddomi]
    plots[i] ,= plt.plot(x, y)
plt.xscale('log')
plt.yscale('log')
plt.legend(tuple(plots), tuple(labels))
plt.xlabel('Dominance Index')
plt.ylabel('Frequency')
plt.savefig('dominant_dist.pdf')

# Write one pair of files per delta, numbered by the delta's position in
# `deltas`: dominanceI<i>.txt and dominanceRateI<i>.txt, one value per line.
# The values are rendered with generator expressions using their own variable
# name: in Python 2 a list comprehension's loop variable leaks into the
# enclosing scope, so reusing `i` there overwrote the file index before the
# second file name was built.
for i, delta in enumerate(deltas):
    with open('dominanceI' + str(i) + '.txt', 'w') as f:
        f.write('\n'.join(str(value) for value in dominant[delta]))
    with open('dominanceRateI' + str(i) + '.txt', 'w') as f:
        f.write('\n'.join(str(value) for value in dominantRate[delta]))

In [26]:
# Sanity check: number of users that received a dominance index for delta = 0.99.
len(dominant[0.99])

51406

In [1]:
# Reload the new-rate and distinct-place indexes; each file holds one value
# per line and only the first whitespace-separated token is read.
with open('newRateIndex_part.txt') as f:
    newRateI = [float(line.split()[0]) for line in f]
with open('degreeIndex_part.txt') as f:
    placeNumI = [int(line.split()[0]) for line in f]

In [2]:
# Reload the dominance index stored in dominanceI2.txt, one integer per line.
with open('dominanceI2.txt') as f:
    dominanceI = [int(line.split()[0]) for line in f]

In [3]:
import pandas as pd
# One row per user, one column per index; the three lists must be equally
# long and describe the users in the same order.
indexColumns = {}
indexColumns['Degree Index'] = placeNumI
indexColumns['New Rate Index'] = newRateI
indexColumns['Dominance Index'] = dominanceI
df = pd.DataFrame(indexColumns)

In [6]:
# scatter_matrix lives in pandas.plotting on current pandas; the old
# pandas.tools.plotting location is kept as a fallback for old installs.
try:
    from pandas.plotting import scatter_matrix
except ImportError:
    from pandas.tools.plotting import scatter_matrix
import numpy as np
# Pairwise scatter plots of the index columns with a kernel-density estimate
# on the diagonal; scatter_matrix draws on a new current figure, which is the
# one savefig then writes out.
scatter_matrix(df, alpha=0.2, figsize=(20, 20), diagonal='kde')
import matplotlib.pyplot as plt
plt.savefig('user_scatter_matrix.png')
plt.close()