In [1]:
%matplotlib inline
# notebook
import matplotlib.pylab as pylab
import matplotlib.pyplot as plt
from collections import Counter
import numpy as np
import pandas as pd
import networkx as nx
from math import sin, cos, sqrt, atan2, radians
from utils import *
from data import *
# Enlarge the default figure size and font so the network plots below stay legible.
pylab.rcParams.update({
    'figure.figsize': (32.0, 24.0),
    'font.size': 24,
})

In [None]:
# Load the records from eigg.csv (path relative to the notebook's working
# directory) and preview the first rows. Later cells read the 'Recorder',
# 'Latitude (WGS84)', 'Longitude (WGS84)' and 'Start date year' columns.
data = pd.read_csv("eigg.csv") 
data.head()

In [None]:
# Treats the Earth as a perfect sphere, so the result ignores its flattening.
def seperationInMetres(p1,p2):
    """Return the great-circle (haversine) distance between two points in metres.

    Args:
        p1: (latitude, longitude) pair in decimal degrees.
        p2: (latitude, longitude) pair in decimal degrees.

    Returns:
        Distance over the surface of a sphere of radius 6373 km, in metres.
        NaN coordinates propagate to a NaN result.
    """
    R = 6373.0  # sphere radius in kilometres

    lat1, lon1 = p1
    lat2, lon2 = p2

    lat1 = radians(lat1)
    lon1 = radians(lon1)
    lat2 = radians(lat2)
    lon2 = radians(lon2)

    dlon = lon2 - lon1
    dlat = lat2 - lat1

    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    # Rounding can push `a` a hair above 1 for (near-)antipodal points, which
    # would make sqrt(1 - a) raise a math domain error. Only the upper bound is
    # clamped; a comparison (rather than min/max) lets NaN pass through unchanged.
    if a > 1.0:
        a = 1.0
    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    # kilometres -> metres
    return R * c * 1000

def withinInspectableArea(centroid, p1, maxDistanceAway):
    """Tell whether p1 lies no further than maxDistanceAway metres from centroid.

    Both points are (latitude, longitude) pairs; the distance comes from
    seperationInMetres and the bound is inclusive.
    """
    metresFromCentre = seperationInMetres(p1, centroid)
    return metresFromCentre <= maxDistanceAway

In [None]:
centroid = (56.907887,-6.1960333) #Eigg central
maxDistance = 10000 # Take everything (radius in metres around the centroid)

latColumn = 'Latitude (WGS84)'
lonColumn = 'Longitude (WGS84)'

# Build the working frame as one non-mutating chain. Assigning converted columns
# back into a frame derived from `data[...]` is the chained-assignment pattern
# that triggers pandas' SettingWithCopyWarning; dropna/astype return new frames.
# Step order matches the original: coordinates are cleaned and cast first, then
# rows lacking a year or a recorder are dropped, then the year is cast to int.
df = (
    data[['Recorder', latColumn, lonColumn, 'Start date year']]
    .dropna(subset=[latColumn, lonColumn])
    .astype({latColumn: np.float32, lonColumn: np.float32})
    .dropna(subset=['Start date year', 'Recorder'])
    .astype({'Start date year': np.int32})
)

# Keep only records made after 2010.
df = df[df['Start date year'] > 2010]

# Boolean mask of rows inside the inspectable area. An explicit Series is used
# instead of df.apply(..., axis=1) because apply returns a DataFrame, not a
# mask, when no rows are left.
insideArea = pd.Series(
    [withinInspectableArea(centroid, point, maxDistance)
     for point in zip(df[latColumn], df[lonColumn])],
    index=df.index,
    dtype=bool,
)
df = df[insideArea]

# Number of distinct recorders remaining (Recorder has no NaN at this point).
print(df['Recorder'].nunique())

In [None]:
# Proximity threshold in metres, read as a global by recorderCloseTo: two
# recorders count as close when any pair of their locations is at most this far apart.
maxDist = 100 #anything within 100 metres observed is counted

def retrieveRecorders(df):
    """Return the distinct values of the 'Recorder' column, in order of first appearance."""
    recorderColumn = df['Recorder']
    return recorderColumn.unique()

def retrieveRecorderLocations(df):
    """Map each recorder to the set of (latitude, longitude) pairs they recorded at.

    Keys follow the order returned by retrieveRecorders(df); buildGraph and
    recorderCloseTo rely on that order when they refer to recorders by position.
    """
    def locationsOf(name):
        # All rows logged by this recorder, reduced to unique coordinate pairs.
        rows = df[df['Recorder'] == name]
        return set(zip(rows['Latitude (WGS84)'], rows['Longitude (WGS84)']))

    return {name: locationsOf(name) for name in retrieveRecorders(df)}

def recorderCloseTo(recorder, recorderLocs):
    """Return the positions (within recorderLocs' key order) of recorders near `recorder`.

    Another recorder is near when at least one of their locations lies within
    the global maxDist metres of at least one of `recorder`'s locations. The
    recorder itself is never included.
    """
    ownLocations = recorderLocs[recorder]

    def overlaps(otherLocations):
        # Stops at the first pair of locations that is close enough.
        return any(
            seperationInMetres(mine, theirs) <= maxDist
            for mine in ownLocations
            for theirs in otherLocations
        )

    return [
        position
        for position, name in enumerate(recorderLocs.keys())
        if name != recorder and overlaps(list(recorderLocs[name]))
    ]

def buildGraph(df):
    """Build an undirected graph linking recorders whose locations lie close together.

    Nodes are integer positions into retrieveRecorders(df). An edge joins two
    nodes when recorderCloseTo reports them as close.

    Returns:
        (G, labels): the networkx Graph, and a dict mapping node position to
        recorder name.
    """
    recorders = retrieveRecorders(df)
    labels = dict(enumerate(recorders))

    G = nx.Graph()
    G.add_nodes_from(range(len(recorders)))

    locationsByRecorder = retrieveRecorderLocations(df)
    for position, name in enumerate(recorders):
        for neighbour in recorderCloseTo(name, locationsByRecorder):
            G.add_edge(position, neighbour)

    return G, labels


# Build the proximity graph, compute a spring layout, then draw the recorder-name
# labels followed by the nodes and edges (statement order kept as originally written).
# NOTE(review): no seed is passed to spring_layout, so node positions presumably
# differ from run to run - confirm whether a fixed layout is wanted.
G, labels = buildGraph(df)
pos = nx.spring_layout(G, k=1)
nx.draw_networkx_labels(G, pos, labels=labels, font_size=16)
nx.draw(G, pos=pos, node_size=500)

In [None]:
# Number of edges whose first listed endpoint is node 22.
# NOTE(review): edges that list 22 as the second endpoint are not counted, so this
# may be lower than the node's degree - confirm which figure is intended.
sum(1 for edge in G.edges() if edge[0] == 22)

In [None]:
# Display the node-position -> recorder-name mapping returned by buildGraph.
labels

In [None]:
# Convert the graph to its adjacency matrix, show its shape and contents, and
# write it to BasicSocialFile.txt as integers.
# NOTE(review): nx.to_numpy_matrix is gone from NetworkX 3.x (nx.to_numpy_array
# is the replacement) - confirm the installed version before upgrading.
file = nx.to_numpy_matrix(G)
print(file.shape, end="\n\n")
print(file)
np.savetxt("BasicSocialFile.txt", file, fmt="%d")

# Known observers before and since buyout

In [50]:
# inferredNamesGraph and eiggRawData are not defined in this notebook; presumably
# they come from the star imports of `utils` / `data` - TODO confirm.
# Later cells call people.keys() and treat `observers` as a DataFrame with
# 'Recorder' and 'Start date year' columns.
people = inferredNamesGraph()
observers = eiggRawData()

  if self.run_code(code, result):


In [51]:
#Exact Match
def _recordersBetween(frame, afterYear, upToYear):
    """Return lower-cased recorder names for rows with afterYear < year <= upToYear."""
    years = frame['Start date year']
    inWindow = frame[(years > afterYear) & (years <= upToYear)]
    return [name.lower() for name in inWindow['Recorder'].dropna()]

observers = observers.dropna(subset=['Recorder'])

# Two ten-year windows split at 1997: (1987, 1997] and (1997, 2007].
preBuyout = _recordersBetween(observers, 1987.0, 1997.0)
postBuyout = _recordersBetween(observers, 1997.0, 2007.0)

# `people` arrives as a mapping keyed by name and is replaced by a list of
# lower-cased names containing a space (i.e. more than a single word).
# The hasattr guard keeps this cell re-runnable: on a second run `people` is
# already that list, and calling .keys() on it would raise AttributeError.
knownNames = people.keys() if hasattr(people, 'keys') else people
people = [lowered for lowered in (name.lower() for name in knownNames) if ' ' in lowered]

In [52]:
# (post-buyout, pre-buyout) observation counts.
tuple(len(names) for names in (postBuyout, preBuyout))

(10928, 3341)

In [60]:
def observationsByKnownPeople(people,observersList):
    """Return the percentage of observations recorded by a known person.

    An observation counts when at least one name in `people` occurs as a
    substring of its recorder string; each observation is counted once.

    Args:
        people: iterable of known names (strings).
        observersList: sequence of recorder strings, one per observation.

    Returns:
        Percentage in the range 0-100 as a float; 0.0 when observersList is
        empty (previously this raised ZeroDivisionError).
    """
    if len(observersList) == 0:
        return 0.0
    total = sum(1 for o in observersList if any(p in o for p in people))
    return (total / len(observersList))*100

In [61]:
# Percentage of observations by known people: pre-buyout first, then post-buyout.
for recorderNames in (preBuyout, postBuyout):
    print(observationsByKnownPeople(people, recorderNames))

0.0
0.9791361639824305
