Skip to content

Commit

Permalink
Calculate community sizes; Helper scripts for reporting useful statis…
Browse files Browse the repository at this point in the history
…tics
  • Loading branch information
grammarware committed Jan 27, 2013
1 parent 508beab commit cb57e36
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 0 deletions.
40 changes: 40 additions & 0 deletions dblp/community.py
@@ -0,0 +1,40 @@
#! /usr/bin/env python
# this script computes the community size per year
import sys

# Characters removed from a venue key before its first four remaining
# characters are taken as the year: the '/' and '-' separators and every
# lowercase ASCII letter.
_NON_YEAR_CHARS = frozenset('/-abcdefghijklmnopqrstuvwxyz')


def parse_triple(line):
    """Split a line shaped like `"x" relation "y"` into (x, relation, y).

    The relation is returned with its surrounding spaces intact (for
    example ' partOf '), because it is simply the text found between the
    two quoted fields.  Returns None when the line does not hold the three
    expected fields (e.g. a blank line) so that callers can skip it.
    """
    fields = line.strip().split('"')[1:4]
    if len(fields) != 3:
        return None
    return tuple(fields)


def year_of(venue_key):
    """Return the year bucket of a venue key.

    Drops '/', '-' and lowercase ASCII letters, then keeps the first four
    remaining characters, e.g. 'conf/models/models2005' -> '2005'.
    NOTE: uppercase letters and digits that precede the year in a key are
    not removed, so such keys yield a bucket that is not a clean year.
    """
    kept = [ch for ch in venue_key if ch not in _NON_YEAR_CHARS]
    return ''.join(kept)[:4]


def community_sizes(part_of_lines, published_at_lines, community='MoDELS'):
    """Count the distinct publishedAt subjects per year for one community.

    part_of_lines:      iterable of `"venue" partOf "community"` lines.
    published_at_lines: iterable of `"subject" publishedAt "venue"` lines.
    community:          object of the partOf triples that selects venues.

    Returns a dict mapping each year bucket (see year_of) to the number of
    distinct subjects published at any selected venue of that year.  A
    selected venue without any publishedAt line still contributes its year
    with a count of 0.
    """
    # venue key -> set of subjects; sets keep membership tests O(1) where
    # list scans would make the pass over publishedAt quadratic.
    members = {}
    for line in part_of_lines:
        triple = parse_triple(line)
        if triple is None:
            continue
        venue, relation, parent = triple
        if relation == ' partOf ' and parent == community:
            members[venue] = set()

    for line in published_at_lines:
        triple = parse_triple(line)
        if triple is None:
            continue
        subject, relation, venue = triple
        if relation == ' publishedAt ' and venue in members:
            members[venue].add(subject)

    # Merge venues that fall into the same year bucket.
    per_year = {}
    for venue, subjects in members.items():
        per_year.setdefault(year_of(venue), set()).update(subjects)
    return dict((year, len(subjects)) for year, subjects in per_year.items())


def main():
    """Read the rdf/ input files and print one `year   size` line per year."""
    # Optional first argument overrides the community; default is unchanged.
    community = sys.argv[1] if len(sys.argv) > 1 else 'MoDELS'
    # File objects are iterated lazily, so publishedAt is streamed rather
    # than loaded whole; `with` closes both files even if parsing fails.
    with open('rdf/partOf.curated.txt', 'r') as part_of:
        with open('rdf/publishedAt.txt', 'r') as published_at:
            sizes = community_sizes(part_of, published_at, community)
    for year in sorted(sizes):
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print('%s   %d' % (year, sizes[year]))


if __name__ == '__main__':
    main()
25 changes: 25 additions & 0 deletions dblp/reportx.py
@@ -0,0 +1,25 @@
#! /usr/bin/env python
# this script counts the distinct subjects (first quoted field) of the given relation's triples in rdf/<relation>.curated.txt
import sys

def parse_triple(line):
    """Split a line shaped like `"x" relation "y"` into (x, relation, y).

    The relation is returned with its surrounding spaces intact.  Returns
    None when the line does not hold the three expected fields (e.g. a
    blank line) so that callers can skip it.
    """
    fields = line.strip().split('"')[1:4]
    if len(fields) != 3:
        return None
    return tuple(fields)


def distinct_subjects(lines, rel):
    """Return the set of distinct subjects of the `rel` triples in lines.

    lines: iterable of `"subject" rel "object"` lines.
    rel:   relation name without surrounding spaces, e.g. 'partOf'.
    """
    wanted = ' %s ' % rel
    # A set gives O(1) duplicate detection where a list scan per line
    # would make the pass over the file quadratic.
    subjects = set()
    for line in lines:
        triple = parse_triple(line)
        if triple is not None and triple[1] == wanted:
            subjects.add(triple[0])
    return subjects


def main():
    """Print which file is used and how many distinct subjects it holds.

    Usage: RELATION [ANYTHING]; a second argument of any value selects
    rdf/RELATION.really.curated.txt over rdf/RELATION.curated.txt.
    """
    if len(sys.argv) < 2:
        sys.exit('usage: %s RELATION [use-really-curated]' % sys.argv[0])
    rel = sys.argv[1]
    if len(sys.argv) > 2:
        path = 'rdf/%s.really.curated.txt' % rel
    else:
        path = 'rdf/%s.curated.txt' % rel
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print('Using %s' % path)
    # The file object is iterated lazily; `with` closes it on any exit.
    with open(path, 'r') as triples:
        print(len(distinct_subjects(triples, rel)))


if __name__ == '__main__':
    main()
25 changes: 25 additions & 0 deletions dblp/reporty.py
@@ -0,0 +1,25 @@
#! /usr/bin/env python
# this script counts the distinct objects (second quoted field) of the given relation's triples in rdf/<relation>.curated.txt
import sys

def parse_triple(line):
    """Split a line shaped like `"x" relation "y"` into (x, relation, y).

    The relation is returned with its surrounding spaces intact.  Returns
    None when the line does not hold the three expected fields (e.g. a
    blank line) so that callers can skip it.
    """
    fields = line.strip().split('"')[1:4]
    if len(fields) != 3:
        return None
    return tuple(fields)


def distinct_objects(lines, rel):
    """Return the set of distinct objects of the `rel` triples in lines.

    lines: iterable of `"subject" rel "object"` lines.
    rel:   relation name without surrounding spaces, e.g. 'partOf'.
    """
    wanted = ' %s ' % rel
    # A set gives O(1) duplicate detection where a list scan per line
    # would make the pass over the file quadratic.
    objects = set()
    for line in lines:
        triple = parse_triple(line)
        if triple is not None and triple[1] == wanted:
            objects.add(triple[2])
    return objects


def main():
    """Print which file is used and how many distinct objects it holds.

    Usage: RELATION [ANYTHING]; a second argument of any value selects
    rdf/RELATION.really.curated.txt over rdf/RELATION.curated.txt.
    """
    if len(sys.argv) < 2:
        sys.exit('usage: %s RELATION [use-really-curated]' % sys.argv[0])
    rel = sys.argv[1]
    if len(sys.argv) > 2:
        path = 'rdf/%s.really.curated.txt' % rel
    else:
        path = 'rdf/%s.curated.txt' % rel
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print('Using %s' % path)
    # The file object is iterated lazily; `with` closes it on any exit.
    with open(path, 'r') as triples:
        print(len(distinct_objects(triples, rel)))


if __name__ == '__main__':
    main()

0 comments on commit cb57e36

Please sign in to comment.