#!/usr/bin/python
# Maelstrom - visualizing email contacts
# Copyright(c) 2008-2009 Stefan Marsiske <my name at gmail.com>
# Extracts a directed graph from the message database:
# the edges are the messages and the nodes are the persons.
# Each edge has a type (to/cc) and a weight (the number of mails).
import sys, os, cStringIO, codecs, datetime, getopt, platform, csv #TODO , ConfigParser
if(platform.machine()=='i686'):
import psyco
from sqlobject import *
from lib.objects import *
from lib.utils import decode_header
# Runtime settings. Some of them are overridden by the command-line options
# parsed at the bottom of this file.
CONFIG = {
    # message database; made absolute against the working directory
    'database': os.path.abspath('../db/messages.db'),
    # file read by loadPersonMap(), one "<key> <name>" pair per line
    'personmapfile': '../db/persons.map',
    # output format: dot|graphxml|log|csv
    'format': "csv",
    # display overall stats
    'stats': False,
    # when set, Graph.stats() also ranks the edges not sent by 'egger'
    'egg': False,
    'egger': 'Marsiske Stefan',
}

# TODO: read the settings from a configuration file instead.
#config = ConfigParser.ConfigParser()
#config.read(['site.cfg', os.path.expanduser('~/.myapp.cfg')])

# first field of a person-map line -> replacement name; filled in by
# loadPersonMap()
personmap = {}
def usage():
    """Print the command-line help to standard output.

    Every option accepted by the option parser of this script is listed:
    -h/--help, -s/--stats, -e/--egg, -f/--format and -d/--database.
    """
    print("usage: %s" % (sys.argv[0]))
    print("\t-h|--help This Help")
    print("\t-s|--stats Display top ten stats on in/out contacts.")
    print("\t-e|--egg With --stats, also rank the edges not sent by the configured 'egger'.")
    print("\t-f|--format=[dot,log,csv] Output format.")
    print("\t-d|--database=FILE Path to an existing message database file.")
def counter(start=0):
    """Generate the endless sequence start+1, start+2, start+3, ...

    The first value produced is `start` + 1, not `start` itself.
    """
    value = start
    while True:
        value += 1
        yield value
# Two independent id sequences (1, 2, 3, ...): one numbers the graph nodes,
# the other one the graph edges.
nodeIdGenerator = counter()
edgeIdGenerator = counter()
class UnicodeWriter:
    """
    A CSV writer which will write rows to CSV file "f",
    which is encoded in the given encoding.

    Each row is first rendered by a regular csv.writer into an in-memory
    queue; the queued text is then re-encoded into `encoding` and written
    to `f`.
    """

    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
        """
        f        -- target stream; it receives the encoded data via write()
        dialect  -- csv dialect handed to csv.writer
        encoding -- encoding of the data written to `f`
        kwds     -- further keyword arguments handed to csv.writer
        """
        # Redirect output to a queue
        self.queue = cStringIO.StringIO()
        self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
        self.stream = f
        self.encoder = codecs.getincrementalencoder(encoding)()

    def _encodeCell(self, cell):
        """Return `cell` in a form the underlying csv writer can take.

        Python 2 unicode objects are encoded as UTF-8, because the Python 2
        csv module only handles byte strings and would otherwise fail on
        non-ASCII text. Every other value is passed through untouched.
        """
        if isinstance(cell, type(u"")) and not isinstance(cell, str):
            return cell.encode("utf-8")
        return cell

    def writerow(self, row):
        """Write one row (an iterable of cells) to the target stream."""
        self.writer.writerow([self._encodeCell(cell) for cell in row])
        # Fetch UTF-8 output from the queue ...
        data = self.queue.getvalue()
        if isinstance(data, bytes):
            data = data.decode("utf-8")
        # ... and reencode it into the target encoding
        data = self.encoder.encode(data)
        # write to the target stream
        self.stream.write(data)
        # empty queue; rewind explicitly so the next row starts at offset 0
        self.queue.seek(0)
        self.queue.truncate(0)
class Node:
    """A person in the contact graph.

    Attributes:
      id            -- unique id "n<number>" drawn from nodeIdGenerator
      name          -- the name the node was created with
      srcTypeStamps -- one (date, type) tuple per mail sent by this node
      dstTypeStamps -- one (date, type) tuple per mail received by this node

    The attribute set is fixed: assigning to any other attribute raises
    AttributeError.
    """

    def __init__(self, name, receivers=None):
        # `receivers` is accepted for backward compatibility only; it has
        # never been used.
        # The attributes are stored through __dict__ because __setattr__
        # refuses names that do not exist yet.
        self.__dict__['id'] = "n" + str(next(nodeIdGenerator))
        self.__dict__['name'] = name
        self.__dict__['srcTypeStamps'] = []
        self.__dict__['dstTypeStamps'] = []

    def __getattr__(self, name):
        """Look `name` up in the instance dict or raise AttributeError."""
        try:
            return self.__dict__[name]
        except KeyError:
            raise AttributeError(name)

    def __setattr__(self, name, value):
        """Allow assignment to already existing attributes only."""
        if name not in self.__dict__:
            raise AttributeError(name)
        self.__dict__[name] = value

    def __str__(self):
        # Wo / Wi: number of mails sent / received
        return "%s\t(Wo:%d, Wi:%d)\n" % (self.name,
                                         len(self.srcTypeStamps),
                                         len(self.dstTypeStamps))

    def __repr__(self):
        return self.__str__()

    def incSrcWeight(self, type, date):
        """Record one sent mail of the given type and date."""
        self.__dict__['srcTypeStamps'].append((date, type))

    def incDstWeight(self, type, date):
        """Record one received mail of the given type and date."""
        self.__dict__['dstTypeStamps'].append((date, type))
class Edge:
    """A directed connection between two persons of the contact graph.

    Attributes:
      id         -- unique id "e<number>" drawn from edgeIdGenerator
      sender     -- name of the sending person
      to         -- name of the receiving person
      typestamps -- one (date, type) tuple per mail sent along this edge

    The attribute set is fixed: assigning to any other attribute raises
    AttributeError.
    """

    def __init__(self, sender, to):
        # The attributes are stored through __dict__ because __setattr__
        # refuses names that do not exist yet.
        self.__dict__['id'] = "e" + str(next(edgeIdGenerator))
        self.__dict__['sender'] = sender
        self.__dict__['to'] = to
        self.__dict__['typestamps'] = []

    def __getattr__(self, name):
        """Look `name` up in the instance dict or raise AttributeError."""
        try:
            return self.__dict__[name]
        except KeyError:
            raise AttributeError(name)

    def __setattr__(self, name, value):
        """Allow assignment to already existing attributes only."""
        if name not in self.__dict__:
            raise AttributeError(name)
        self.__dict__[name] = value

    def __str__(self):
        # W: weight, i.e. the number of mails recorded on this edge
        return "%s -> %s W:%d\n" % (self.sender,
                                    self.to,
                                    len(self.typestamps))

    def __repr__(self):
        return self.__str__()

    def incWeight(self, type, date):
        """Record one mail of the given type and date on this edge."""
        self.__dict__['typestamps'].append((date, type))
class Graph:
    """Directed contact graph.

    Attributes:
      nodes -- maps a person's name to its Node
      edges -- maps a (sender, to) name pair to its Edge

    The attribute set is fixed: assigning to any other attribute raises
    AttributeError.
    """

    def __init__(self):
        # Stored through __dict__ because __setattr__ refuses names that do
        # not exist yet.
        self.__dict__['nodes'] = {}
        self.__dict__['edges'] = {}

    def __str__(self):
        return "Nodes: %s\nEdges: %s\n" % (self.nodes, self.edges)

    def __repr__(self):
        return self.__str__()

    def __getattr__(self, name):
        """Look `name` up in the instance dict or raise AttributeError."""
        try:
            return self.__dict__[name]
        except KeyError:
            raise AttributeError(name)

    def __setattr__(self, name, value):
        """Allow assignment to already existing attributes only."""
        if name not in self.__dict__:
            raise AttributeError(name)
        self.__dict__[name] = value

    def _nodeFor(self, name):
        """Return the node registered under `name`, creating it if needed."""
        if name not in self.nodes:
            self.nodes[name] = Node(name)
        return self.nodes[name]

    def _edgeFor(self, sender, to):
        """Return the edge sender -> to, creating it if needed."""
        key = (sender, to)
        if key not in self.edges:
            self.edges[key] = Edge(sender, to)
        return self.edges[key]

    def addToGraph(self, date, sender, type, to):
        """Record one mail of the given type sent from `sender` to `to`.

        Missing nodes and a missing edge are created first (sender node,
        then receiver node, then edge); afterwards the (date, type) stamp is
        added to the edge, to the sender's outgoing list and to the
        receiver's incoming list.
        """
        src = self._nodeFor(sender)
        dst = self._nodeFor(to)
        edge = self._edgeFor(sender, to)
        # adjust weight on edge
        edge.incWeight(type, date)
        # adjust weight on nodes
        src.incSrcWeight(type, date)
        dst.incDstWeight(type, date)

    def _printTop(self, title, items, weight):
        """Print `title` and the ten heaviest items; return the full ranking.

        The sort is stable, so items of equal weight keep the order in which
        `items` delivers them.
        """
        ranked = sorted(items, key=weight, reverse=True)
        print("%s\n%s" % (title, ranked[:10]))
        return ranked

    def stats(self):
        """Print the top ten nodes and edges by number of mails.

        Nodes are ranked by sent plus received, by sent (Wo) and by received
        (Wi) mails; edges by the number of mails they carry. When
        CONFIG['egg'] is set, the edges whose sender is not CONFIG['egger']
        are ranked as well. Nothing is returned.
        """
        nodes = list(self.nodes.values())
        self._printTop("Top Overall", nodes,
                       lambda n: len(n.srcTypeStamps) + len(n.dstTypeStamps))
        # print out top senders
        self._printTop("Top Wo", nodes, lambda n: len(n.srcTypeStamps))
        # print out top recipients
        self._printTop("Top Wi", nodes, lambda n: len(n.dstTypeStamps))
        # print out top edges
        edges = self._printTop("Top Overall edges", self.edges.values(),
                               lambda e: len(e.typestamps))
        if CONFIG['egg']:
            # filtering the ranked list keeps it ranked
            eggs = [e for e in edges if e.sender != CONFIG['egger']]
            print("Top Overall eggs\n%s" % (eggs[:10],))

    def _dotLabel(self, name):
        """Return `name` as text that is safe inside a quoted DOT label."""
        text = '%s' % (name,)
        return text.replace('\\', '\\\\').replace('"', '\\"')

    def dot(self):
        """Return the graph as Graphviz DOT source.

        Every node becomes a statement labelled with the person's name and
        every edge a statement whose weight is its number of mails.
        """
        parts = ["digraph G {\noverlap = false;\nsplines=true;\n"]
        for node in self.nodes.values():
            parts.append('%s [ label="%s"];\n'
                         % (node.id, self._dotLabel(node.name)))
        for edge in self.edges.values():
            parts.append('"%s" -> "%s" [ weight="%d" ];\n'
                         % (self.nodes[edge.sender].id,
                            self.nodes[edge.to].id,
                            len(edge.typestamps)))
        parts.append("}")
        return "".join(parts)
def loadPersonMap():
    """Fill the module-level `personmap` from CONFIG['personmapfile'].

    Each line of the file has the form "<key> <name>": the text up to the
    first space becomes the key, the stripped remainder the mapped name.
    Lines without a space (blank lines included) are skipped. A missing file
    leaves `personmap` untouched.
    """
    path = CONFIG['personmapfile']
    if not os.path.exists(path):
        return
    # the context manager closes the file even if a line cannot be processed
    with open(path, 'r') as fp:
        for line in fp:
            fields = line.split(" ", 1)
            if len(fields) != 2:
                # no separator: nothing to map
                continue
            email, name = fields
            personmap[email] = name.strip()
def buildGraph():
    """Build the contact graph from all rows returned by Role.select().

    Each row supplies a message (`msg`), a header (`header`) and an address
    (`email`). For every row the sender and the receiver are turned into
    display names, the row is echoed to standard output when the configured
    format is "log" or "csv", and the mail is added to the graph.

    Returns the populated Graph.
    """
    graph = Graph()
    q = Role.select()
    csvcoder=None
    if(CONFIG['format']=="csv"):
        csvcoder=UnicodeWriter(sys.stdout)
    for edge in q:
        # Sender name: the owner's full name if the sending address has an
        # owner, otherwise "username@mailserver".
        # NOTE(review): when the message has no sender, the message object
        # itself is used as the sender name -- confirm this is intended.
        sender = edge.msg
        if(edge.msg.sender and edge.msg.sender.owner):
            sender = edge.msg.sender.owner.fullname
        elif(edge.msg.sender):
            sender = edge.msg.sender.username+"@"+edge.msg.sender.mailserver
        # a person-map entry overrides the name found above
        if(sender and personmap.has_key(sender)):
            sender = personmap[sender]
        # Receiver name: resolved the same way from the row's address.
        receiver = edge.email
        if(edge.email and edge.email.owner):
            receiver = edge.email.owner.fullname
        elif(edge.email):
            receiver = edge.email.username+"@"+edge.email.mailserver
        if(receiver and personmap.has_key(receiver)):
            receiver = personmap[receiver]
        if(CONFIG['format']=="log"):
            print edge.msg.delivered,sender, edge.header.name, receiver
        elif(CONFIG['format']=="csv"):
            # NOTE(review): str() raises UnicodeEncodeError on Python 2 if a
            # name is a unicode object holding non-ASCII text -- verify the
            # types the database layer delivers.
            csvcoder.writerow(map(lambda x: str(x),(edge.msg.delivered,sender, edge.header.name, receiver)))
        # the graph is built for every output format
        graph.addToGraph(edge.msg.delivered,sender,edge.header.name, receiver)
    return graph
def dumpResults(graph):
    """Write the results selected in CONFIG to standard output.

    graph -- the graph to report on; it must provide dot() and stats()

    With the "dot" format the DOT source of the graph is printed. With
    CONFIG['stats'] set, the statistics report is printed as well.
    """
    if CONFIG['format'] == "dot":
        print(graph.dot())
    if CONFIG['stats']:
        # stats() prints its report itself and returns nothing, so its
        # result must not be printed (that would add a stray "None" line).
        graph.stats()
def main():
    """Run the program: load the person map, build the graph, report it."""
    loadPersonMap()
    dumpResults(buildGraph())
def parseOptions(argv):
    """Apply the command-line options in `argv` to CONFIG.

    argv -- the argument list without the program name

    Recognised options:
      -h, --help            print the help and exit
      -d, --database=FILE   use FILE as database if it is an existing file
      -e, --egg             enable the 'egg' ranking
      -s, --stats           enable the statistics report
      -f, --format=FORMAT   output format: dot, log, graphxml or csv

    An unknown option or an unsupported format prints the help and exits
    with status 2.
    """
    try:
        opts, args = getopt.gnu_getopt(argv,
                                       "hesf:d:",
                                       ["help",
                                        "egg",
                                        "stats",
                                        "format=",
                                        "database="])
    except getopt.GetoptError:
        usage()
        sys.exit(2)
    for o, a in opts:
        if o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-d", "--database"):
            if a and os.path.isfile(a):
                CONFIG['database'] = a
            else:
                # keep the default, but say so instead of ignoring silently
                sys.stderr.write("database %r is not a file, keeping %r\n"
                                 % (a, CONFIG['database']))
        elif o in ("-e", "--egg"):
            # the default is False, so the option has to switch it on
            CONFIG['egg'] = True
        elif o in ("-s", "--stats"):
            CONFIG['stats'] = True
        elif o in ("-f", "--format"):
            if a in ("dot", "log", "graphxml", "csv"):
                CONFIG['format'] = a
            else:
                usage()
                sys.exit(2)


if __name__ == '__main__':
    parseOptions(sys.argv[1:])
    if platform.machine() == 'i686':
        psyco.full()
    sys.exit(main())