In [10]:
import os
import re
import sqlite3
import csv
import pandas as pd
from datetime import datetime
from tqdm.auto import tqdm
import json

In [11]:
# --- SQLite database layout -------------------------------------------------

# SQLite file holding the training data.
dbname = os.path.join('toy.db')

# Table that keeps track of the logical time of the last inserted edge,
# i.e. the total number of edges.
counterTableName = 'counter'

# Table of the pre-loaded graphs used to calculate the regularity score.
edgeTableName = 'edges'

# Table of the graph on which the regularity score is calculated
# (not referenced by the functions visible in this notebook section).
evalEdgeTableName = 'edgestemp'


# --- Time-window state ------------------------------------------------------

# Number of windows the recorded history is divided into.
totalTimeWindows = 20
# Width of one window in logical-time units; recomputed by timeWindowCalc().
timeWindow = 1
# Last logical time read from the counter table; 0 means "not loaded yet".
totalTime = 0


# --- Score cache ------------------------------------------------------------

# Maps the concatenated key src_type + edge_type + dest_type to its
# regularity score.
reScoresTable = {}

def timeWindowCalc():
    """Load the edge counter and derive the time-window width.

    Reads the first column of the first row of ``counterTableName`` in the
    ``dbname`` database into the global ``totalTime`` and sets the global
    ``timeWindow`` to ``totalTime / totalTimeWindows`` truncated to an int.

    The width is clamped to at least 1: with fewer recorded edges than
    ``totalTimeWindows`` the truncated quotient would be 0, and a zero step
    makes the window loop (``range(1, totalTime, timeWindow)``) raise.

    Raises:
        ValueError: if the counter table contains no rows.
    """
    global totalTime
    global timeWindow

    conn = sqlite3.connect(dbname)
    try:
        counterRow = conn.execute(
            """select * from {};""".format(counterTableName)
        ).fetchone()
    finally:
        # Release the connection even when the query fails.
        conn.close()

    if counterRow is None:
        raise ValueError(
            "table '{}' in '{}' is empty; cannot determine the total logical time".format(
                counterTableName, dbname
            )
        )

    totalTime = counterRow[0]
    timeWindow = max(1, int(totalTime / totalTimeWindows))

def getStability(src, edgeType, dest):
    """Compute the regularity score of one edge signature and cache it.

    The history stored in ``edgeTableName`` is scanned in windows of
    ``timeWindow`` logical-time units, starting at
    ``range(1, totalTime, timeWindow)``. For every window two counts are taken:

    * countFrom: edges with ``src_type == src`` and ``edge_type == edgeType``
      whose ``dest_type`` differs from ``dest``;
    * countTo: edges with ``dest_type == dest`` and ``edge_type == edgeType``
      whose ``src_type`` differs from ``src``.

    OUT(src) is the fraction of windows where countFrom is 0 and IN(dest) the
    fraction where countTo is 0. A fraction of exactly 0 is replaced by 1e-7 so
    the product never collapses to zero. When no window can be formed at all
    (``totalTime <= 1``) both fractions fall back to that same floor instead
    of dividing by zero.

    The product IN(dest) * OUT(src) is stored in ``reScoresTable`` under the key
    ``"{src}{edgeType}{dest}"``. Nothing is returned.

    Args:
        src: source node type.
        edgeType: edge type.
        dest: destination node type.
    """
    # Smallest stability value allowed; avoids a zero regularity score.
    minStability = 0.0000001

    # A zero step would make range() raise, so never go below one.
    windowSize = max(1, timeWindow)

    # Values are bound as parameters so quotes in a type name cannot break
    # (or alter) the statement. Only the table name is formatted in, because
    # identifiers cannot be bound. str() mirrors what str.format() inserted.
    srcValue, edgeValue, destValue = str(src), str(edgeType), str(dest)

    qryFrom = """SELECT count(*) from {}
                where src_type = ? and
                edge_type = ? and
                dest_type != ? and
                logical_time >= ? and logical_time <= ?;""".format(edgeTableName)

    qryTo = """SELECT count(*) from {}
                where dest_type = ? and
                edge_type = ? and
                src_type != ? and
                logical_time >= ? and logical_time <= ?;""".format(edgeTableName)

    # Same notation as in the research paper, initial count = 0.
    T_from = 0      # windows that are "out stable" for src
    T_to = 0        # windows that are "in stable" for dest
    T_total = 0

    conn = sqlite3.connect(dbname)
    try:
        cur = conn.cursor()
        for timeStart in range(1, totalTime, windowSize):
            timeEnd = timeStart + windowSize - 1

            countFrom = cur.execute(
                qryFrom, (srcValue, edgeValue, destValue, timeStart, timeEnd)
            ).fetchone()[0]
            countTo = cur.execute(
                qryTo, (destValue, edgeValue, srcValue, timeStart, timeEnd)
            ).fetchone()[0]

            if countFrom == 0:
                T_from += 1
            if countTo == 0:
                T_to += 1
            T_total += 1
    finally:
        # Release the connection even when a query fails.
        conn.close()

    if T_total == 0:
        # No history window available: use the floor for both directions.
        IN_dest = 0.0
        OUT_src = 0.0
    else:
        IN_dest = float(T_to) / float(T_total)
        OUT_src = float(T_from) / float(T_total)

    # Avoid a zero value for either stability.
    if IN_dest == 0.0:
        IN_dest = minStability
    if OUT_src == 0.0:
        OUT_src = minStability

    reScoresTable["{}{}{}".format(src, edgeType, dest)] = float(IN_dest * OUT_src)


#input as src_type, edge_type, dest_type
def calculateRegularityScore(src_type, edge_type, dest_type):
    """Return the regularity score for a (src_type, edge_type, dest_type) edge.

    On the first use (``totalTime`` still 0) the time-window settings are
    loaded through ``timeWindowCalc()``. The score is looked up in
    ``reScoresTable`` under the concatenated key; when it is missing,
    ``getStability()`` is asked to compute and store it first.
    """
    if totalTime == 0:
        print('timeWindowCalc()')
        timeWindowCalc()

    cacheKey = src_type + edge_type + dest_type
    if reScoresTable.get(cacheKey) is None:
        # Not cached yet: getStability() fills reScoresTable as a side effect.
        getStability(src_type, edge_type, dest_type)

    return reScoresTable.get(cacheKey)

In [12]:
def evaluvateGraph(tableName, graphFile):
    """Propagate regularity scores along the edges of one graph.

    The edges are read from ``graphFile`` (CSV with at least the columns
    ``src_id``, ``dest_id``, ``src_type``, ``edge_type``, ``dest_type``) and
    processed in file order. A source node seen for the first time starts
    with score 1.0; each edge then sets
    ``score[dest_id] = score[src_id] * calculateRegularityScore(...)``,
    overwriting any earlier score of that destination.

    Afterwards every node that ``tableName`` (in the ``dbname`` database)
    lists as a destination and that is also found among its ``src_id`` values
    is removed from the result.

    Args:
        tableName: SQLite table holding the same graph (``src_id``/``dest_id``).
        graphFile: path of the CSV file with the graph's edges.

    Returns:
        dict mapping node id (int) to its accumulated score (float).
    """
    qry = """SELECT DISTINCT dest_id from {}  
            where dest_id not in
            (SELECT DISTINCT dest_id from {}  
            where dest_id not in ( SELECT DISTINCT src_id from {} ))""".format(tableName, tableName, tableName)

    conn = sqlite3.connect(dbname)
    try:
        # Read-only query: nothing to commit.
        nonLeafDestNodes = conn.execute(qry).fetchall()
    finally:
        # Release the connection even when the query fails.
        conn.close()

    df = pd.concat(pd.read_csv(graphFile, chunksize=10000))

    reScores = {}
    with tqdm(total=len(df.index)) as pbar:
        # Iterating the needed columns directly avoids building one Series
        # per row, which is what makes iterrows() slow on large graphs.
        edges = zip(df['src_id'], df['dest_id'],
                    df['src_type'], df['edge_type'], df['dest_type'])
        for rawSrc, rawDest, src_type, edge_type, dest_type in edges:
            src_id = int(rawSrc)
            dest_id = int(rawDest)
            if src_id not in reScores:
                reScores[src_id] = 1.0

            reScore = calculateRegularityScore(src_type, edge_type, dest_type)
            reScores[dest_id] = reScores[src_id] * reScore

            pbar.update(1)

    for node in nonLeafDestNodes:
        # The table may list nodes the CSV never mentioned; a missing key
        # simply means there is nothing to remove.
        reScores.pop(node[0], None)

    return reScores

In [13]:
def convertTxtToCsv(fname, inDir='parsed-graphs', outDir='test-graphs'):
    """Convert a parsed graph text file into a CSV file with a header row.

    Reads ``<inDir>/<fname>.txt``, skips lines that are empty after stripping,
    splits every remaining line on single spaces and colons, and writes the
    resulting fields to ``<outDir>/<fname>.csv`` below the header
    ``src_id, dest_id, src_type, dest_type, edge_type, logical_time``.

    Args:
        fname: file name without directory and extension.
        inDir: directory containing the ``.txt`` input.
        outDir: directory receiving the ``.csv`` output; created if missing.
    """
    header = ('src_id', 'dest_id', 'src_type', 'dest_type', 'edge_type', 'logical_time')
    srcPath = os.path.join(inDir, fname + '.txt')
    dstPath = os.path.join(outDir, fname + '.csv')

    with open(srcPath, 'r') as in_file:
        # Only create the output directory once the input is known to exist.
        if outDir:
            os.makedirs(outDir, exist_ok=True)

        stripped = (line.strip() for line in in_file)
        rows = (re.split(' |:', line) for line in stripped if line)

        # newline='' is required by the csv module; without it the writer's
        # '\r\n' terminator becomes '\r\r\n' on platforms that translate '\n'.
        with open(dstPath, 'w', newline='') as out_file:
            writer = csv.writer(out_file)
            writer.writerow(header)
            writer.writerows(rows)

def populateDB(fname):
    """Load ``test-graphs/<fname>.csv`` into a table of the ``dbname`` database.

    The table is named after ``fname`` with every '-' removed and replaces any
    existing table of that name. The database is the one configured in
    ``dbname`` so that the table is written to the same file the evaluation
    reads from.

    Args:
        fname: CSV file name without directory and extension.
    """
    # Read first: a missing or malformed CSV then never opens a connection.
    graphDf = pd.read_csv('test-graphs/' + fname + '.csv')

    conn = sqlite3.connect(dbname)
    try:
        graphDf.to_sql(fname.replace('-', ''), conn, if_exists='replace')
    finally:
        # Release the connection even when writing the table fails.
        conn.close()

def deleteTable(fname):
    """Drop the table derived from ``fname`` from the ``dbname`` database.

    The table name is ``fname`` with every '-' removed. Dropping a table that
    does not exist is not an error (``DROP TABLE IF EXISTS``).

    Args:
        fname: graph file name the table was created from.

    Returns:
        The cursor returned by executing the DROP statement. Its connection
        is already closed when this function returns.
    """
    tableName = fname.replace('-', '')
    # Quote the identifier so names that are not bare SQL identifiers
    # (for example ones starting with a digit) can be dropped too.
    quotedName = '"{}"'.format(tableName.replace('"', '""'))

    conn = sqlite3.connect(dbname)
    try:
        res = conn.cursor().execute("""DROP TABLE IF EXISTS {}""".format(quotedName))
        conn.commit()
    finally:
        # Release the connection even when the statement fails.
        conn.close()
    return res
    

def test():
    """Score every available test graph and write the results as JSON.

    For each name ``base-<i>`` in the configured ranges whose
    ``parsed-graphs/<name>.txt`` exists, the graph is converted to CSV, loaded
    into a temporary table, evaluated with ``evaluvateGraph`` and the scores
    are dumped to ``result/<name>.json``. Names without an input file are
    reported and skipped.
    """
    ### give range of test files here (start inclusive, stop exclusive)
    fileRanges = ((70, 85), (170, 185), (300, 315))
    fnames = ['base-' + str(i)
              for start, stop in fileRanges
              for i in range(start, stop)]
    print(fnames)

    # json output below would fail if the directory were missing.
    os.makedirs('result', exist_ok=True)

    with tqdm(total=len(fnames)) as pbar1:
        for f in fnames:
            if os.path.exists('parsed-graphs/' + f + '.txt'):
                convertTxtToCsv(f)
                populateDB(f)
                ### temp DB table name as fname
                tableName = ''.join(f.split('-'))
                try:
                    reScores = evaluvateGraph(tableName, 'test-graphs/' + f + '.csv')
                finally:
                    # Always remove the temporary table, even when the
                    # evaluation fails, so it is not left in the database.
                    deleteTable(f)
                with open('result/' + f + '.json', 'w') as fp:
                    json.dump(reScores, fp)
            else:
                print('{} does not exists'.format(f))
            pbar1.update(1)
        
# Run the evaluation over all configured test graphs; the captured output
# below lists the file names, one progress bar per graph and the missing inputs.
test()

['base-70', 'base-71', 'base-72', 'base-73', 'base-74', 'base-75', 'base-76', 'base-77', 'base-78', 'base-79', 'base-80', 'base-81', 'base-82', 'base-83', 'base-84', 'base-170', 'base-171', 'base-172', 'base-173', 'base-174', 'base-175', 'base-176', 'base-177', 'base-178', 'base-179', 'base-180', 'base-181', 'base-182', 'base-183', 'base-184', 'base-300', 'base-301', 'base-302', 'base-303', 'base-304', 'base-305', 'base-306', 'base-307', 'base-308', 'base-309', 'base-310', 'base-311', 'base-312', 'base-313', 'base-314']


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/101280 [00:00<?, ?it/s]

timeWindowCalc()


  0%|          | 0/216745 [00:00<?, ?it/s]

  0%|          | 0/105935 [00:00<?, ?it/s]

  0%|          | 0/111652 [00:00<?, ?it/s]

  0%|          | 0/112569 [00:00<?, ?it/s]

  0%|          | 0/98641 [00:00<?, ?it/s]

  0%|          | 0/219583 [00:00<?, ?it/s]

  0%|          | 0/102220 [00:00<?, ?it/s]

  0%|          | 0/100268 [00:00<?, ?it/s]

  0%|          | 0/109131 [00:00<?, ?it/s]

  0%|          | 0/100138 [00:00<?, ?it/s]

  0%|          | 0/150109 [00:00<?, ?it/s]

base-82 does not exists
base-83 does not exists


  0%|          | 0/98267 [00:00<?, ?it/s]

  0%|          | 0/33074 [00:00<?, ?it/s]

  0%|          | 0/32016 [00:00<?, ?it/s]

  0%|          | 0/32590 [00:00<?, ?it/s]

  0%|          | 0/33466 [00:00<?, ?it/s]

  0%|          | 0/31208 [00:00<?, ?it/s]

  0%|          | 0/40000 [00:00<?, ?it/s]

  0%|          | 0/32001 [00:00<?, ?it/s]

  0%|          | 0/37768 [00:00<?, ?it/s]

  0%|          | 0/33217 [00:00<?, ?it/s]

  0%|          | 0/32930 [00:00<?, ?it/s]

  0%|          | 0/32457 [00:00<?, ?it/s]

  0%|          | 0/39596 [00:00<?, ?it/s]

  0%|          | 0/39831 [00:00<?, ?it/s]

  0%|          | 0/34197 [00:00<?, ?it/s]

  0%|          | 0/39412 [00:00<?, ?it/s]

  0%|          | 0/28527 [00:00<?, ?it/s]

  0%|          | 0/28441 [00:00<?, ?it/s]

  0%|          | 0/28531 [00:00<?, ?it/s]

  0%|          | 0/28351 [00:00<?, ?it/s]

  0%|          | 0/28459 [00:00<?, ?it/s]

  0%|          | 0/28550 [00:00<?, ?it/s]

  0%|          | 0/28361 [00:00<?, ?it/s]

  0%|          | 0/28657 [00:00<?, ?it/s]

  0%|          | 0/28433 [00:00<?, ?it/s]

  0%|          | 0/28389 [00:00<?, ?it/s]

0it [00:00, ?it/s]

  0%|          | 0/28414 [00:00<?, ?it/s]

  0%|          | 0/28519 [00:00<?, ?it/s]

  0%|          | 0/28516 [00:00<?, ?it/s]

  0%|          | 0/28566 [00:00<?, ?it/s]