Skip to content

Commit

Permalink
saving the score in the db
Browse files Browse the repository at this point in the history
  • Loading branch information
janez87 committed Mar 20, 2017
1 parent 38d7271 commit 1b15e8b
Show file tree
Hide file tree
Showing 9 changed files with 1,040 additions and 13 deletions.
3 changes: 3 additions & 0 deletions .vscode/settings.json
@@ -0,0 +1,3 @@
{
"python.linting.pylintEnabled": false
}
10 changes: 5 additions & 5 deletions app.py
Expand Up @@ -61,15 +61,15 @@ def run():

@app.route('/start')
def start_pipeline():
dbManager = DBManager("ske_db")
dbManager = DBManager("ske_db",1)

pipeline = Pipeline(dbManager,"./knowledge_extractor/data/expert_types.csv")
scores = pipeline.run()

#pipeline = Pipeline(dbManager)
#scores = pipeline.run()
_thread.start_new_thread(Pipeline, (dbManager,1))
#pprint.pprint(fv["seeds"].head())
#fv["candidates"].to_csv("cand_fv.csv")
#fv["seeds"].to_csv("seed_fv.csv")
return scores
return render_template('redirect.html',title='Completed Request')

if __name__ == '__main__':
app.run()
245 changes: 245 additions & 0 deletions cand.csv

Large diffs are not rendered by default.

245 changes: 245 additions & 0 deletions cand_fv.csv

Large diffs are not rendered by default.

8 changes: 8 additions & 0 deletions knowledge_extractor/data/expert_types.csv
@@ -0,0 +1,8 @@
[
"http://dbpedia.org/ontology/Broadcaster",
"http://dbpedia.org/ontology/Artist",
"http://dbpedia.org/ontology/Magazine",
"http://dbpedia.org/ontology/Model",
"http://dbpedia.org/ontology/Organisation",
"http://dbpedia.org/ontology/TelevisionShow"
]
17 changes: 10 additions & 7 deletions knowledge_extractor/pipeline.py
Expand Up @@ -48,8 +48,8 @@ def computeSeedVectors(self,seeds):

for seed in seeds:
#compute array of mentioned entities
mentions[seed["_id"]] = ehe.getEntities(seed)
ast_mentions[seed["_id"]] = ast.getEntities(seed)
mentions[seed["handle"]] = ehe.getEntities(seed)
ast_mentions[seed["handle"]] = ast.getEntities(seed)

space_ehe = self.createSpace(mentions)
space_ast = self.createSpace(ast_mentions)
Expand Down Expand Up @@ -79,8 +79,8 @@ def computeCandidatesVectors(self,cands,space_ast,space_ehe):

for cand in cands:
#compute array of mentioned entities
mentions[cand["_id"]] = ehe.getEntities(cand)
ast_mentions[cand["_id"]] = ast.getEntities(cand)
mentions[cand["handle"]] = ehe.getEntities(cand)
ast_mentions[cand["handle"]] = ast.getEntities(cand)


print("Creating feature vector for the candidates")
Expand Down Expand Up @@ -109,11 +109,13 @@ def run(self):
centroid = self.createCentroid(feature_vectors["seeds"])
centroid = centroid.values

scores = feature_vectors["candidates"].apply(lambda row: cosine(row,centroid),axis=1)
pprint.pprint(scores)
scores = feature_vectors["candidates"].apply(lambda row: 1-cosine(row,centroid),axis=1)

self.db.saveScores(scores)

return scores

def __init__(self,db,expertFile):
def __init__(self,db,experiment_id):
self.alfa=0.7
self.db=db
self.expertFile = [
Expand All @@ -124,3 +126,4 @@ def __init__(self,db,expertFile):
"http://dbpedia.org/ontology/Organisation",
"http://dbpedia.org/ontology/TelevisionShow"
]
self.run()
255 changes: 255 additions & 0 deletions seed_fv.csv

Large diffs are not rendered by default.

255 changes: 255 additions & 0 deletions test.csv

Large diffs are not rendered by default.

15 changes: 14 additions & 1 deletion utils/DBManager.py
Expand Up @@ -17,7 +17,20 @@ def getMentions(self,query):
def getMentionType(self,query):
collection = "entity"
return self.db[collection].find(query,{"types":1})

def saveScores(self,scores):
    """Persist one ranking document per scored handle.

    Args:
        scores: Mapping-like object exposing ``items()`` that yields
            ``(handle, score)`` pairs (presumably the pandas Series
            produced by the pipeline -- confirm against the caller).

    Each pair is stored in the "rankings" collection together with this
    manager's ``experiment_id``. Nothing is written when ``scores`` is
    empty.
    """
    collection = "rankings"

    documents = [
        {
            "handle": handle,
            "score": value,
            "experiment_id": self.experiment_id,
        }
        for handle, value in scores.items()
    ]

    # insert_many rejects an empty list, so skip the write entirely.
    if not documents:
        return

    # One bulk write replaces the per-row Collection.insert calls; insert is
    # deprecated in PyMongo 3 and removed in PyMongo 4.
    self.db[collection].insert_many(documents)


def __init__(self,dbmane):
def __init__(self,dbmane,experiment_id):
self.client = MongoClient()
self.db = self.client[dbmane]
self.experiment_id = experiment_id

0 comments on commit 1b15e8b

Please sign in to comment.