In [1]:
import re
import json
import numpy as np
import pandas as pd

# Load the raw skill-graph records and the keyword dictionary from disk.
df = pd.read_json(
    path_or_buf="output/user_skill_graph-TEST.json",
    orient="records",
)
with open("pair-app-data/skill-words.json") as input_file:
    words = json.load(input_file)

# Convert each record's category collection to a tuple.
df['categories'] = df['categories'].apply(tuple)

# Flatten the records: emit one output row per (record, category) pair so
# that later cells can filter and group on a plain 'category' column.
new_rows = []
for index, row in df.iterrows():
    for nn in row['categories']:
        new_rows.append([
            row['helperId'],
            row['timestamp'],
            row['task'][0],  # only the first element of 'task' is kept
            nn,
            row['value'],
        ])

expanded_df = pd.DataFrame(
    new_rows,
    columns=['helperId', 'timestamp', 'task', 'category', 'value'],
)

# Keep only the category -> keyword-list mapping from the JSON document.
words = words['categories']

In [2]:
def parse_phrase_for_categories(input_phrase, category_list):
    """Return the categories whose keywords occur in ``input_phrase``.

    Matching is case-insensitive: both the phrase and every keyword are
    lower-cased before comparison.  Each keyword is tried as a regular
    expression (as before); if it is not a valid pattern (for example
    ``"*nix"``), it is matched as literal text instead of raising.

    Parameters
    ----------
    input_phrase : str
        Free-text phrase to scan.
    category_list : dict
        Mapping of category name -> iterable of keyword strings.

    Returns
    -------
    list
        Each matching category name exactly once, in the iteration order of
        ``category_list``.  Empty when nothing matches.
    """
    input_phrase = input_phrase.lower()

    matching_categories = []
    for category_key, keyword_list in category_list.items():
        for keyword in keyword_list:
            keyword = keyword.lower()
            try:
                found = re.search(keyword, input_phrase)
            except re.error:
                # Keyword is not a valid regex; treat it as plain text so a
                # single odd keyword cannot abort the whole lookup.
                found = re.search(re.escape(keyword), input_phrase)

            if found:
                matching_categories.append(category_key)
                # One hit is enough for this category; stopping here also
                # keeps the result free of duplicates.
                break

    return matching_categories
            
    
def get_top_in_category(category, n, skill_dataframe):
    """Return the ``n`` helpers with the highest mean value in ``category``.

    Parameters
    ----------
    category : str
        Category to filter on (compared against the 'category' column).
    n : int
        Maximum number of helpers to return.
    skill_dataframe : pandas.DataFrame
        Frame with at least 'helperId', 'category' and 'value' columns.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'helperId', with a single 'value' column holding each
        helper's mean value for the category, sorted in descending order.
        Contains fewer than ``n`` rows when fewer helpers exist, and is
        empty when the category has no rows.
    """
    current_df = skill_dataframe.loc[skill_dataframe['category'] == category]

    # Aggregate only 'value'.  Averaging the whole frame relies on pandas
    # silently dropping non-numeric columns, which newer pandas releases
    # no longer do (they raise instead).
    mean_by_helper = current_df.groupby('helperId')[['value']].mean()
    top_users_df = mean_by_helper.sort_values(by='value', ascending=False)

    return top_users_df.head(n)

def parse_phrase_for_people(phrase, n, category_list, skill_dataframe):
    """Map every category matched by ``phrase`` to its top ``n`` helpers.

    The phrase is resolved to categories with
    ``parse_phrase_for_categories(phrase, category_list)``; each category is
    then ranked with ``get_top_in_category(category, n, skill_dataframe)``.

    Returns a dict of category name -> ranking result; the dict is empty
    when the phrase matches no category.
    """
    return {
        matched: get_top_in_category(matched, n, skill_dataframe)
        for matched in parse_phrase_for_categories(phrase, category_list)
    }

In [3]:
# Example: resolve a free-text request to the skill categories it mentions.
task_categories = parse_phrase_for_categories("debug our meteor app", words)
# Parenthesised form prints identically on Python 2 and also runs on Python 3.
print(task_categories)

[u'debugging', u'web/mobile dev', u'dev tools']


In [12]:
# Example: the five helpers with the highest mean value for 'debugging'.
top_users = get_top_in_category('debugging', 5, expanded_df)
# Parenthesised form prints identically on Python 2 and also runs on Python 3.
print(top_users)

                   value
helperId                
aupdNzYu8WmNEi4e5      5
u6DrqFSvdZnWDgjz8      5
6rZbYn3cbQ9KNLRM5      4
WenRzs2SSx8YmGYsF      3
iEHKgJBH7hNSroEjw      3


In [13]:
# Example: end-to-end lookup from a phrase to the top five helpers per
# matched category.
result = parse_phrase_for_people("debug our meteor app", 5, words, expanded_df)
# Parenthesised form prints identically on Python 2 and also runs on Python 3.
print(result)

{u'debugging':                    value
helperId                
aupdNzYu8WmNEi4e5      5
u6DrqFSvdZnWDgjz8      5
6rZbYn3cbQ9KNLRM5      4
WenRzs2SSx8YmGYsF      3
iEHKgJBH7hNSroEjw      3, u'web/mobile dev':                       value
helperId                   
cupTmXWNEZ2N7vxPk  3.321429
6rZbYn3cbQ9KNLRM5  3.185185
aNdSTecskgeAm2St5  3.185185
nDHZGzczDWyqvyFhp  3.166667
mdhFQ6PNiAhfP7ce2  3.111111, u'dev tools':                       value
helperId                   
nDHZGzczDWyqvyFhp  3.000000
WenRzs2SSx8YmGYsF  2.888889
yLoAP6fd9WbQQPyhb  2.833333
dZ23LYG2G42mY3NAf  2.833333
N3LsK5NJeKm8PkBx8  2.625000}
