In [1]:
import pymongo
client = pymongo.MongoClient('mongodb://localhost:27017/')
jobs_db = client['jobs']
table = jobs_db['extracted jobs wnh']

In [2]:
all_data = list(table.find())
print(all_data[-1])

{'skills': 'ocial Media Marketing, Marketing Strategy, Marketing, Internet Marketing, Advertising', 'title': 'FB likes and Linkedin page follower wanted', 'location': '/WorkProjects/ProjectDetail/FB-likes-and-Linkedin-page-follower-wanted/81517', 'date posted': '2017-10-02 16:59:57.993794', 'details': "hello,\r\n\r\nWe need 10K likes on our facebook page and 10K follower on Linkedin page.\r\n\r\nBid here with your pricing, I'll consider lowest one.\r\n\r\ngenuine or fake doesn't matter. I need quantity, not quality", '_id': ObjectId('59d22335593f980e1da549c8')}


In [3]:
import re
import operator


def is_number(s):
    try:
        float(s) if '.' in s else int(s)
        return True
    except ValueError:
        return False


def load_stop_words(stop_word_file):
    """
    Utility function to load stop words from a file and return as a list of words
    @param stop_word_file Path and file name of a file containing stop words.
    @return list A list of stop words.
    """
    stop_words = []
    for line in open(stop_word_file):
        if line.strip()[0:1] != "#":
            for word in line.split():  # in case more than one per line
                stop_words.append(word)
    return stop_words


def separate_words(text, min_word_return_size):
    """
    Utility function to return a list of all words that are have a length greater than a specified number of characters.
    @param text The text that must be split in to words.
    @param min_word_return_size The minimum no of characters a word must have to be included.
    """
    splitter = re.compile('[^a-zA-Z0-9_\\+\\-/]')
    words = []
    for single_word in splitter.split(text):
        current_word = single_word.strip().lower()
        # leave numbers in phrase, but don't count as words, since they tend to invalidate scores of their phrases
        if len(current_word) > min_word_return_size and current_word != '' and not is_number(current_word):
            words.append(current_word)
    return words


def split_sentences(text):
    """
    Utility function to return a list of sentences.
    @param text The text that must be split in to sentences.
    """
    sentence_delimiters = re.compile(u'[.!?,;:\t\\\\"\\(\\)\\\'\u2019\u2013]|\\s\\-\\s')
    sentences = sentence_delimiters.split(text)
    return sentences


def build_stop_word_regex(stop_word_file_path):
    stop_word_list = load_stop_words(stop_word_file_path)
    stop_word_regex_list = []
    for word in stop_word_list:
        word_regex = r'\b' + word + r'(?![\w-])'
        stop_word_regex_list.append(word_regex)
    stop_word_pattern = re.compile('|'.join(stop_word_regex_list), re.IGNORECASE)
    return stop_word_pattern


def generate_candidate_keywords(sentence_list, stopword_pattern):
    phrase_list = []
    for s in sentence_list:
        tmp = re.sub(stopword_pattern, '|', s.strip())
        phrases = tmp.split("|")
        for phrase in phrases:
            phrase = phrase.strip().lower()
            if phrase != "":
                phrase_list.append(phrase)
    return phrase_list


def calculate_word_scores(phraseList):
    word_frequency = {}
    word_degree = {}
    for phrase in phraseList:
        word_list = separate_words(phrase, 0)
        word_list_length = len(word_list)
        word_list_degree = word_list_length - 1
        for word in word_list:
            word_frequency.setdefault(word, 0)
            word_frequency[word] += 1
            word_degree.setdefault(word, 0)
            word_degree[word] += word_list_degree
    for item in word_frequency:
        word_degree[item] = word_degree[item] + word_frequency[item]

    # Calculate Word scores = deg(w)/frew(w)
    word_score = {}
    for item in word_frequency:
        word_score.setdefault(item, 0)
        word_score[item] = word_degree[item] / (word_frequency[item] * 1.0)
    return word_score


def generate_candidate_keyword_scores(phrase_list, word_score):
    keyword_candidates = {}
    for phrase in phrase_list:
        keyword_candidates.setdefault(phrase, 0)
        word_list = separate_words(phrase, 0)
        candidate_score = 0
        for word in word_list:
            candidate_score += word_score[word]
        keyword_candidates[phrase] = candidate_score
    return keyword_candidates


class Rake(object):
    def __init__(self, stop_words_path):
        self.stop_words_path = stop_words_path
        self.__stop_words_pattern = build_stop_word_regex(stop_words_path)

    def run(self, text):
        sentence_list = split_sentences(text)

        phrase_list = generate_candidate_keywords(sentence_list, self.__stop_words_pattern)

        word_scores = calculate_word_scores(phrase_list)

        keyword_candidates = generate_candidate_keyword_scores(phrase_list, word_scores)

        sorted_keywords = sorted(keyword_candidates.items(), key=operator.itemgetter(1), reverse=True)
        return sorted_keywords

In [8]:
import os
v = Rake(os.getcwd()+'/analytics/Stop_list.txt')

In [9]:
v.run(all_data[-1]['details'])

[('10k follower', 4.0),
 ('fake doesn', 4.0),
 ('10k likes', 4.0),
 ('facebook page', 4.0),
 ('linkedin page', 4.0),
 ('lowest', 1.0),
 ('quantity', 1.0),
 ('ll', 1.0),
 ('genuine', 1.0),
 ('quality', 1.0),
 ('pricing', 1.0),
 ('bid', 1.0),
 ('matter', 1.0)]

In [10]:
predicted_data = all_data[-7:-1]

import time

for i in all_data:
    r = i['details']
    predicted = v.run(r)
    print(predicted[0:5])
    time.sleep(9)

[('fact\r\r-------------------------------------------------------------------------------\radditional information added', 23.0), ('-------------------------------------------------------------------------------\radditional information added', 18.0), ('perform simple google image search', 17.2), ('shopping malls\r\n\r\nfact2', 9.0), ('facts related images', 7.1)]


[('specific phone number saved', 14.0), ('phone number mailtofawadatgmaildotcom', 10.0), ('add google add', 9.0), ('specific devices', 5.0), ('contact list', 4.0)]


[('suggestion needed', 4.0), ('youtube video', 3.5), ('upload videos', 3.5), ('youtube', 1.5), ('videos', 1.5)]


[('needa android app', 8.5), ('online payment', 4.0), ('cod option', 4.0), ('grofers app', 4.0), ('grofers', 1.5)]


[('2017\r\rpart time jobs-simple online jobs', 19.277777777777775), ('-------------------------------------------------------------------------------\radditional information added', 16.0), ('offer genuine extra income', 14.666666666666666), ('easy data entry jobs', 14.31111111111111), ('online part time jobs', 14.277777777777777)]


[('-------------------------------------------------------------------------------\radditional information added', 16.0), ('simply data entry job', 13.627272727272727), ('easy data entry jobs', 13.3), ('part time home job', 13.227272727272727), ('online data entry', 9.65)]


[('growing real legitimate ad posting opportunities', 36.0), ('part time form filling jobs opportunities', 30.476190476190478), ('previous experience needed start earning', 25.0), ('month payment guarantee hundreds', 16.0), ('-------------------------------------------------------------------------------\radditional information added', 16.0)]


[('allotted bulk government & private sector projects', 36.0), ('-------------------------------------------------------------------------------\radditional information added', 16.0), ('simply data entry job', 13.63913043478261), ('part time home job', 13.31055900621118), ('easy data entry jobs', 12.4)]


[('giving pravachaan', 4.0), ('1 min approx', 4.0), ('animated video', 3.333333333333333), ('video', 1.3333333333333333), ('baba', 1.0)]


[('write couple', 4.0), ('past projects', 4.0), ('financial market', 4.0), ('service offering', 4.0), ('company introduction', 4.0)]


[('advance--\r\nvideo link -- https', 23.5), ('youtube policy\r\n\r\nbudget-- 1k inr/-', 23.5), ('youtube video', 7.0), ('/7312qc4wpoo\r\n\r\ninterested', 4.0), ('10k views', 4.0)]


[('article/ content writer required\r\n\r\nniche', 25.0), ('30 articles\r\nlong term work', 16.0), ('rs 89-90/ article\r\nmonthly', 16.0), ('400-500 words/ article\r\npayout', 16.0), ('email marketing\r\nlanguage', 8.5)]


[('fastest growing verticals globally', 16.0), ('won dedicated resource working', 16.0), ('boss employee culture', 9.0), ('fantasy sports startup', 9.0), ('mumbai startup venturing', 9.0)]


[('yellow pages', 4.0), ('web based', 4.0), ('6-7 screens', 4.0), ('skilled developer', 4.0), ('community contacts', 4.0)]


[('search engine marketing', 9.0), ('search engine optimization', 9.0), ('digitally grow moneymatch', 7.5), ('moneymatch reach', 4.5), ('initial quote', 4.0)]


[('business deck', 4.0), ('folks', 1.0), ('investors', 1.0), ('prepare', 1.0), ('pitch', 1.0)]


[('10k follower', 4.0), ('fake doesn', 4.0), ('10k likes', 4.0), ('facebook page', 4.0), ('linkedin page', 4.0)]


NameError: name 'all_data' is not defined

this is cleaned

...
Hi, I need FreeLancer/ FullTime  Web Designer / UI designer for website and developmenet work for my software. Preferably from Hyderabad. The software is built on PHP/Codeignitor. I have to add some new  modules and change the existing design. I want the web developer to sync his ideas  on UI/Design with my PhP developer .My website is www.healthfox.com. 

this is 
