In [2]:
import pymongo
client = pymongo.MongoClient('mongodb://localhost:27017/')
jobs_db = client['jobs']
table = jobs_db['extracted jobs wnh']

In [3]:
all_data = list(table.find())
print(all_data[-1])

{'location': '/WorkProjects/ProjectDetail/PHP-Mysql-Coders-for-Shopping-Marketplace-site-development/78622', 'title': 'PHP/Mysql Coders for Shopping - Marketplace site development', '_id': ObjectId('59a16700dc9cb52080a76632'), 'details': 'PHP/Mysql Coders for Shopping - Marketplace site development\r\n* Coder should be available online from 9AM to 8PM IST\r\n* Coder should have experience in Codeigniter framework\r\n* Coder should have done API integrations \r\n* Shopping cart development experience - preferred\r\n\r\nThe bid should be for Flat Monthly pay if work is good this may be an ongoing job. We need experienced people only and one who can dedicate this time.  We will drop any one who is doing multiple projects in our work time.\r\n\r\nA message without the Bid amount is ignored.  We will take your interview by phone or Skype so please provide both the details. Should be ready to start immediately when hired.', 'skills': '\nSkills:\nPHP, MySQL, CodeIgniter', 'date posted': '2017

In [4]:
import re
import operator


def is_number(s):
    try:
        float(s) if '.' in s else int(s)
        return True
    except ValueError:
        return False


def load_stop_words(stop_word_file):
    """
    Utility function to load stop words from a file and return as a list of words
    @param stop_word_file Path and file name of a file containing stop words.
    @return list A list of stop words.
    """
    stop_words = []
    for line in open(stop_word_file):
        if line.strip()[0:1] != "#":
            for word in line.split():  # in case more than one per line
                stop_words.append(word)
    return stop_words


def separate_words(text, min_word_return_size):
    """
    Utility function to return a list of all words that are have a length greater than a specified number of characters.
    @param text The text that must be split in to words.
    @param min_word_return_size The minimum no of characters a word must have to be included.
    """
    splitter = re.compile('[^a-zA-Z0-9_\\+\\-/]')
    words = []
    for single_word in splitter.split(text):
        current_word = single_word.strip().lower()
        # leave numbers in phrase, but don't count as words, since they tend to invalidate scores of their phrases
        if len(current_word) > min_word_return_size and current_word != '' and not is_number(current_word):
            words.append(current_word)
    return words


def split_sentences(text):
    """
    Utility function to return a list of sentences.
    @param text The text that must be split in to sentences.
    """
    sentence_delimiters = re.compile(u'[.!?,;:\t\\\\"\\(\\)\\\'\u2019\u2013]|\\s\\-\\s')
    sentences = sentence_delimiters.split(text)
    return sentences


def build_stop_word_regex(stop_word_file_path):
    stop_word_list = load_stop_words(stop_word_file_path)
    stop_word_regex_list = []
    for word in stop_word_list:
        word_regex = r'\b' + word + r'(?![\w-])'
        stop_word_regex_list.append(word_regex)
    stop_word_pattern = re.compile('|'.join(stop_word_regex_list), re.IGNORECASE)
    return stop_word_pattern


def generate_candidate_keywords(sentence_list, stopword_pattern):
    phrase_list = []
    for s in sentence_list:
        tmp = re.sub(stopword_pattern, '|', s.strip())
        phrases = tmp.split("|")
        for phrase in phrases:
            phrase = phrase.strip().lower()
            if phrase != "":
                phrase_list.append(phrase)
    return phrase_list


def calculate_word_scores(phraseList):
    word_frequency = {}
    word_degree = {}
    for phrase in phraseList:
        word_list = separate_words(phrase, 0)
        word_list_length = len(word_list)
        word_list_degree = word_list_length - 1
        for word in word_list:
            word_frequency.setdefault(word, 0)
            word_frequency[word] += 1
            word_degree.setdefault(word, 0)
            word_degree[word] += word_list_degree
    for item in word_frequency:
        word_degree[item] = word_degree[item] + word_frequency[item]

    # Calculate Word scores = deg(w)/frew(w)
    word_score = {}
    for item in word_frequency:
        word_score.setdefault(item, 0)
        word_score[item] = word_degree[item] / (word_frequency[item] * 1.0)
    return word_score


def generate_candidate_keyword_scores(phrase_list, word_score):
    keyword_candidates = {}
    for phrase in phrase_list:
        keyword_candidates.setdefault(phrase, 0)
        word_list = separate_words(phrase, 0)
        candidate_score = 0
        for word in word_list:
            candidate_score += word_score[word]
        keyword_candidates[phrase] = candidate_score
    return keyword_candidates


class Rake(object):
    def __init__(self, stop_words_path):
        self.stop_words_path = stop_words_path
        self.__stop_words_pattern = build_stop_word_regex(stop_words_path)

    def run(self, text):
        sentence_list = split_sentences(text)

        phrase_list = generate_candidate_keywords(sentence_list, self.__stop_words_pattern)

        word_scores = calculate_word_scores(phrase_list)

        keyword_candidates = generate_candidate_keyword_scores(phrase_list, word_scores)

        sorted_keywords = sorted(keyword_candidates.items(), key=operator.itemgetter(1), reverse=True)
        return sorted_keywords

In [5]:
import os
v = Rake(os.getcwd()+'\\analytics\Stop_list.txt')

In [6]:
v.run(all_data[-1]['details'])

[('api integrations \r\n* shopping cart development experience', 30.0),
 ('marketplace site development\r\n* coder', 16.333333333333332),
 ('8pm ist\r\n* coder', 9.333333333333334),
 ('codeigniter framework\r\n* coder', 9.333333333333334),
 ('flat monthly pay', 9.0),
 ('start immediately', 4.0),
 ('ongoing job', 4.0),
 ('multiple projects', 4.0),
 ('php/mysql coders', 4.0),
 ('experienced people', 4.0),
 ('shopping', 3.5),
 ('bid amount', 3.5),
 ('experience', 3.5),
 ('work time', 3.0),
 ('work', 1.5),
 ('bid', 1.5),
 ('time', 1.5),
 ('online', 1.0),
 ('ready', 1.0),
 ('details', 1.0),
 ('skype', 1.0),
 ('message', 1.0),
 ('good', 1.0),
 ('dedicate', 1.0),
 ('drop', 1.0),
 ('interview', 1.0),
 ('phone', 1.0),
 ('preferred', 1.0),
 ('hired', 1.0),
 ('9am', 1.0),
 ('provide', 1.0)]

In [8]:
predicted_data = all_data[-7:-1]

import time

for i in all_data:
    r = i['details']
    predicted = v.run(r)
    print(predicted[0:5])
    time.sleep(9)

[('writer status', 4.0), ('short notice', 4.0), ('part timer', 4.0), ('15days basis', 4.0), ('part-time writer', 4.0)]


[('school erp product', 9.0), ('commission basis', 4.0), ('recurring commission', 4.0), ('guys', 1.0), ('marketing', 1.0)]


[('organising references', 4.0), ('presence required', 4.0), ('excel file', 4.0), ('work researching', 4.0), ('related field', 4.0)]


[('/affiliate/subscribe/ajax-complete-13-e-books-set-2e67b/\r\n\r\njava spring struct jsp 45 e-books set 299- inr\r\nhttps', 89.66666666666667), ('affiliate marketers apply\r\n\r\najax complete 13 e-books set 299- inr\r\nhttps', 86.16666666666667), ('/affiliate/subscribe/oracle-and-sql-32-e-books-set-d57dc/\r\n\r\nado dot net 10 e-books set 299- inr\r\nhttps', 75.66666666666666), ('/affiliate/subscribe/web-services-5-e-books-set-1a3b1/\r\n\r\nasp dot net 96 e-books set 499- inr\r\nhttps', 75.0), ('/affiliate/subscribe/c-10-e-books-set-list/\r\n\r\nweb services 5 e-books set 199- inr\r\nhttps', 62.0)]


[('/affiliate/subscribe/ajax-complete-13-e-books-set-2e67b/\r\n\r\njava spring struct jsp 45 e-books set 299- inr\r\nhttps', 89.66666666666667), ('affiliate marketers apply\r\n\r\najax complete 13 e-books set 299- inr\r\nhttps', 86.16666666666667), ('/affiliate/subscribe/oracle-and-sql-32-e-books-set-d57dc/\r\n\r\nado dot net 10 e-books set 299- inr\r\nhttps', 75.66666666666666), ('/affiliate/subscribe/web-services-5-e-books-set-1a3b1/\r\n\r\nasp dot net 96 e-books set 499- inr\r\nhttps', 75.0), ('/affiliate/subscribe/c-10-e-books-set-list/\r\n\r\nweb services 5 e-books set 199- inr\r\nhttps', 62.0)]


[('separate modules', 4.0), ('java', 1.0), ('development', 1.0)]


[('total number', 4.0), ('dozen sources', 4.0), ('feel free', 4.0), ('writer hired', 4.0), ('requires articles', 3.5)]


[('cg lighting+live', 4.0), ('model', 1.0), ('references', 1.0), ('attached', 1.0)]


[('citizen engagement', 4.0), ('write content', 4.0), ('dynamic flor', 4.0), ('write blogs', 4.0), ('contractual base', 4.0)]


[('full time female freelancers aka housewives preferred /', 64.0), ('occasional phone call', 9.0), ('document build', 4.0), ('executive assistant', 4.0), ('word *red*', 4.0)]


[('preferrably approx 800-1000 words', 15.5), ('travel write ups experience', 15.333333333333334), ('approx 50 odd articles', 9.5), ('decided \r\nlocation -', 9.0), ('travel based website', 8.833333333333334)]


[('launching designer indian wear clothing line', 36.0), ('dedicated fashion designers', 9.0), ('fashion merchandising start-', 9.0), ('relevant experience', 4.0), ('invite creative', 4.0)]


[('dst file format', 9.0), ('sketch drawing', 4.0), ('logo designer', 4.0), ('design software', 4.0), ('editing', 1.0)]


[('candy box tins', 9.0), ('preferably tif file', 9.0), ('product pictures', 4.0), ('dinner plates', 4.0), ('ceramic mugs', 4.0)]


[('led background', 4.0), ('animated background', 4.0), ('logo', 1.0), ('client', 1.0), ('20mm', 1.0)]


[('good internet access', 9.0), ('handle revision pressure', 8.5), ('offering quality content', 8.0), ('work pressure', 4.5), ('comfortable working', 4.0)]


[('3 months full time digital marketing trainee', 36.0), ('social media connections', 8.0), ('social media', 5.0), ('seo implementation\r\n2', 4.0), ('smo implementation\r\n3', 4.0)]


[('consulting company requiring freelancers', 16.0), ('student account\r\nhistory', 9.0), ('delhi ncr\r\n\r\nprefer', 9.0), ('months\r\npreferred area', 9.0), ('require hardware integration', 8.0)]


[('sourcing software projects', 9.0), ('part-time position', 4.0), ('online bidders', 4.0), ('project basis', 3.5), ('project', 1.5)]


[('testing required\r\n----------------------------\r\n1', 8.333333333333334), ('test script preparation', 8.0), ('mobile app testing\r\n4', 7.333333333333334), ('mobile app', 5.0), ('current job\r\n2', 4.0)]


[('quality check partial bg', 14.0), ('realistic background design artists', 12.866666666666667), ('long term basis', 9.0), ('multiple pre-production artist', 9.0), ('previous work link', 9.0)]


[('part time jobs-simple online jobs', 20.833333333333332), ('online data entry jobs', 16.333333333333332), ('online part time jobs', 15.833333333333332), ('offer genuine extra income', 14.666666666666666), ('part time jobs', 11.5)]


[('part time jobs-simple online jobs', 20.833333333333332), ('online data entry jobs', 16.333333333333332), ('online part time jobs', 15.833333333333332), ('offer genuine extra income', 14.666666666666666), ('part time jobs', 11.5)]


[('part time jobs-simple online jobs', 20.833333333333332), ('online data entry jobs', 16.333333333333332), ('online part time jobs', 15.833333333333332), ('offer genuine extra income', 14.666666666666666), ('part time jobs', 11.5)]


[('part time jobs-simple online jobs', 20.833333333333332), ('online data entry jobs', 16.333333333333332), ('online part time jobs', 15.833333333333332), ('offer genuine extra income', 14.666666666666666), ('part time jobs', 11.5)]


[('part time jobs-simple online jobs', 20.833333333333332), ('online data entry jobs', 16.333333333333332), ('online part time jobs', 15.833333333333332), ('offer genuine extra income', 14.666666666666666), ('part time jobs', 11.5)]


[('part time jobs-simple online jobs', 20.833333333333332), ('online data entry jobs', 16.333333333333332), ('online part time jobs', 15.833333333333332), ('offer genuine extra income', 14.666666666666666), ('part time jobs', 11.5)]


[('part time jobs-simple online jobs', 20.833333333333332), ('online data entry jobs', 16.333333333333332), ('online part time jobs', 15.833333333333332), ('offer genuine extra income', 14.666666666666666), ('part time jobs', 11.5)]


[('12-15 hours ensuring utmost quality', 19.6), ('-------------------------------------------------------------------------------\radditional information added', 16.0), ('learned writing error free', 14.5), ('experienced content writers', 9.0), ('high quality content', 8.6)]


[('noida guys', 3.5), ('noida', 1.5), ('gmail', 1.0), ('completing', 1.0), ('bid', 1.0)]


[('topics\r\n\r\ndetailed topic description\r\n1', 16.0), ('data warehousing concepts', 9.0), ('data warehousing concepts\r\n2', 9.0), ('create 100 assessment questions', 9.0), ('oracle bi tool', 8.6)]


[('expected \r\r-------------------------------------------------------------------------------\radditional information added', 25.0), ('refined content based multiple articles', 24.0), ('states andhra pradesh', 8.0), ('2017\r\rcontent writers', 6.0), ('madhya pradesh', 4.666666666666666)]


[('energetic female marketing professional', 16.0), ('upcoming women fashion brand', 16.0), ('conduct market research', 9.0), ('shopping locations', 4.0), ('distribute brochure', 4.0)]


[('1 pm india time', 9.0), ('attached newsletter design', 7.8), ('complete responsive html', 7.0), ('simple project', 4.0), ('mobile friendly', 4.0)]


[('part time work basis', 14.000000000000002), ('part time basis', 10.0), ('wordpress\r\ndatabase knowledge', 8.333333333333334), ('wordpress site', 4.333333333333334), ('minimum experience', 4.0)]


[('start upto 4', 4.0), ('shopify stores', 4.0), ('2-3 days', 4.0), ('layout', 1.0), ('configure', 1.0)]


[('good pay guranteed', 9.0), ('good writing skills', 8.0), ('goold keywords', 4.0), ('full understanding', 4.0), ('quality work', 4.0)]


[('highly analytical growth hacker', 15.333333333333334), ('generate quality traffic', 9.0), ('drive consumer insights', 9.0), ('full/part time\r\n\r\nsalary', 8.333333333333334), ('customer acquisition initiatives', 7.9)]


[('telemarketing person', 4.0), ('smart person', 4.0), ('taking followup', 4.0), ('thing related', 4.0), ('ecommerce industry', 4.0)]


[('product promotion video', 9.0), ('multimedia animator', 4.0), ('creating', 1.0)]


[('professional punjabi style high quality', 25.0), ('blue ray format', 8.5), ('mts format', 4.5), ('harnek singh', 4.0), ('whats app', 4.0)]


[('customised e-commerce order management solution', 25.0), ('fedex bluedart indian post', 13.5), ('fedex api', 5.0), ('shipping labels', 4.0), ('bluedart', 2.5)]


[('interested', 1.0), ('urgent', 1.0), ('apply', 1.0)]


[('reusable php modules\r\n* integrate common third-party apis', 41.666666666666664), ('integrating third-party payment gateways', 17.5), ('web technologies including html', 14.75), ('live server\r\n* build efficient', 14.666666666666666), ('front-end developers\r\n* develop', 9.0)]


[('work email finding job', 15.5), ('catch contact email', 9.5), ('pay 300rs', 4.0), ('rate', 1.0), ('site', 1.0)]


[('small electric car project', 16.0), ('kindly submit', 4.0), ('dpr made', 4.0), ('exhaustive one-', 4.0), ('2/3 days', 4.0)]


[('linear system based', 9.0), ('short triangular pulse', 8.5), ('modeling wave propagation', 8.5), ('spectral element method', 8.0), ('spectral method', 5.0)]


[('android & windows phone', 9.0), ('quality assurance & testing', 9.0), ('digital designing services', 8.0), ('top rank', 4.0), ('open source', 4.0)]


[('website \r\n\nemergency services app', 13.0), ('emergency services app', 10.5), ('website', 2.5)]


[('long term basis', 9.0), ('social media feeds', 9.0), ('add blog section', 9.0), ('approx charges', 4.0), ('retainership model', 4.0)]


[('prepare 50 assessment/mcq questions', 9.0), ('navigation developer', 3.5), ('navigation', 1.5), ('consultants/trainers', 1.0), ('hmi\r\n3', 1.0)]


[('prepare 50 assessment/mcq questions', 9.0), ('altova xmlspy', 4.0), ('solution designer', 4.0), ('freelance consultants', 4.0), ('uml\r\n2', 1.0)]


[('code deployment\r\nconfig management\r\nserver monitoring', 36.0), ('support\r\nhttps implementation\r\nmobile app deployment', 36.0), ('log analysis\r\nenvironment set', 13.5), ('maintenance\r\nvm set', 9.5), ('excellent aws experience', 9.0)]


[('hiring content strategists/ writers', 15.0), ('work full time', 9.0), ('good research skills', 8.5), ('effective writing style', 8.333333333333334), ('creative content', 5.0)]


[('translate technical terms', 9.0), ('startup company website', 8.5), ('company overview', 4.5), ('content writing', 4.0), ('homepage contents', 4.0)]


[('manoj shinh- don', 9.0), ('fund escrow', 4.0), ('paypal payment', 4.0), ('cheated', 1.0), ('work', 1.0)]


[('writing fitness product reviews', 10.0), ('dedicated freelance writer', 8.5), ('sample 500 word article', 8.25), ('good quality article', 7.75), ('fitness niche', 5.0)]


[('startup company', 4.0), ('business plan', 4.0), ('write', 1.0)]


[('apa 6th edition referencing style', 25.0), ('econ6001 economic principles\r\n\r\n\r\ncontext', 16.0), ('theory underlining graph 2 differ', 13.714285714285714), ('graph 1identify dependent variable', 12.714285714285715), ('source iscorrectly attributed', 8.0)]


KeyboardInterrupt: 

NameError: name 'all_data' is not defined

this is cleaned

...
Hi, I need FreeLancer/ FullTime  Web Designer / UI designer for website and developmenet work for my software. Preferably from Hyderabad. The software is built on PHP/Codeignitor. I have to add some new  modules and change the existing design. I want the web developer to sync his ideas  on UI/Design with my PhP developer .My website is www.healthfox.com. 

this is 
