In [None]:
#Imports
import logging
import math
import re
from collections import Counter
from py2neo import Graph
import json
from bottle import Bottle
from cherrypy.wsgiserver import CherryPyWSGIServer

# Pre-compiled pattern matching runs of word characters; text_to_vector uses it to tokenize text.
WORD = re.compile(r'\w+')

# Configure logging once for the whole notebook: INFO level, each record prefixed with
# an HH:MM:SS timestamp followed by the millisecond part, then logger name and level.
logging.basicConfig(level=logging.INFO, format='%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s',
                    datefmt='%H:%M:%S')

# Module-level logger used by the Query methods.
logger = logging.getLogger(__name__)


In [None]:

# Cosine similarity between two term-frequency vectors


def get_cosine(vec1, vec2):
    """Return the cosine similarity of two sparse vectors.

    Both arguments are mappings from a key to a numeric weight (for example
    the ``Counter`` produced by ``text_to_vector``).  Only keys present in
    both mappings contribute to the dot product.

    Returns 0.0 when the product of the two magnitudes is zero (e.g. an
    empty mapping), otherwise ``dot(vec1, vec2) / (|vec1| * |vec2|)`` as a
    float.
    """
    shared_keys = set(vec1.keys()) & set(vec2.keys())
    dot_product = sum(vec1[key] * vec2[key] for key in shared_keys)

    norm1 = math.sqrt(sum(weight ** 2 for weight in vec1.values()))
    norm2 = math.sqrt(sum(weight ** 2 for weight in vec2.values()))
    magnitude = norm1 * norm2

    # Guard clause: a zero magnitude would otherwise divide by zero.
    if not magnitude:
        return 0.0
    return float(dot_product) / magnitude


In [None]:
# Convert text to vector
def text_to_vector(text):
    """Tokenize ``text`` with the module-level ``WORD`` pattern and count the tokens.

    Returns a ``Counter`` mapping each matched token to its number of
    occurrences in ``text``.
    """
    return Counter(WORD.findall(text))

In [None]:
class Query(object):
    """Wrapper around a py2neo ``Graph`` exposing the disease-code lookups.

    Every lookup hands its caller-supplied value to ``Graph.run`` as a Cypher
    parameter instead of formatting it into the query text, so quotes or
    Cypher fragments inside the value cannot change the statement.

    NOTE(review): the queries use the ``$name`` parameter syntax; very old
    Neo4j servers only understand ``{name}`` -- confirm the target server
    version.

    Each lookup returns whatever ``self.graph.run`` returns, unmodified.
    """

    def __init__(self, ip_addr, username, password):
        """Store the connection settings and create the ``Graph`` handle.

        :param ip_addr: value passed to ``Graph`` as its first argument.
        :param username: user name passed to ``Graph``.
        :param password: password passed to ``Graph``.
        """
        self.ip_addr = ip_addr
        self.username = username
        self.password = password
        self.graph = Graph(self.ip_addr, username=self.username, password=self.password)

    # Query for matching disease code to its synonyms
    def Code_to_Synonym(self, disease_code):
        """Return the distinct ``Is_known`` values of the synonyms of a code.

        :param disease_code: ``CodeID`` of the ``Code`` node to start from.
        :return: the result of ``self.graph.run`` for the query.
        """
        # The value is passed as a logging argument, so the format string
        # needs a matching %s placeholder or the record fails to format.
        logger.info("Matching the disease code from its synonym : %s", disease_code)
        query = ''' MATCH (co:Code{CodeID:$disease_code}) - [:is_known] -> (dis:Synonym)
                    RETURN DISTINCT dis.Is_known
                '''

        result = self.graph.run(query, {"disease_code": disease_code})
        logger.info("Disease code matched to its synonym")
        # Replaces a stray Python-2-only ``print result`` debug statement.
        logger.debug("Code_to_Synonym result: %s", result)
        return result

    # Query for matching disease name to its code
    def Synonym_to_Code(self, disease_name):
        """Return the distinct ``CodeID`` values known under a synonym.

        :param disease_name: exact ``Is_known`` value of the ``Synonym`` node.
        :return: the result of ``self.graph.run`` for the query.
        """
        logger.info("Matching synonym from the disease code: %s", disease_name)
        query = ''' MATCH (syn:Synonym{Is_known:$disease_name}) <-[:is_known] - (disease:Code)
                    RETURN DISTINCT disease.CodeID
                '''

        result = self.graph.run(query, {"disease_name": disease_name})
        logger.info("Synonym matched to its code")
        return result


    # Query to find all the parents of a disease code
    def code_to_parent(self, disease_code):
        """Return the distinct ``CodeID`` of every ancestor of a code.

        Follows ``is_child_of`` relationships over any number of hops.

        :param disease_code: ``CodeID`` of the ``Code`` node to start from.
        :return: the result of ``self.graph.run`` for the query.
        """
        logger.info("Returning all the parents of the disease code: %s", disease_code)
        query = ''' MATCH(:Code{CodeID:$disease_code}) - [:is_child_of*] -> (parent:Code)
                    RETURN DISTINCT parent.CodeID
                '''
        result = self.graph.run(query, {"disease_code": disease_code})
        logger.info("Parents of the disease code returned")
        return result

    # Query to find all the immediate children of a disease code
    def code_to_child(self, disease_code):
        """Return the distinct ``CodeID`` of the direct children of a code.

        Follows exactly one incoming ``is_child_of`` relationship.

        :param disease_code: ``CodeID`` of the parent ``Code`` node.
        :return: the result of ``self.graph.run`` for the query.
        """
        logger.info(" Returning all the immediate children of a disease code : %s", disease_code)
        query = ''' MATCH(:Code{CodeID:$disease_code}) <- [:is_child_of] - (child:Code)
                    RETURN DISTINCT child.CodeID
                '''

        result = self.graph.run(query, {"disease_code": disease_code})
        logger.info(" Immediate children of the disease code returned ")
        return result

    # Substring match query for disease synonyms
    def word_to_code(self, word):
        """Return synonym/code pairs whose synonym text contains ``word``.

        The match is a Cypher ``contains`` test on ``Is_known`` (a substring
        match, not whole-word equality); only codes with a non-null
        ``CodeID`` are returned.

        :param word: text to look for inside ``Synonym.Is_known``.
        :return: the result of ``self.graph.run`` for the query.
        """
        logger.info("Exact matching from the [:is_known] relation of the word : %s", word)
        query = ''' MATCH (Disease:Synonym)
                    WHERE Disease.Is_known contains $word
                    MATCH (Disease:Synonym {Is_known : Disease.Is_known}) <- [:is_known] - (disease:Code)
                    WHERE disease.CodeID IS NOT NULL
                    RETURN DISTINCT Disease.Is_known, disease.CodeID
                '''

        result = self.graph.run(query, {"word": word})
        logger.info(" Returned the disease code and disease synonyms of the given word")
        return result

    def code_to_frequent_codes(self, disease_code):
        """Return up to three codes linked by ``occurs_with``, heaviest first.

        Results are ordered by the relationship's ``weight`` property in
        descending order and limited to three rows.

        :param disease_code: ``CodeID`` of the ``Code`` node to start from.
        :return: the result of ``self.graph.run`` for the query.
        """
        query = ''' MATCH (code:Code{CodeID:$disease_code})
                    MATCH p=(code:Code)-[r:occurs_with]->(code1:Code)
                    RETURN code1.CodeID
                    ORDER BY r.weight DESC
                    limit 3
                '''

        result = self.graph.run(query, {"disease_code": disease_code})
        return result

# Day 3

## Path for big.txt
[big.txt](../data/big.txt)

In [None]:
import requests

# Smoke-test client for the /hello/<name> route. A server must already be
# listening on localhost:8080, otherwise the request raises a connection error.
url = "http://localhost:8080/hello/Manas"

headers = {
    'cache-control': "no-cache"
    }

# The timeout (seconds) stops the cell from hanging forever when the server
# accepts the connection but never answers.
response = requests.request("GET", url, headers=headers, timeout=10)

# print() with a single argument behaves the same on Python 2 and Python 3,
# unlike the Python-2-only print statement.
print(response.text)

In [None]:
from bottle import route, run, template

@route('/hello/<name>')
def index(name):
    """Render a bold "Hello <name>" greeting for the ``name`` URL segment."""
    greeting_markup = '<b>Hello {{name}}</b>!'
    return template(greeting_markup, name=name)

# Start the Bottle server on localhost:8080 only when executed as the main module.
if __name__ == '__main__':
    run(port=8080, host='localhost')

In [None]:

"""
API to generate word counts and get word count
"""

import json
import os
from bottle import route, run, request

# TODO: Replace this with a collections Counter object
# In-memory word -> count table read by the routes in this cell. add_word
# updates it in place, so its contents change while the server is running.
word_counts = {'word': 10, 'is': 123, 'good': 5}

# TODO:
"""
1. Write a function to preprocess+clean data from "big.txt" 
2. Generate a collections Counter object
3. Load the counter object in memory before starting the API
4. Get the API to provide Top N (configurable) words (MAKE SURE TO ACCOUNT FOR ERRORS !!)
5. Get the word count for a word
6. Update the count of a word in the dictionary
7. Delete a word from the counter object
8. Bundle the API in a Docker image
9. Deploy the Docker API on the cloud
10. Push the docker image to DockerHub

"""

# TODO: Add logging in each function and save to a logfile

@route('/words/')
def list_word_counts():
    """Return the whole word-count table under the ``info`` key."""
    payload = {"success": True}
    payload["info"] = word_counts
    return payload

@route('/words/top/<n:int>', method='GET')
def list_top_word(n):
    """Return the ``n`` most frequent words with their counts.

    :param n: how many words to return; 0 yields an empty result and values
        larger than the table size return every word.
    :return: ``{"success": True, "info": {word: count, ...}}`` on success, or
        ``{"success": False, "info": <message>}`` when ``n`` is negative.
    """
    # A negative slice bound would silently drop words from the end instead
    # of limiting the result, so reject it explicitly.
    if n < 0:
        return {"success": False, "info": "n must be a non-negative integer, got %d" % n}

    # Highest count first; ties are broken alphabetically so the selection is
    # deterministic. sorted() also works on Python 3, where items() is a view
    # that cannot be sliced directly.
    ranked = sorted(word_counts.items(), key=lambda item: (-item[1], item[0]))
    return {"success": True, "info": dict(ranked[:n])}

@route('/words/counts/<word>', method='GET')
def get_word_count(word):
    """Return the stored count for ``word``.

    :param word: word taken from the URL.
    :return: ``{"success": True, "count": <count>}`` when the word is in the
        table, otherwise ``{"success": False, "info": <message>}``.
    """
    # An unknown word used to raise KeyError and surface as a server error;
    # report it in the same success/info envelope the other routes use.
    if word not in word_counts:
        return {"success": False, "info": "Word %s not found in the dictionary!" % word}
    return {"success": True, "count": word_counts[word]}

@route('/words/add', method='PUT')
def add_word():
    """Add or overwrite word counts from a JSON object in the request body.

    The body must be a non-empty JSON object mapping each word to an integer
    count, e.g. ``{"hello": 3, "world": 1}``. Existing words are overwritten.

    :return: ``{"success": True, "info": <message naming the words>}`` on
        success, otherwise ``{"success": False, "info": <reason>}`` with the
        table left untouched.
    """
    try:
        data = json.loads(request.body.read())
    except ValueError:
        # Covers malformed JSON (JSONDecodeError subclasses ValueError).
        return {"success": False, "info": "Request body must be valid JSON"}

    if not isinstance(data, dict) or not data:
        return {"success": False,
                "info": "Request body must be a non-empty JSON object of word: count pairs"}

    # Validate everything before mutating so a bad entry cannot leave the
    # shared table half-updated. bool is excluded because it subclasses int.
    for count in data.values():
        if isinstance(count, bool) or not isinstance(count, int):
            return {"success": False, "info": "Every count must be an integer"}

    word_counts.update(data)

    added = sorted(data)
    if len(added) == 1:
        info = "Word %s added in the dictionary!" % added[0]
    else:
        info = "Words %s added in the dictionary!" % ", ".join(added)
    return {"success": True, "info": info}

@route('/words/remove/<word>', method='DELETE')
def remove_word(word):
    """Delete ``word`` from the word-count table.

    :param word: word taken from the URL.
    :return: ``{"success": True, "info": <message>}`` after the word has been
        removed, or ``{"success": False, "info": <message>}`` when it was not
        in the table.
    """
    if word not in word_counts:
        return {"success": False, "info": "Word %s not found in the dictionary!" % word}

    # The original handler reported success without ever touching the table.
    del word_counts[word]
    return {"success": True, "info": "Word %s removed from the dictionary!" % word}


# Start the word-count API on localhost:8080 with Bottle's debug mode enabled,
# only when executed as the main module.
if __name__ == '__main__':
    run(debug=True, port=8080, host='localhost')


