In [None]:
import sys
from IPython.core.display import HTML
import logging
m_logger = None

def initLogger(msg="Started!", level=logging.DEBUG):
    """(Re)configure the notebook-wide logger and emit a first message.

    The logger named after this module is stored in the global ``m_logger``.
    Any handlers already attached to it are discarded and replaced by a single
    stream handler writing to ``sys.stderr``, so re-running the cell does not
    duplicate log lines.

    Args:
        msg: Text logged at DEBUG level once the logger is configured.
        level: Threshold assigned to the logger.
    """
    global m_logger
    m_logger = log = logging.getLogger(__name__)
    # Start from a clean handler list, then attach the one stderr handler.
    log.handlers = []
    log.addHandler(logging.StreamHandler(sys.stderr))
    log.setLevel(level)
    log.debug(msg)
    
initLogger("Limeloader 0.1")

# Read the usage page inside a context manager so the file handle is closed
# as soon as its text has been read.
with open("data/usage.html", "r") as page:
    contents = page.read()
# Keep this as the last expression of the cell: the notebook renders the
# value of the final expression, which is the HTML usage page.
HTML(contents)


In [None]:
## LOAD CSV in Dictionary list
## Set options in m_options at bottom of cell

import requests

import urllib.request
import urllib.parse
import csv
import json
import codecs
import time, datetime
import getpass, sys

def _raiseCsvFieldSizeLimit(limit=sys.maxsize):
    """Raise the csv module's field size limit as far as the platform allows.

    csv.field_size_limit() raises OverflowError when the value does not fit
    the platform's C long (sys.maxsize does not fit where a C long is 32 bits
    wide), so the value is reduced until it is accepted.

    Returns:
        The limit that was finally installed.
    """
    while True:
        try:
            csv.field_size_limit(limit)
            return limit
        except OverflowError:
            limit = limit // 10

_raiseCsvFieldSizeLimit()


# Version of this loader.
m_version = "1.0.0"

# Path of the most recently loaded CSV file and the rows read from it
# (both are filled in by loadData).
m_dataFile = ""
m_data = []
m_defaultHost = 'localhost'


m_columnNames = []




def loadData(file, display=False):
    """Load a CSV file and publish the result in the module globals.

    ``m_dataFile`` is set to ``file`` and ``m_data`` to whatever
    ``loadListFromCSV`` returns for it.

    Args:
        file: Path of the CSV file to read.
        display: Forwarded to ``loadListFromCSV``.
    """
    global m_dataFile, m_data
    m_dataFile = file
    m_data = loadListFromCSV(file, display)
    

def _columnDataType(column):
    "Classify a column by its header name: dimension, measure or attribute."
    if column[:1] == '_':
        return "dimension"
    if column.isupper():
        return "measure"
    return "attribute"


def loadListFromCSV(path, display=False):
    """Load a CSV file into a list of rows of tagged cells.

    The first record of the file is taken as the header. Every following
    record becomes a list holding one dict per cell, with the keys:

      * ``"name"``     - the header text of the cell's column
      * ``"value"``    - the cell text as read by the csv module
      * ``"dataType"`` - ``"dimension"`` when the column name starts with
        ``_``, ``"measure"`` when the name is upper-case (``str.isupper``),
        otherwise ``"attribute"``

    A record with more cells than the header has columns is truncated to the
    header width and a warning is logged (previously this raised IndexError).
    A record with fewer cells simply yields fewer dicts.

    Args:
        path: Path of the CSV file.
        display: When true, print every loaded row after reading.

    Returns:
        The list of rows; empty when the file has no data records.
    """
    dList = []

    # newline="" hands line-ending handling to the csv module, as its
    # documentation requires; otherwise newlines embedded in quoted fields
    # can be misread.
    with open(path, newline="") as f:
        reader = csv.reader(f)
        header = next(reader, None)
        if header is not None:
            m_logger.debug("loadListFromCSV: Header with "+str(len(header))+" columns read from file: "+path)
            # Classify each column once instead of once per cell.
            dataTypes = [_columnDataType(column) for column in header]
            for row in reader:
                if len(row) > len(header):
                    m_logger.warning("loadListFromCSV: line "+str(reader.line_num)+" has "+str(len(row))
                                     +" values but the header has "+str(len(header))
                                     +" columns; extra values ignored")
                # zip stops at the shorter sequence, which covers both short
                # and over-long records.
                dList.append([
                    {"name": column, "value": value, "dataType": dataType}
                    for column, dataType, value in zip(header, dataTypes, row)
                ])

    m_logger.debug("loadListFromCSV: "+str(len(dList))+" data records read from file: "+path)

    if display:
        for idx, row in enumerate(dList, 1):
            print ("Row "+str(idx)+":")
            print (row)

    return dList
    

def dictItemToCSV(item, columnNames, zeroFill=False, extra={}):
    """Build one CSV row (a list of cell values) for ``item``.

    For each name in ``columnNames`` the value is taken from the first place
    that contains it, in this order: ``extra``, ``item["_source"]`` (when the
    item has that key), then ``item`` itself. A name found nowhere yields
    ``"0"`` when ``zeroFill`` is true and ``""`` otherwise.

    Args:
        item: Mapping describing one record.
        columnNames: Column names, in output order.
        zeroFill: Selects the placeholder used for missing columns.
        extra: Overrides that take precedence over the record's own values.

    Returns:
        A list with one entry per column name.
    """
    placeholder = "0" if zeroFill else ""
    nested = item["_source"] if "_source" in item else {}

    def lookup(name):
        # Precedence: explicit overrides, then the nested source, then the item.
        for candidate in (extra, nested, item):
            if name in candidate:
                return candidate[name]
        return placeholder

    return [lookup(name) for name in columnNames]

def writeListToCSV(basename, dlist, columnNames, display=False):
    """Write a header row plus one row per item to ./output/actions-<basename>.csv.

    Any existing file of that name is overwritten. Each item is turned into a
    row with ``dictItemToCSV(item, columnNames)``.

    Args:
        basename: Inserted into the output file name.
        dlist: Items (mappings) to write.
        columnNames: Header row; also selects and orders the values per item.
        display: When true, print each item as it is written.
    """
    fileName = "./output/actions-"+basename+".csv"
    m_logger.debug("writeListToCSV: Saving "+str(len(dlist))+" records to file: "+fileName)

    # newline='' is required by the csv module for files it writes; without it
    # platforms that translate "\n" would emit "\r\r\n" line endings.
    with open(fileName, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(columnNames)
        for item in dlist:
            csvItem = dictItemToCSV(item,columnNames)
            if display:
                print (item)
            writer.writerow(csvItem)

def writeHeaderToCSV(basename, columnNames, display=False):
    """Create ./output/actions-<basename>.csv containing only the header row.

    Any existing file of that name is overwritten.

    Args:
        basename: Inserted into the output file name.
        columnNames: Values written as the single header row.
        display: When true, print the column names before writing.
    """
    fileName = "./output/actions-"+basename+".csv"
    m_logger.debug("writeHeaderToCSV: Writing to file: "+fileName)

    if display:
        print (columnNames)

    # newline='' is required by the csv module for files it writes; without it
    # platforms that translate "\n" would emit "\r\r\n" line endings.
    with open(fileName, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(columnNames)


def appendListToCSV(basename, dList, columnNames, zeroFill=False, display=False):
    """Append one row per item to ./output/actions-<basename>.csv.

    No header row is written. Each item is turned into a row with
    ``dictItemToCSV(item, columnNames, zeroFill)``.

    Args:
        basename: Inserted into the output file name.
        dList: Items (mappings) to append.
        columnNames: Selects and orders the values taken from each item.
        zeroFill: Forwarded to ``dictItemToCSV``.
        display: When true, print each item as it is written.
    """
    fileName = "./output/actions-"+basename+".csv"
    m_logger.debug("appendListToCSV: Appending "+str(len(dList))+" records to file: "+fileName)

    # newline='' is required by the csv module for files it writes; without it
    # platforms that translate "\n" would emit "\r\r\n" line endings.
    with open(fileName, 'a', newline='') as csvfile:
        writer = csv.writer(csvfile)
        for item in dList:
            csvItem = dictItemToCSV(item,columnNames,zeroFill)
            if display:
                print (item)
            writer.writerow(csvItem)
        

            


def initialize(msg):
    """Set up the loader; currently this only (re)initialises logging.

    Args:
        msg: First message handed to ``initLogger``.
    """
    initLogger(msg=msg)


# **** START HERE ******
# Notebook options: edit the values below, then re-run this cell.
m_options = {
    "file": "data/BigSet.csv", # Incoming CSV data file (read by loadData below)
    "es": "http://localhost:9200", # Target ES host:port
    "indexName": "qlik/document", # Target ES index path (NOTE(review): looks like index/type - confirm)
    "trace": True, # Trace flag: when True, extra progress output is printed
    "query": "ikea" # Search text (NOTE(review): presumably for the query cells - confirm)
}

m_logger.debug("** Options **")
m_logger.debug(m_options)

# Load the CSV into m_data; the second argument (display=True) prints every loaded row.
loadData(m_options["file"],True)


In [None]:
## DELETE the Qlik index - ARE YOU SURE?
## This cell sends an HTTP DELETE to the 'qlik' index URL below and prints the JSON reply.
## Remember to start the Elasticsearch server first.
## NOTE(review): the address is hard-coded here (localhost:9200), not read from m_options.
import requests
url = 'http://localhost:9200/qlik/'
response = requests.delete(url)
jsonData = response.json()
print (jsonData)

In [None]:
## Write Dictionary list to Elastic Search
## Remember to start Elastic Search server at address specified in m_options (usually localhost:9200)

import requests

def writeToIndex(indexItem, url='http://localhost:9200/qlik/document/'):
    """POST one document to Elasticsearch and print the request and the reply.

    Args:
        indexItem: JSON-serialisable mapping; sent as the request body.
        url: Document endpoint to POST to. Defaults to the local
            ``qlik/document`` endpoint, so existing one-argument calls are
            unchanged.
    """
    data = json.dumps(indexItem)
    print ("writeToIndex:")
    print (data)

    # State the body type explicitly: Elasticsearch 6.0 and later reject
    # request bodies that arrive without a supported Content-Type header.
    response = requests.post(url, data=data, headers={'Content-Type': 'application/json'})
    jsonData = response.json()
    print (jsonData)
    
def addRowToIndex(attributes, measures, dimensions, display=False):
    """Index one document per measure of a row.

    Each document carries every attribute and dimension of the row as
    ``name -> value`` fields, plus the measure itself under the fixed keys
    ``"name"`` and ``"value"`` (these overwrite an attribute or dimension
    that uses the same key). A row without measures produces no documents.

    Args:
        attributes: Cells (dicts with "name" and "value") tagged as attributes.
        measures: Cells tagged as measures.
        dimensions: Cells tagged as dimensions.
        display: Accepted for call compatibility; not used here.
    """
    for measure in measures:
        # Attributes first, then dimensions, so a dimension wins a name clash.
        indexItem = {cell["name"]: cell["value"] for cell in attributes}
        indexItem.update((cell["name"], cell["value"]) for cell in dimensions)
        indexItem.update(name=measure["name"], value=measure["value"])
        writeToIndex(indexItem)

def writeListToIndex(dList, url, display=False):
    """Send every row of ``dList`` to the index via ``addRowToIndex``.

    The cells of each row are split by their ``"dataType"`` into attributes,
    measures and dimensions (cells with any other dataType are dropped) and
    the three groups are passed to ``addRowToIndex``.

    Args:
        dList: Rows as produced by ``loadListFromCSV``.
        url: Not used by this function.
        display: When true, print a "Row n:" line per row; also forwarded to
            ``addRowToIndex``.

    Returns:
        ``dList`` itself, unchanged.
    """
    for rowNumber, row in enumerate(dList, start=1):
        attributes, measures, dimensions = [], [], []
        groups = (
            ("attribute", attributes),
            ("measure", measures),
            ("dimension", dimensions),
        )
        for cell in row:
            kind = cell["dataType"]
            for label, bucket in groups:
                if kind == label:
                    bucket.append(cell)
                    break

        if display:
            print ("Row "+str(rowNumber)+":")

        addRowToIndex(attributes, measures, dimensions, display)
    return dList
    

# The second argument is writeListToIndex's `url` parameter, so hand it the ES
# document endpoint built from the options (host + "/" + index path + "/"),
# not the CSV file path. The trailing ';' keeps the returned list out of the
# cell output.
writeListToIndex(m_data, m_options["es"]+"/"+m_options["indexName"]+"/", m_options["trace"]);


In [None]:
## Get ES index information
## Sends a GET to the 'qlik' index URL and prints the decoded JSON reply.
## NOTE(review): presumably this returns index metadata rather than the stored documents - confirm.

import requests

response = requests.get('http://localhost:9200/qlik/?format=json&pretty')
jsonData = response.json()
print (jsonData)

In [None]:
# Post a sample document (ikea)
# NOTE(review): this cell was titled "Simple Field Search", but it POSTs a flat
# JSON document to the same /qlik/document/ endpoint that writeToIndex uses, so
# it presumably adds a document rather than running a query - confirm.

import requests
url = 'http://localhost:9200/qlik/document/'
data = """{
    "name": "ikea",
    "Country": "Sweden",
    "Private": "True"
}"""
response = requests.post(url, data=data)
jsonData = response.json()
print (jsonData)

In [None]:
# Post another sample document (test1..test3)
# NOTE(review): this cell was titled "Another Field Search", but it POSTs a flat
# JSON document to the same /qlik/document/ endpoint that writeToIndex uses, so
# it presumably adds a document rather than running a query - confirm.

import requests
url = 'http://localhost:9200/qlik/document/'
data = """{
    "test1": "1111",
    "test2": "2222",
    "test3": "3333"
}"""
response = requests.post(url, data=data)
jsonData = response.json()
print (jsonData)

In [None]:
# Single Term Search
# POSTs a "term" query on the Country field to the index's _search endpoint
# and prints the decoded JSON reply.
# NOTE(review): the term is lower-case "sweden" while the sample document in
# this notebook stores "Sweden"; presumably this relies on the field being
# lower-cased at index time - confirm.

import requests
url = 'http://localhost:9200/qlik/_search?format=json&pretty=true'
data = """{
    "query": {
        "term": {"Country":"sweden"}
    }
}"""
response = requests.post(url, data=data)
jsonData = response.json()
print (jsonData)

In [None]:
# Multi-field search
# POSTs a "multi_match" query for "ikea" over the field pattern "n*" to the
# index's _search endpoint and prints the decoded JSON reply.
import requests
url = 'http://localhost:9200/qlik/_search?format=json&pretty=true'
data = """{
  "query": {
    "multi_match" : {
      "query":    "ikea", 
      "fields": [ "n*" ] 
    }
  }
}"""
response = requests.post(url, data=data)
jsonData = response.json()
print (jsonData)

In [None]:
# Get Index schema
# Sends a GET to the _mapping endpoint under qlik/document and prints the
# decoded JSON reply.

import requests
url = 'http://localhost:9200/qlik/document/_mapping'
response = requests.get(url)
jsonData = response.json()
print (jsonData)