# Redis Assignment
Rafaila Galanopoulou 8160018 \
Big Data Management Systems Course 2020 \
Professor: Damianos Chatziantoniou

In [1]:
import redis
import random 
import pandas as pd
import numpy as np

### Data Creation for the scope of project

In [2]:
# Build a 50-row sample frame with two columns, 'ID' and 'Transaction',
# filled with random integers drawn from [0, 20).
df = pd.DataFrame(np.random.randint(0,20,size=(50, 2)), columns=['ID', 'Transaction'])
# Turn every transaction number x into the label "T<x>" (e.g. 7 -> "T7").
df.Transaction = df.Transaction.apply(lambda x: f"T{x}")

In [3]:
# Persist the sample data as both a CSV file and an Excel workbook.
# NOTE(review): with the pandas defaults the row index is written as an extra
# leading column, so 'ID' and 'Transaction' end up one position further right
# when the files are read back -- confirm the column positions passed to the
# create_KLStore_* functions account for this.
df.to_csv('sample.csv')
df.to_excel('sample.xlsx')

### Connect with Redis

In [4]:
# Connection settings for Redis.
# Replace these with the values from your own Redis configuration file.
redis_host = "localhost"
redis_port = 6379
redis_password = ""
# Build the shared pool from the settings above. When redis.Redis() is given a
# connection_pool, redis-py ignores its own host/port/password/db/
# decode_responses arguments, so they have to be configured on the pool itself
# (otherwise replies come back as bytes from a default localhost pool).
p = redis.ConnectionPool(
    host=redis_host,
    port=redis_port,
    password=redis_password,
    db=0,
    decode_responses=True,
)

#### Create_KLStore (name, data-source, query-string, position1, position2, direction)
This function creates in Redis a KL store with name "name" using the data source found in "data-source". Data sources are described in JSON files (introduced later) and, for the scope of this project, can be either a csv file, a relational database or an excel file.

In the case of a **csv file**, "query-string" is empty and "position1" and "position2" are two integers specifying the column positions that will be used to form the (u,v) pairs of S (as described earlier). Data sources can be found in this [JSON file](csvdatasource.json).

In [5]:
def create_KLStore_csv (name, data_source, query_string, position1, position2, direction):
    """Create a KL store in Redis from two columns of a CSV file.

    Every row of the CSV contributes one (u, v) pair; v is pushed with LPUSH
    onto the Redis list stored under the key "<name>:<u>".

    Args:
        name: Name of the KL store; used as the key prefix.
        data_source: JSON description of the data source, read with
            ``pd.read_json``; the CSV path is taken from
            ``datasource.filename``.
        query_string: Unused for CSV sources (kept for a uniform signature).
        position1: 1-based column position of the first column.
        position2: 1-based column position of the second column.
        direction: 1 to key the lists by column ``position1`` (values from
            ``position2``), 2 for the opposite orientation.

    Returns:
        None. Any error (including an unsupported ``direction``) is printed
        rather than raised.
    """
    try:
        if direction == 1:
            key_col, value_col = position1 - 1, position2 - 1
        elif direction == 2:
            key_col, value_col = position2 - 1, position1 - 1
        else:
            # Previously an unknown direction was silently ignored.
            raise ValueError(f"direction must be 1 or 2, got {direction!r}")

        # create the Redis Connection object
        r = redis.Redis(connection_pool=p, host=redis_host, port=redis_port,
                        password=redis_password, db=0, decode_responses=True)

        csvinfo = pd.read_json(data_source)

        # get the csv filename
        file = csvinfo.datasource.filename

        # read the csv file
        df = pd.read_csv(file)

        # Select the columns by position explicitly: integer [] on a row with
        # string labels relies on a deprecated positional fallback. Reading
        # whole columns also avoids the per-row dtype upcasting of iterrows().
        keys = df.iloc[:, key_col]
        values = df.iloc[:, value_col]

        # Queue all pushes and send them in one round trip.
        pipe = r.pipeline()
        for u, v in zip(keys, values):
            pipe.lpush(f"{name}:{u}", str(v))
        pipe.execute()

    except Exception as e:
        print(e)


In the case of an **excel** file, "query-string" contains the index of the worksheet and "position1" and "position2" are two integers specifying the column positions that will be used to form the (u,v) pairs of S (as described earlier). Data sources can be found in this [JSON file](exceldatasource.json).

In [6]:
def create_KLStore_excel (name, data_source, query_string, position1, position2, direction):
    """Create a KL store in Redis from two columns of an Excel worksheet.

    Every row of the worksheet contributes one (u, v) pair; v is pushed with
    LPUSH onto the Redis list stored under the key "<name>:<u>".

    Args:
        name: Name of the KL store; used as the key prefix.
        data_source: JSON description of the data source, read with
            ``pd.read_json``; the workbook path is taken from
            ``datasource.filename``.
        query_string: Worksheet selector, passed unchanged to
            ``pd.read_excel`` as its sheet argument.
        position1: 1-based column position of the first column.
        position2: 1-based column position of the second column.
        direction: 1 to key the lists by column ``position1`` (values from
            ``position2``), 2 for the opposite orientation.

    Returns:
        None. Any error (including an unsupported ``direction``) is printed
        rather than raised.
    """
    try:
        if direction == 1:      # KL1: keyed by position1
            key_col, value_col = position1 - 1, position2 - 1
        elif direction == 2:    # KL2: keyed by position2
            key_col, value_col = position2 - 1, position1 - 1
        else:
            # Previously an unknown direction was silently ignored.
            raise ValueError(f"direction must be 1 or 2, got {direction!r}")

        # create the Redis Connection object
        r = redis.Redis(connection_pool=p, host=redis_host, port=redis_port,
                        password=redis_password, db=0, decode_responses=True)

        excelinfo = pd.read_json(data_source)

        # get the excel filename
        file = excelinfo.datasource.filename

        # read the excel file
        df = pd.read_excel(file, query_string)

        # Select the columns by position explicitly: integer [] on a row with
        # string labels relies on a deprecated positional fallback.
        keys = df.iloc[:, key_col]
        values = df.iloc[:, value_col]

        # Keys must use the "<name>:<u>" form: the other KL-store functions
        # locate a store by scanning for "<name>:*", so a key built without
        # the colon would never be found.
        pipe = r.pipeline()
        for u, v in zip(keys, values):
            pipe.lpush(f"{name}:{u}", str(v))
        pipe.execute()

    except Exception as e:
        print(e)

In the case of a **relational database**, "query-string" is an SQL statement of the form SELECT col1, col2 WHERE "etc". "direction" has the value 1 or 2, specifying whether KL1(D) or KL2(D) should be implemented.

#### Filter_KLStore(name1, expression)
This function gets a KL store in Redis named "name1" and a string called "expression" representing a valid python boolean expression and applies this expression on each element of each list of "name1". If the return value is true, the element remains in the list, otherwise it is removed. Come up with a convention on how the element of the list is mentioned within the "expression". \
`scan_iter()` is superior to `keys()` for large numbers of keys because it gives you an iterator you can use rather than trying to load all the keys into memory. So I am using `scan_iter()` to get all keys from the store matching a pattern and check, one by one, whether the expression is true:

In [7]:
def filter_klstore (name1, expression):
    """Keep only the list elements of a KL store that satisfy an expression.

    Convention: inside ``expression`` the current list element is available
    under the name ``value`` (a string), e.g. ``"len(value) > 2"``. The
    element's list index and Redis key are visible as ``i`` and ``key``.

    Args:
        name1: Name of the KL store; its lists are the keys matching
            "<name1>:*".
        expression: Python boolean expression evaluated once per element.
            Elements for which it is false are removed from their list.

    Returns:
        None. Any error is printed rather than raised.
    """
    try:
        r = redis.Redis(connection_pool=p, host=redis_host, port=redis_port,
                        password=redis_password, db=0, decode_responses=True)
        # parse all the keys in name1 KLstore
        for key in r.scan_iter(name1 + ':*'):
            rejected = set()
            for i, value in enumerate(r.lrange(key, 0, -1)):
                if isinstance(value, bytes):
                    # The pool may not decode replies; the expression
                    # convention promises a string.
                    value = value.decode()
                # SECURITY: eval() executes arbitrary code. `expression` must
                # come from a trusted source only.
                if not eval(expression):
                    rejected.add(value)
            # LREM's second argument is a *count*, not an index: 0 removes
            # every occurrence of the value, so one call per distinct
            # rejected value is enough.
            for value in rejected:
                r.lrem(key, 0, value)

    except Exception as e:
        print(e)

#### Apply_KLStore (name1, func)
This function gets a KL store in Redis named "name1" and a python function named "func" - which gets a string and returns a string - and applies "func" on each element of a list, for all lists of the KL store "name1", thus transforming the lists of the KL store.

In [8]:
def apply_klstore (name1, func):
    """Replace every list element of a KL store with ``func(element)``.

    Args:
        name1: Name of the KL store; its lists are the keys matching
            "<name1>:*".
        func: Callable that takes a string and returns a string.

    Returns:
        None. Any error is printed rather than raised; the updates of one
        list are sent together, so a list whose ``func`` call fails is left
        unchanged.
    """
    try:
        # create the Redis Connection object
        r = redis.Redis(connection_pool=p, host=redis_host, port=redis_port,
                        password=redis_password, db=0, decode_responses=True)
        # SCAN may report the same key more than once; remember what has been
        # processed so func is never applied twice to the same list.
        processed = set()
        for key in r.scan_iter(name1 + ':*'):
            if key in processed:
                continue
            processed.add(key)

            pipe = r.pipeline()
            for i, value in enumerate(r.lrange(key, 0, -1)):
                if isinstance(value, bytes):
                    # func is specified to receive a string.
                    value = value.decode()
                # overwrite position i with the transformed value
                pipe.lset(key, i, func(value))
            pipe.execute()

    except Exception as e:
        print(e)

#### Aggr_KLStore (name1, aggr)
This function gets a KL store in Redis named "name1" and a string named "aggr" that can have one of the values "avg/sum/count/min/max" and aggregates each list of the KL store "name1" according to the specified aggregate, updating the list with just one item, the result of the aggregation.

In [9]:
def aggr_klstore (name1, aggr):
    """Collapse every list of a KL store into a single aggregated value.

    Args:
        name1: Name of the KL store; its lists are the keys matching
            "<name1>:*".
        aggr: One of 'avg', 'sum', 'count', 'min', 'max'.

    Returns:
        None. Any error is printed rather than raised. An unsupported
        ``aggr`` is reported *before* any list is touched (previously the
        lists were emptied and nothing was written back).
    """
    def to_number(raw):
        # Prefer integers so integer inputs give integer results; fall back
        # to float for values such as "2.5".
        try:
            return int(raw)
        except ValueError:
            return float(raw)

    reducers = {
        'max': max,
        'min': min,
        'sum': sum,
        'avg': lambda nums: sum(nums) / len(nums),
    }

    try:
        kind = str(aggr)
        if kind != 'count' and kind not in reducers:
            raise ValueError(
                f"unsupported aggregate {aggr!r}; "
                "expected one of avg/sum/count/min/max")

        # create the Redis Connection object
        r = redis.Redis(connection_pool=p, host=redis_host, port=redis_port,
                        password=redis_password, db=0, decode_responses=True)

        # SCAN may report the same key more than once; aggregating an already
        # aggregated list again would be wrong (e.g. 'count' would become 1).
        processed = set()
        for key in r.scan_iter(name1 + ':*'):
            if key in processed:
                continue
            processed.add(key)

            items = r.lrange(key, 0, -1)
            if not items:
                # The key vanished between SCAN and LRANGE; nothing to do.
                continue

            if kind == 'count':
                # Counting does not require the elements to be numeric.
                result = len(items)
            else:
                result = reducers[kind]([to_number(v) for v in items])

            # Push the result to the head and trim the list down to that one
            # element, atomically (MULTI/EXEC), so the key never disappears.
            pipe = r.pipeline()
            pipe.lpush(key, result)
            pipe.ltrim(key, 0, 0)
            pipe.execute()
    except Exception as e:
        print(e)


#### LookUp_KLStore(name1, name2)
This function gets two KL stores named "name1" and "name2" and for each element e of a list L in "name1", performs a lookup for e in the keys of "name2", gets the list L’ of the matched key, 
and replaces e in L with the elements of L’. This should happen for all lists in "name1". 

In [10]:
def lookUp_klstore (name1, name2):
    """Expand the elements of one KL store through the lists of another.

    For every element e of every list in ``name1``: if ``name2`` holds a list
    under the key "<name2>:<e>" (e stripped of surrounding whitespace), e is
    replaced, in place, by the elements of that list; otherwise e is kept.

    Args:
        name1: Name of the KL store to rewrite (keys matching "<name1>:*").
        name2: Name of the KL store used for the lookups.

    Returns:
        None. Any error is printed rather than raised.
    """
    def as_text(raw):
        # The pool may not decode replies; keys are built by string
        # concatenation, so make sure we work with str.
        return raw.decode() if isinstance(raw, bytes) else raw

    try:
        # create the Redis Connection object
        r = redis.Redis(connection_pool=p, host=redis_host, port=redis_port,
                        password=redis_password, db=0, decode_responses=True)

        # KL stores are plain lists under "<name>:<key>" (not Redis sets), so
        # the keys are discovered with SCAN. Collecting them into a set first
        # removes SCAN duplicates and avoids rewriting keys while scanning.
        keys1 = {as_text(k) for k in r.scan_iter(name1 + ':*')}

        # Phase 1: compute all new lists before writing anything, so the
        # result does not depend on processing order when name1 == name2.
        lookups = {}
        rewritten = {}
        for key1 in keys1:
            expanded = []
            changed = False
            for value1 in r.lrange(key1, 0, -1):
                value1 = as_text(value1)
                key2 = name2 + ':' + value1.strip()
                if key2 not in lookups:
                    # LRANGE on a missing key returns an empty list.
                    lookups[key2] = r.lrange(key2, 0, -1)
                list2 = lookups[key2]
                if list2:
                    expanded.extend(list2)
                    changed = True
                else:
                    expanded.append(value1)
            if changed:
                rewritten[key1] = expanded

        # Phase 2: replace each affected list atomically (MULTI/EXEC).
        for key1, expanded in rewritten.items():
            pipe = r.pipeline()
            pipe.delete(key1)
            pipe.rpush(key1, *expanded)
            pipe.execute()

    except Exception as e:
        print(e)
        

### Testing Time

In [11]:
def exfunc(a):
    """Example transformation: return ``a`` with a fixed marker appended."""
    marker = "example function applied"
    return a + marker

In [12]:
# Run the four KL-store operations, in this order, on the stores 'k1' and 'k2'.
# NOTE(review): filter_klstore removes the elements for which the expression is
# false, so 'len(value) < 1' keeps only empty strings -- confirm this is the
# intended demo input, since the later steps operate on whatever is left.
filter_klstore('k1','len(value) < 1')
apply_klstore('k1', exfunc)
aggr_klstore ('k1', 'max')
lookUp_klstore('k1','k2')

## References

[Installation guide](https://realpython.com/python-redis/) \
[redis-py's Documentation](https://redis-py.readthedocs.io/en/stable/) \
[All Redis commands](https://cheatography.com/tasjaevan/cheat-sheets/redis/) \
[Redis Lists Tutorial](https://www.tutorialspoint.com/redis/redis_lists.htm) \
[Reading and Writing Data with redis-py](https://redislabs.com/lp/python-redis/) \
[Real-World Redis Tips](https://blog.heroku.com/real-world-redis-tips)