# Different fundamental concepts for data structures and algorithms 

1. Unpacking a sequence into separate variables 

   * Unpack N-element tuple or sequence into a collection of N variables.

In [1]:
# example of unpacking N-element tuple

sample = ('alvin','research','money','passion','cars') # tuple
item1, *item2, item3 = sample
print(item1)
print(item2)
print(item3) 

item1, *item2 = sample
print(item1)
print(item2)

*item1, item2 = sample
print(item1)
print(item2)

# throwing away unwanted elements from defined tuple

item1,*_, item2 = sample
print(item1)
print(item2)

*_,item1, item2 = sample
print(item1)
print(item2)

alvin
['research', 'money', 'passion']
cars
alvin
['research', 'money', 'passion', 'cars']
['alvin', 'research', 'money', 'passion']
cars
alvin
cars
passion
cars


2. Keeping the last N items 
   
   * Want to keep a limited history of the last few items seen during iteration or some other kind of processing
   * Using dque function from collections module
     
     More information can be found from https://docs.python.org/3/library/collections.html

In [2]:
# Keeping the last N terms in a defined list using dque
from collections import deque
sample_list = [34, 78, 900, -288, 0.66]
value = 5
result = deque(sample_list, maxlen=value)
result.appendleft(1111)
result.pop()
result.popleft()
print(result)

deque([34, 78, 900], maxlen=5)


3. Finding the largest or smallest N items 
   
   * Want to make a list of the largest or smallest N items in a collection
   * Using dque module
     
     More information can be found from https://docs.python.org/3/library/heapq.html

In [3]:
# Finding the largest or smallest N items in defined list using heapq module
import heapq
import random

value = 100
main_list = []
for i in range(value):
    main_list.append(random.randint(0,1000))
print(main_list)

n = 5
result1 = heapq.nlargest(n,main_list)
print(result1)
result2 = heapq.nsmallest(n,main_list)
print(result2)

heap = list(main_list)
result3 = heapq.heapify(heap)
print(result3)

[254, 770, 834, 237, 327, 808, 307, 542, 870, 517, 471, 527, 783, 911, 335, 712, 159, 48, 500, 391, 854, 61, 515, 759, 922, 106, 723, 130, 475, 993, 498, 184, 560, 657, 169, 308, 222, 327, 478, 449, 490, 782, 288, 639, 169, 438, 977, 476, 391, 793, 944, 888, 465, 780, 691, 481, 51, 563, 230, 641, 948, 862, 679, 637, 809, 285, 235, 50, 742, 538, 54, 4, 199, 555, 455, 27, 747, 370, 750, 106, 775, 76, 387, 169, 222, 506, 831, 403, 49, 700, 522, 190, 513, 135, 469, 892, 607, 392, 13, 419]
[993, 977, 948, 944, 922]
[4, 13, 27, 48, 49]
None


4. Using defaultdict and OrderedDict functions

   * Import from collections module
   
     More information can be found from https://docs.python.org/3/library/collections.html

In [4]:
# mapping keys to multiple values in a defined dictionary using defaultdict
from collections import defaultdict
from collections import OrderedDict
dummy1 = defaultdict(list)
dummy1['alvin'].append(1000)
dummy1['alvin'].append(1000)
dummy1['duco'].append(2000)
print(dummy1)

dummy2 = defaultdict(set)
dummy2['alvin'].add(1000)
dummy2['alvin'].add(1500)
dummy2['duco'].add(2000)
print(dummy2)

dummy3 = OrderedDict()
dummy3['alvin'] = 1
dummy3['passion'] = 2
dummy3['duco'] = 3

for key in dummy3:
    print(key, dummy3[key])

defaultdict(<class 'list'>, {'alvin': [1000, 1000], 'duco': [2000]})
defaultdict(<class 'set'>, {'alvin': {1000, 1500}, 'duco': {2000}})
alvin 1
passion 2
duco 3


5. Calculations with dictionaries

   * Perform various calculations (minimum value, maximum value, sorting etc.) on a dictionary of data
   * Use of zip function for convenience
   
     More information can be found from https://docs.python.org/3.3/library/functions.html#zip

In [5]:
# calculating with defined dictionaries with zip function
from collections import OrderedDict
dummy = OrderedDict()
dummy['alvin'] = 1000
dummy['passion'] = 500
dummy['duco'] = 2000
dummy['research'] = 50
dummy['data science'] = 10000

min_value = min(zip(dummy.values(),dummy.keys()))
print(min_value)
max_value = max(zip(dummy.values(),dummy.keys()))
print(max_value)
sorted_value = sorted(zip(dummy.values(),dummy.keys()))
print(sorted_value)

## NOTE THAT ZIP function CAN ONLY BE USED ONCE. 
#If multiple entries have the same values, then the key will be used to determine the result (max or min)

(50, 'research')
(10000, 'data science')
[(50, 'research'), (500, 'passion'), (1000, 'alvin'), (2000, 'duco'), (10000, 'data science')]


6. Finding commonalities in two dictionaries

   * Finding the common values, keys etc. in two dictionaries

In [6]:
# Finding commonlities (same keys or values) in two defined dictionaries
from collections import OrderedDict
dummy = OrderedDict()
dummy['alvin'] = 1000
dummy['passion'] = 500
dummy['duco'] = 2000
dummy['research'] = 50
dummy['data science'] = 10000

dummy2 = OrderedDict()
dummy2['alvin'] = 1000
dummy2['chelsea'] = 500
dummy2['duco'] = 2000
dummy2['cars'] = 50
dummy2['data science'] = 10000

print(dummy.keys() & dummy2.keys()) # Finding keys in common
print(dummy.items() & dummy2.items()) # Finding items (keys and values) in common
print(dummy.keys() - dummy2.keys()) # Finding keys not in dummy2
print(dummy2.keys() - dummy.keys()) # Finding keys not in dummy

dummy3 = {key:dummy[key] for key in (dummy.keys() -dummy2.keys())}
print(dummy3)
dummy4 = {key:dummy2[key] for key in (dummy2.keys() -dummy.keys())}
print(dummy4)

{'duco', 'alvin', 'data science'}
{('duco', 2000), ('data science', 10000), ('alvin', 1000)}
{'research', 'passion'}
{'cars', 'chelsea'}
{'research': 50, 'passion': 500}
{'cars': 50, 'chelsea': 500}


7. Removing duplicates from a sequence while maintaining order

   * Eliminate the duplicate values in a sequence, but preserve the order of the remaining items

In [7]:
# To remove duplicates from a defined sequence while maintaining order

# for hashable items
import random
value = 100
main_list = list()
for i in range(value):
    main_list.append(random.randint(0,1000))

def duplicate (object_1):
    present = set()
    for item in object_1:
        if item not in present:
#             yield item # a very important step; used like the return keyword
            present.add(item)
    return present # alternative to yield keyword

result_1 = list(duplicate(main_list)) # using return function
print(result_1)


# for unhashable items such as dictionaries
import string
dummy_letters = string.ascii_letters # creating random letters of both uppercase and lowercase

main_list1 = []
for j in range (value):
    main_dict1 = {}
    for i in range(value):
        dummy_value = random.choice(dummy_letters)
        if dummy_value in main_dict1.keys():
            main_dict1[dummy_value] += 1
        else:
            main_dict1[dummy_value] = 1
    main_list1.append(main_dict1)
print(main_list1)

def duplicate2 (object_2, key = None):
    present = set()
    for item in object_2:
            value = item if key is None else key(item)
            if value not in present:
                present.add(value)
    return present # alternative to yield keyword

[2, 515, 9, 25, 539, 35, 548, 553, 43, 557, 565, 576, 586, 594, 596, 89, 605, 606, 620, 633, 640, 143, 657, 151, 671, 160, 678, 683, 176, 179, 196, 206, 213, 221, 740, 741, 228, 745, 746, 236, 749, 752, 251, 768, 262, 782, 270, 273, 276, 793, 291, 809, 314, 829, 831, 838, 327, 841, 351, 866, 354, 871, 873, 364, 877, 879, 368, 370, 375, 376, 887, 890, 383, 899, 902, 394, 907, 913, 929, 931, 942, 947, 952, 956, 447, 463, 976, 474, 991, 485, 487, 490, 493, 508]
[{'w': 3, 'b': 1, 'Y': 1, 'j': 7, 'P': 4, 'X': 2, 'n': 5, 'x': 4, 'm': 2, 'c': 1, 'k': 5, 'K': 1, 's': 2, 'W': 3, 'h': 2, 'F': 2, 'D': 2, 'd': 5, 'V': 2, 'p': 1, 'f': 2, 'Q': 3, 'C': 2, 'z': 2, 'a': 2, 'i': 1, 'A': 2, 't': 3, 'T': 1, 'u': 4, 'e': 1, 'U': 3, 'N': 1, 'l': 1, 'S': 1, 'J': 1, 'q': 1, 'H': 1, 'L': 2, 'M': 1, 'r': 2, 'I': 1, 'o': 2, 'B': 1, 'Z': 2, 'E': 1, 'g': 1}, {'l': 2, 'W': 2, 'R': 4, 'k': 5, 'V': 2, 'w': 4, 'Y': 1, 'y': 6, 'E': 2, 'c': 3, 'J': 3, 'G': 2, 'x': 3, 'X': 5, 'r': 4, 's': 3, 'B': 1, 'n': 2, 'A': 2, 'Z': 

8. Naming a slice

   * Part of data cleaning

In [8]:
# Naming a defined slice for data cleaning
import random
value = 100
main_list = list()
for i in range(value):
    main_list.append(random.randint(0,1000))
print(main_list)

a = random.randint(0,100)
b = random.randint(0,100)
result1 = slice(2,20) # creates a slice object that can be used anywhere a slice is allowed
print(main_list[result1])

result2 = slice(5,50,2)
print(result2.start)
print(result2.stop)
print(result2.step)

[715, 568, 756, 454, 209, 880, 296, 46, 622, 995, 99, 685, 797, 77, 460, 239, 199, 540, 451, 940, 936, 509, 893, 787, 416, 925, 670, 238, 361, 355, 314, 178, 187, 408, 770, 511, 235, 621, 359, 835, 256, 179, 386, 360, 681, 623, 336, 144, 944, 567, 257, 488, 690, 569, 521, 981, 147, 354, 467, 48, 481, 939, 782, 219, 242, 557, 535, 720, 58, 831, 27, 887, 678, 599, 269, 281, 309, 690, 997, 462, 897, 333, 973, 373, 698, 137, 626, 928, 603, 872, 616, 229, 419, 171, 745, 489, 26, 528, 60, 877]
[756, 454, 209, 880, 296, 46, 622, 995, 99, 685, 797, 77, 460, 239, 199, 540, 451, 940]
5
50
2


9. Determining the most frequently occurring items in a sequence

   * We have a sequence of items and would like to determine to most frequently occurring items in the particular sequence

In [9]:
# Determine most frequent occurring items in a defined sequence
import string
from collections import Counter
dummy_letters = string.ascii_letters 
value = random.randint(0,10e3)

main_dict = {}
for i in range(value):
    dummy_letter = random.choice(dummy_letters)
    if dummy_letter in main_dict.keys():
        main_dict[dummy_letter] += 1
    else:
        main_dict[dummy_letter] = 1

main_list = []
for i in range(value):
    value2 = random.randint(1,10)
    word = ''
    for j in range(value2):
        word += random.choice(dummy_letters)
    main_list.append(word)

word_counters = Counter(main_list)
number = random.randint(1,100)
top_number = word_counters.most_common(number)

main_list2 = []
for i in range(value):
    value2 = random.randint(1,10)
    word = ''
    for j in range(value2):
        word += random.choice(dummy_letters)
    main_list2.append(word)
    
word_counters2 = Counter(main_list2)
number2 = random.randint(1,100)
top_number2 = word_counters.most_common(number2)

for word in main_list2:
    word_counters[word] += 1

result1 = word_counters + word_counters2
result2 = max(word_counters - word_counters2)
print(result1)
#print(result2)

Counter({'y': 43, 'b': 36, 'z': 34, 't': 32, 'R': 31, 'Z': 30, 'e': 29, 'Q': 29, 'u': 29, 'l': 28, 'p': 28, 'T': 28, 'm': 28, 'k': 27, 'K': 27, 'P': 27, 'W': 27, 'F': 26, 'v': 26, 'Y': 26, 'H': 26, 'N': 26, 'j': 25, 'n': 25, 'C': 25, 'V': 24, 'S': 24, 'o': 24, 'x': 23, 'a': 23, 'i': 23, 'J': 22, 'c': 22, 'q': 21, 'U': 21, 'E': 21, 'X': 20, 'h': 20, 'd': 20, 's': 20, 'B': 19, 'A': 18, 'f': 18, 'r': 18, 'G': 18, 'O': 18, 'M': 17, 'D': 17, 'I': 16, 'g': 15, 'w': 14, 'L': 13, 'WT': 6, 'DL': 5, 'zL': 5, 'eL': 5, 'yr': 4, 'kJ': 4, 'sd': 4, 'Zj': 4, 'GF': 4, 'AP': 4, 'fO': 4, 'Sm': 4, 'Rl': 4, 'fl': 4, 'lL': 4, 'Dc': 4, 'nZ': 4, 'MI': 4, 'vJ': 4, 'TA': 4, 'ty': 4, 'IT': 4, 'wX': 4, 'SG': 4, 'OR': 4, 'xw': 4, 'oS': 4, 'kD': 4, 'UJ': 4, 'rV': 4, 'Ne': 3, 'sU': 3, 'Dj': 3, 'vx': 3, 'Gm': 3, 'gD': 3, 'yL': 3, 'Cp': 3, 'VH': 3, 'FX': 3, 'Xb': 3, 'kS': 3, 'nE': 3, 'pJ': 3, 'YM': 3, 'ku': 3, 'Em': 3, 'LZ': 3, 'On': 3, 'UM': 3, 'yX': 3, 'en': 3, 'xN': 3, 'wM': 3, 'ax': 3, 'pL': 3, 'KB': 3, 'HJ': 3, '

10. Sorting a list of dictionaries by a common key

   * We have a sequence of items and would like to determine to most frequently occurring items in the particular sequence
   * Use itemgetter function from the operator module
   
     More information can be found from https://docs.python.org/3/library/operator.html

In [10]:
# Sorting a defined list of dictionaries by a common key or keys
from operator import itemgetter
import random

value = random.randint(1,10000)
main_list = []
for i in range(value):
    sub_dict = {}
    sub_dict['index'] = random.randint(1,10000)
    main_list.append(sub_dict)
result1 = sorted(main_list, key = itemgetter('index'))

main_list2 = []
for i in range(value):
    sub_dict2 = {}
    sub_dict2['index'] = random.randint(1,10000)
    sub_dict2['number'] = random.randint(1,10000)
    main_list2.append(sub_dict2)
result2 = sorted(main_list2, key = itemgetter('index','number'))

# alternate way of doing so
main_list3 = []
for i in range(value):
    sub_dict3 = {}
    sub_dict3['index'] = random.randint(1,10000)
    sub_dict3['number'] = random.randint(1,10000)
    main_list3.append(sub_dict3)
result3 = sorted(main_list2, key = lambda x: (x['index']))
print(result3)

[{'index': 1, 'number': 7044}, {'index': 2, 'number': 4157}, {'index': 7, 'number': 362}, {'index': 7, 'number': 6239}, {'index': 8, 'number': 1014}, {'index': 13, 'number': 2786}, {'index': 13, 'number': 7431}, {'index': 17, 'number': 4671}, {'index': 21, 'number': 5565}, {'index': 21, 'number': 9302}, {'index': 21, 'number': 2676}, {'index': 23, 'number': 8290}, {'index': 24, 'number': 3552}, {'index': 24, 'number': 5686}, {'index': 25, 'number': 8055}, {'index': 27, 'number': 3324}, {'index': 28, 'number': 9246}, {'index': 28, 'number': 5627}, {'index': 30, 'number': 906}, {'index': 30, 'number': 375}, {'index': 33, 'number': 4313}, {'index': 37, 'number': 6856}, {'index': 38, 'number': 6133}, {'index': 39, 'number': 1918}, {'index': 41, 'number': 305}, {'index': 45, 'number': 7415}, {'index': 45, 'number': 212}, {'index': 47, 'number': 2440}, {'index': 50, 'number': 4472}, {'index': 51, 'number': 592}, {'index': 53, 'number': 508}, {'index': 58, 'number': 4180}, {'index': 58, 'numb

11. Filtering sequence elements

   * Extract values or reduce the sequence by using some criteria and list comprehensions

In [11]:
# filtering sequence elements using list comprehensions
import random 
value = random.randint(1,100)

main_list = [random.randint(1,value) for n in range(value)]
main_list2 = [n for n in (main_list) if n > 500]
print(main_list2)

[]


12. Extracting a subset of a dictionary

   * Making a dictionary that is a subset of another dictionary

In [12]:
# extracting a subset of defined dictionary
import random
import string
from collections import Counter
dummy_letters = string.ascii_letters 

value = random.randint(1,10000)
main_dict = {}
for i in range(value):
    main_dict[str(random.choice(dummy_letters))] = random.randint(1,10000)
new_dict = dict((key,value) for key, value in main_dict.items() if value > random.randint(1,10000))
print(new_dict)
new_dict2 = {key:value for key, value in main_dict.items() if value > random.randint(1,10000)}
print(new_dict2)

{'m': 9841, 'z': 8969, 'b': 8263, 'X': 8476, 'l': 6100, 'E': 6563, 'g': 6093, 'L': 8793, 'F': 3930, 'j': 4798, 'o': 7369, 'Z': 1395, 'u': 9219, 's': 2931, 'U': 8174, 'D': 5764, 'Q': 5261, 'y': 2148, 'I': 6300, 'C': 5669, 'T': 2524, 'e': 5170, 'c': 9756, 'W': 7192, 'V': 4765}
{'m': 9841, 'z': 8969, 'b': 8263, 'X': 8476, 'l': 6100, 'r': 4230, 'h': 5950, 'E': 6563, 'g': 6093, 'L': 8793, 'w': 1733, 'j': 4798, 'o': 7369, 'u': 9219, 'D': 5764, 'a': 2673, 'A': 5895, 'I': 6300, 'e': 5170, 'c': 9756, 'W': 7192}
