# Python Data Structures

## String Manipulation

slicing - slice out substrings, sublists, subtuples using indexes

[start : end + 1 : step]

In [5]:
# Index slicing on a string: x[start:stop:step], where stop is exclusive.
print("Demo of index slicing:")

x = 'computer'
print(x[1:4])       # indexes 1, 2, 3 -> 'omp'
print(x[1:6:2])     # indexes 1, 3, 5 -> 'opt'
print(x[3:])        # index 3 through the end -> 'puter'
print(x[:5])        # start through index 4 -> 'compu'
print(x[-1])        # negative indexes count from the end: last character -> 'r'
print(x[-3:])       # last three characters -> 'ter'
print(x[:-2])       # everything except the last two characters -> 'comput'

Demo of index slicing:
omp
opt
puter
compu
r
ter
comput


Slicing starts at the start index and stops one index past the last item you want (the stop index itself is not included). You can also add a step, which tells Python how many indexes to advance each time - for example, a step of 2 takes every second item.

adding and concatenating - combine 2 sequences of the same type by using +.

In [9]:
# The + operator joins two sequences of the same type into a new sequence.
x = 'horse' + 'shoe'                            # str + str
y = ['pig', 'cow'] + ['horse']                  # list + list
z = ('Kevin', 'Niklas', 'Jenny') + ('Craig',)   # tuple + one-item tuple

for joined in (x, y, z):
    print(joined)


horseshoe
['pig', 'cow', 'horse']
('Kevin', 'Niklas', 'Jenny', 'Craig')


For tuples, a one-item tuple must be written with a trailing comma - `('Craig',)` - for the interpreter to recognize it as a tuple. Without the comma, `('Craig')` is just a string in parentheses, and adding it to a tuple results in a TypeError.

### Collections

checking membership - test whether an item is or is not in a sequence

In [None]:
# `in` and `not in` work the same way on strings, lists and tuples.
x = 'bug'
y = ['pig', 'cow', 'horse']
z = ('Kevin', 'Niklas', 'Jenny', 'Craig')

found_in_string = 'u' in x
missing_from_list = 'cow' not in y
found_in_tuple = 'Niklas' in z

print(found_in_string)      # True
print(missing_from_list)    # False - 'cow' is in the list
print(found_in_tuple)       # True

iterating - iterating through the items in a sequence

In [None]:
# values only
x = [7, 8, 3]
for value in x:
    print(value)

# position and value together: enumerate yields (index, item) pairs
y = [7, 8, 3]
for pair in enumerate(y):
    position, value = pair
    print(position, value)

minimum - find the minimum item in a sequence lexicographically. Alpha or numeric types, but cannot mix types.

In [12]:
x = 'bug'                                       # string: its characters are compared
y = ['pig', 'cow', 'horse']                     # list of strings
z = ('Kevin', 'Niklas', 'Jenny', 'Craig')       # tuple of strings
yy = [3, 4, 1, 7, 8]                            # list of numbers

# Smallest item of each sequence, one result per line.
for sequence in (x, y, z, yy):
    print(min(sequence))

b
cow
Craig
1


maximum - find the maximum item in a sequence lexicographically. Alpha or numeric types, but cannot mix types.

In [13]:
x = 'bug'                                       # string: its characters are compared
y = ['pig', 'cow', 'horse']                     # list of strings
z = ('Kevin', 'Niklas', 'Jenny', 'Craig')       # tuple of strings
yy = [3, 4, 1, 7, 8]                            # list of numbers

# Largest item of each sequence, one result per line.
for sequence in (x, y, z, yy):
    largest = max(sequence)
    print(largest)

u
pig
Niklas
8


sum - find the sum of items in a sequence. Entire sequence must be numeric.

In [14]:
# A sequence that contains a non-number cannot be summed:
# x = [5, 7, 'bug']
# print(sum(x)) # generates an error

# list
y = [2, 5, 8, 12]
total = sum(y)
print(total)
last_two = y[-2:]           # slice first to total only part of the list
print(sum(last_two))

# tuple
z = (50, 4, 7, 19)
print(sum(z))


27
20
80


### Sorting methods

basic sorting - returns a new list of items in sorted order.
Does not change the original list.

In [15]:
x = 'bug'                                       # string
y = ['pig', 'cow', 'horse']                     # list
z = ('Kevin', 'Niklas', 'Jenny', 'Craig')       # tuple
yy = [3, 4, 1, 7, 8]                            # numeric list

# sorted() returns a new list for every input type and leaves the input as it was.
for sequence in (x, y, z, yy):
    print(sorted(sequence))

['b', 'g', 'u']
['cow', 'horse', 'pig']
['Craig', 'Jenny', 'Kevin', 'Niklas']
[1, 3, 4, 7, 8]


sorting by second letter.
Add a key parameter and a lambda function to return the second character.
(the word 'key' here is a defined parameter name, k is an arbitrary variable name)

In [16]:
z = ('Kevin', 'Niklas', 'Jenny', 'Craig')
# key= is called once per item; here it returns the second character, so the
# names are ordered by 'e', 'e', 'i', 'r'. Items with equal keys ('Kevin' and
# 'Jenny') keep their original relative order.
print(sorted(z, key=lambda k: k[1]))

['Kevin', 'Jenny', 'Niklas', 'Craig']


count of item

In [18]:
x = 'bug'                                       # string
y = ['pig', 'cow', 'horse', 'cow']              # list
z = ('Kevin', 'Niklas', 'Jenny', 'Craig')       # tuple
yy = [3, 4, 1, 7, 8]                            # numeric list

# (sequence, item to count) pairs; count() gives 0 when the item is absent
lookups = [(x, 'p'), (y, 'cow'), (z, 'Kevin'), (yy, 7)]
for sequence, target in lookups:
    print(sequence.count(target))

0
2
1
1


index of item - returns the index of the first occurrence of an item

In [22]:
# string
x = 'hippo'
print(x.index('p'))     # 2 - 'hippo' has a 'p' at indexes 2 and 3; only the first is reported
print(x.index('p'))     # same call, same answer - NOTE(review): looks like an accidental duplicate line

# list
y = ['pig', 'cow', 'horse', 'cow']
print(y.index('cow'))   # 1 - the second 'cow' at index 3 is ignored

# tuple
z = ('Kevin', 'Niklas', 'Jenny', 'Craig')
print(z.index('Kevin')) # 0

# numeric
yy = [3, 4, 1, 7, 8]
print(yy.index(7))      # 3

2
2
1
0
3


## Collections in Detail

### Lists

- General purpose
- Most widely used data structure
- Grow and shrink size as needed
- Sequence type
- Sortable (very useful for sorting)

constructors - creating a new list

In [5]:
x = list()                      # empty list
y = ['a', 25, 'dog', 8.43]      # list literal; items may be of different types
tuple1 = (10, 20)
z = list(tuple1) # this creates a copy of tuple1 in list form
print(x)
print(y)
print(z)

# list comprehension
a = [m for m in range(8)]               # every value from 0 through 7
print(a)
b = [i**2 for i in range(10) if i>4]    # squares of 5 through 9 only (the if filters)
print(b)

[]
['a', 25, 'dog', 8.43]
[10, 20]
[0, 1, 2, 3, 4, 5, 6, 7]
[25, 36, 49, 64, 81]


delete - delete a list or an item in a list

In [6]:
x = [5, 3, 8, 6]
del x[1]        # drop the item at index 1 (the 3); del is a statement, so no parentheses are needed
print(x)
del x           # unbind the name x entirely; using x after this raises NameError

[5, 8, 6]


append - append an item to a list

In [7]:
x = [5, 3, 8, 6]
x.append(7)     # adds 7 as one new item at the end, changing x itself
print(x)

[5, 3, 8, 6, 7]


extend - append a sequence to a list. useful for adding another list to a list without making it 2D. <br>Similar to concatenation

In [20]:
x = [5, 3, 8, 6]
y = [12, 13]

print("Extend vs append comparison:")

# extend: each item of y becomes an item of x
x.extend(y)
print(f"x extend y: {x}")

# drop the items just added so x is back to its starting contents
del x[-len(y):]

# append: y itself becomes a single (nested) item of x
x.append(y)
print(f"x append y: {x}")

Extend vs append comparison:
x extend y: [5, 3, 8, 6, 12, 13]
x append y: [5, 3, 8, 6, [12, 13]]


insert - insert an item at a given index

In [21]:
x = [5, 3, 8, 6]
x.insert(1, 7)              # 7 goes to index 1; the items from there on shift right
print(x)
x.insert(1, ['a', 'm'])     # inserting ['a', 'm'] at index 1 - the whole list goes in as one item
print(x)

[5, 7, 3, 8, 6]
[5, ['a', 'm'], 7, 3, 8, 6]


pop - pops last item off the list and returns item

In [23]:
x = [5, 3, 8, 6]
x.pop()         # removes the last item (6); the returned value is discarded here
print(x)        # prints the list after 6 was popped
print(x.pop())  # prints the popped item, which is 8

[5, 3, 8]
8


remove - remove the first instance of an item

In [24]:
x = [5, 3, 8, 6, 3]
x.remove(3)     # removes only the first 3 (index 1); the 3 at the end stays
print(x)

[5, 8, 6, 3]


reverse - reverse the order of the list. It works in place and changes the original list (it does not sort anything).

In [25]:
x = [5, 3, 8, 6]
x.reverse()     # flips the order of x itself; nothing is sorted
print(x)

[6, 8, 3, 5]


sort - sort the list in place

sorted(x) returns new sorted list without changing the original list x. <br>
x.sort() puts the items of x in sorted order(sorts in place)

In [29]:
x = [5, 3, 8, 6]
x.sort()        # reorders x itself and returns None
print(x)

# sorted gives the same ordering, but as a new list - x itself stays [5, 3, 8, 6]
x = [5, 3, 8, 6] 
sorted(x)       # last expression in the cell, so the notebook displays its value

[3, 5, 6, 8]


[3, 5, 6, 8]

### Tuples

- immutable (can't add/change)
- Useful for fixed data
- Faster than lists
- Sequence type

constructors - creating new tuples

In [32]:
# different constructors for tuples
x = ()              # empty tuple
x = (1, 2, 3)       # parenthesized literal
x = 1, 2, 3         # the commas make the tuple; the parentheses are optional
x = 2,              # one-item tuple: the trailing comma is required
print(x, type(x))   # only the last assignment is still bound to x

# converting a list into a tuple
list1 = [2, 4, 6]
x = tuple(list1)
print(x, type(x))

(2,) <class 'tuple'>
(2, 4, 6) <class 'tuple'>


tuples are immutable, but member objects may be mutable

In [33]:
x = (1, 2, 3)
# A tuple rejects item deletion and item assignment:
# del(x[1])         # fails
# x[1] = 8          # fails
print(x)

inner = [1, 2]
y = (inner, 3)      # a tuple whose first item is a list
del y[0][1]         # allowed: this changes the list, not the tuple
print(y)

y = y + (4,)        # builds a new, longer tuple and rebinds y to it
print(y)

(1, 2, 3)
([1], 3)
([1], 3, 4)


### Sets

- Store non-duplicate items
- Very fast access vs Lists
- Math Set operations (union, intersect)
- Sets are Unordered

In [34]:
# Set Constructors

x = {3, 5, 3, 5}    # duplicates collapse: only 3 and 5 remain
print(x)

y = set()           # empty set ({} would create an empty dict instead)
print(y)

list1 = [2, 3, 4]
z = set(list1)      # build a set from the items of a list
print(z)

{3, 5}
set()
{2, 3, 4}


set operations

In [36]:
x = {3, 8, 5}
print(x)

x.add(7)                    # add one item
print(x)

x.remove(3)                 # remove a specific item
print(x)

size = len(x)               # number of items in the set
print(size)

has_five = 5 in x           # membership test
print(has_five)

popped = x.pop()            # remove and return an arbitrary item
print(popped, x)

x.clear()                   # empty the set
print(x)

{8, 3, 5}
{8, 3, 5, 7}
{8, 5, 7}
3
True
8 {5, 7}
set()


<b> Mathematical set operations </b>                    <br>
intersection (AND): set1 & set2                         <br>
union (OR): set1 | set2                                 <br>
symmetric difference (XOR): set1 ^ set2                 <br>
difference (in set1 but not in set2): set1 - set2       <br>
subset (set2 contains set1): set1 <= set2               <br>
superset (set1 contains set2): set1 >= set2


In [37]:
s1 = {1, 2, 3}
s2 = {3, 4, 5}
print(s1 & s2)          # AND
print(s1 | s2)          # UNION/OR
print(s1 ^ s2)          # XOR
print(s1 - s2)          # DIFFERENCE: in s1 but not in s2
print(s1 <= s2)         # SUBSET: is every item of s1 also in s2?
print(s1 >= s2)         # SUPERSET: is every item of s2 also in s1?

{3}
{1, 2, 3, 4, 5}
{1, 2, 4, 5}
{1, 2}
False
False


### Dictionaries

- Key/Value Pairs
- Associative array, like Java HashMap
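- Dicts keep insertion order (guaranteed since Python 3.7), but they are not sorted and are accessed by key, not by position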

In [4]:
# Three ways to build the same dictionary:
# a literal, a list of (key, value) pairs, and keyword arguments.
print("\ndictionary constructors:\n")

x = {'pork': 25.3, 'beef': 33.8, 'chicken': 22.7}
print(x)
pairs = [('pork', 25.3), ('beef', 33.8), ('chicken', 22.7)]
x = dict(pairs)
print(x)
x = dict(pork=25.3, beef=33.8, chicken=22.7)
print(x)


print("\ndictionary operations:\n")

# assigning to a key inserts it when new and overwrites it when it already exists
x['shrimp'] = 38.2
x['lobster'] = 100.8
print(x)

del x['shrimp']             # remove one key and its value
print(x)

item_count = len(x)         # number of key/value pairs
print(item_count)

x.clear()                   # remove every item; x is now an empty dict
print(x)

del x                       # unbind the name x; the dict can be reclaimed once nothing refers to it


dictionary constructors:

{'pork': 25.3, 'beef': 33.8, 'chicken': 22.7}
{'pork': 25.3, 'beef': 33.8, 'chicken': 22.7}
{'pork': 25.3, 'beef': 33.8, 'chicken': 22.7}

dictionary operations:

{'pork': 25.3, 'beef': 33.8, 'chicken': 22.7, 'shrimp': 38.2, 'lobster': 100.8}
{'pork': 25.3, 'beef': 33.8, 'chicken': 22.7, 'lobster': 100.8}
4
{}


In [6]:
print("\naccessing keys and values in a dict:\nNot compatible in Python 2\n")

x = dict(pork=25.3, beef=33.8, chicken=22.7)

# keys, then values, then (key, value) pairs
for view in (x.keys(), x.values(), x.items()):
    print(view)

beef_is_key = 'beef' in x               # `in` on a dict looks at the keys only, not the values
print(beef_is_key)

clams_is_value = 'clams' in x.values()  # search the values explicitly
print(clams_is_value)


accessing keys and values in a dict:
Not compatible in Python 2

dict_keys(['pork', 'beef', 'chicken'])
dict_values([25.3, 33.8, 22.7])
dict_items([('pork', 25.3), ('beef', 33.8), ('chicken', 22.7)])
True
False


iterating a dictionary

In [8]:
y = dict(pork=25.3, beef=33.8, chicken=22.7)

# Looping over the dict itself yields its keys; each value is then looked up by key.
for name in y:
    print(name, y[name])

# items() yields (key, value) tuples, unpacked here into two names on every pass.
for name, price in y.items():
    print(name, price)

pork 25.3
beef 33.8
chicken 22.7
pork 25.3
beef 33.8
chicken 22.7


## List Comprehensions

List comprehensions can be done using a for loop inside of a list constructor. <br><br>

basic format: new_list = [transform sequence [filter]]

In [26]:
import random

# Each example below builds a list with a comprehension and prints it.
# f-strings already convert values to text, so no str() call is needed
# inside the braces.

# get values within a range
under_10 = [x for x in range(10)]
print(f"under_10: {under_10}")

# get squared values
squares = [x ** 2 for x in under_10]
print(f"squares: {squares}")

# get odd numbers using mod
odds = [x for x in range(10) if x % 2 == 1]
print(f"odds: {odds}")

# get multiples of 10
ten_x = [x * 10 for x in range(10)]
print(f"ten_x: {ten_x}")

# get all numbers from a string
s = "I love 2 go t0 the store 7 times a w3ek."
nums = [x for x in s if x.isnumeric()]     # keeps the characters '2', '0', '7', '3'
print(f"nums: {''.join(nums)}")

# get index of a list item
# enumerate pairs every item with its position; the filter keeps only the
# positions whose item matches, so idx holds every index where 'Anu' appears
names = ['Cosmo', 'Pedro', 'Anu', 'Ray']
idx = [k for k, v in enumerate(names) if v == 'Anu']    # [2]
print(f"index = {idx[0]}")

# delete an item from a list
# the filter builds a new list without 'C'; `letters` itself is not changed
letters = list('ABCDEF')
random.shuffle(letters)     # random order, so this line of output differs from run to run
letrs = [a for a in letters if a != 'C']
print(letters, letrs)

# if-else condition in a comprehension
# a conditional expression (x if ... else ...) chooses the value, so it goes
# before the `for`; an `if` placed after the `for` would filter items instead
nums = [5, 3, 10, 18, 6, 7]
new_list = [x if x % 2 == 0 else 10 * x for x in nums]
print(f"new_list: {new_list}")

# nested loop iteration for 2D list
# the `for` clauses read left to right like nested loops:
# b is each inner list, x is each value inside it
a = [[1, 2], [3, 4]]
new_list = [x for b in a for x in b]
print(new_list)


under_10: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
squares: [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
odds: [1, 3, 5, 7, 9]
ten_x: [0, 10, 20, 30, 40, 50, 60, 70, 80, 90]
nums: 2073
index = 2
['D', 'F', 'B', 'E', 'A', 'C'] ['D', 'F', 'B', 'E', 'A']
new_list: [50, 30, 10, 18, 6, 70]
[1, 2, 3, 4]
