# Dictionary

In [1]:
ages = {'Bob' : 27, 'Alice' : 28, 'Bill' : 17}

In [2]:
print(ages)  # insertion order is kept (Python 3.7+), but look entries up by key, not by position

{'Bob': 27, 'Alice': 28, 'Bill': 17}


In [3]:
ages['Sarah']

KeyError: 'Sarah'

In [4]:
ages['Sarah'] = 18
ages

{'Bob': 27, 'Alice': 28, 'Bill': 17, 'Sarah': 18}

In [5]:
ages['Bob'] = ages['Sarah'] + 4
ages

{'Bob': 22, 'Alice': 28, 'Bill': 17, 'Sarah': 18}

In [6]:
print(ages.get('Karl'))  # get() returns None for a missing key instead of raising KeyError

None


In [7]:
ages

{'Bob': 22, 'Alice': 28, 'Bill': 17, 'Sarah': 18}

In [8]:
ages['Karl'] = 22
ages

{'Bob': 22, 'Alice': 28, 'Bill': 17, 'Sarah': 18, 'Karl': 22}

In [9]:
ages.update({'Joe': 30}) # Add a new key, value pair to the dictionary
ages

{'Bob': 22, 'Alice': 28, 'Bill': 17, 'Sarah': 18, 'Karl': 22, 'Joe': 30}

In [10]:
ages.update({'Joe': 35}) # Update an existing key with a new value 
ages['Sarah'] = 22 # Or simply use assignment to update an existing key with a new value 
ages

{'Bob': 22, 'Alice': 28, 'Bill': 17, 'Sarah': 22, 'Karl': 22, 'Joe': 35}

In [11]:
ages.pop('Bill')

17

In [12]:
ages

{'Bob': 22, 'Alice': 28, 'Sarah': 22, 'Karl': 22, 'Joe': 35}

In [13]:
ages.pop()

TypeError: pop expected at least 1 argument, got 0

### Dictionary keys are unique

In [14]:
# 'Bob' is listed twice; the later value (28) replaces the earlier one (27)
ages = {'Bob' : 27, 'Alice' : 28, 'Bob': 28, 'Bill' : 17}
ages

{'Bob': 28, 'Alice': 28, 'Bill': 17}

### Both the keys and values of the dictionary are evaluated when it's defined

In [15]:
a = 'xxx'
b = 'yyy'
x = 2
# The names a and b are evaluated to their string values to form the keys,
# and the expression x**3 is evaluated to 8 to form the value.
dict1 = {a: 1, b: x**3}
dict1

{'xxx': 1, 'yyy': 8}

### Counting occurrences with a dictionary

In [16]:
# Sample line to paste at the prompt:
# the clown ran after the car and the car ran into the tent and the tent fell down on the clown and the car
counts = {}
line = input('Enter a line of text:\n')
words = line.split()
print('Words:', words)
print('Counting...')
for token in words:
    # get() supplies 0 the first time a word is seen
    seen_so_far = counts.get(token, 0)
    counts[token] = seen_so_far + 1
print('Counts', counts)

Enter a line of text:
 the clown ran after the car and the car ran into the tent and the tent fell down on the clown and the car


Words: ['the', 'clown', 'ran', 'after', 'the', 'car', 'and', 'the', 'car', 'ran', 'into', 'the', 'tent', 'and', 'the', 'tent', 'fell', 'down', 'on', 'the', 'clown', 'and', 'the', 'car']
Counting...
Counts {'the': 7, 'clown': 2, 'ran': 2, 'after': 1, 'car': 3, 'and': 3, 'into': 1, 'tent': 2, 'fell': 1, 'down': 1, 'on': 1}


### We can go through a dictionary with a definite loop

In [17]:
for key in counts:
    print(key, counts[key])

the 7
clown 2
ran 2
after 1
car 3
and 3
into 1
tent 2
fell 1
down 1
on 1


In [18]:
counts.keys()

dict_keys(['the', 'clown', 'ran', 'after', 'car', 'and', 'into', 'tent', 'fell', 'down', 'on'])

In [19]:
print(list(counts.keys()))

['the', 'clown', 'ran', 'after', 'car', 'and', 'into', 'tent', 'fell', 'down', 'on']


Note how the 'list' function turns the 'dict_keys' view into an actual list. However, Python also allows us to iterate directly over the result of 'counts.keys()', without converting it to a list first.

In [20]:
print(list(counts.values()))

[7, 2, 2, 1, 3, 3, 1, 2, 1, 1, 1]


In [21]:
print(list(counts.items()))

[('the', 7), ('clown', 2), ('ran', 2), ('after', 1), ('car', 3), ('and', 3), ('into', 1), ('tent', 2), ('fell', 1), ('down', 1), ('on', 1)]


Notice that 'list(counts.items())' is not the same as the value of 'counts'! <br/>
The former is a list of (key, value) tuples; the latter is a dictionary.

In [22]:
print(counts)

{'the': 7, 'clown': 2, 'ran': 2, 'after': 1, 'car': 3, 'and': 3, 'into': 1, 'tent': 2, 'fell': 1, 'down': 1, 'on': 1}


### Iterating over dictionary items, both keys and values, at the same time

In [23]:
for word, count in counts.items():
    print(word, 'appears', count, 'times')

the appears 7 times
clown appears 2 times
ran appears 2 times
after appears 1 times
car appears 3 times
and appears 3 times
into appears 1 times
tent appears 2 times
fell appears 1 times
down appears 1 times
on appears 1 times


### And now to find the most frequent word in any text file:

### (Try 'romeo.txt', 'clown.txt', 'mbox.txt', 'mbox-short.txt')

__Content of file _romeo.txt_:__<br>
But soft what light through yonder window breaks<br>
It is the east and Juliet is the sun<br>
Arise fair sun and kill the envious moon<br>
Who is already sick and pale with grief<br>

__Content of file _clown.txt_:__<br>
the clown ran after the car and the car ran into the tent and the tent fell down on the clown and the car

In [24]:
name = input('Enter file: ')
# Read the whole file inside a with-block so the handle is closed as soon
# as reading finishes, even if read() raises.
with open(name) as handle:
    text = handle.read()
words = text.split()

# Tally how many times each whitespace-separated word occurs.
counts = dict()
for word in words:
    counts[word] = counts.get(word, 0) + 1
print(counts)

# Find the word with the largest count. The comparison is strictly
# greater-than, so on a tie the word that was seen first is kept.
bigcount = None
bigword = None
for word, count in counts.items():
    if bigcount is None or count > bigcount:
        bigword = word
        bigcount = count
print(bigword, 'appears', bigcount, 'times')

Enter file:  romeo.txt


{'But': 1, 'soft': 1, 'what': 1, 'light': 1, 'through': 1, 'yonder': 1, 'window': 1, 'breaks': 1, 'It': 1, 'is': 3, 'the': 3, 'east': 1, 'and': 3, 'Juliet': 1, 'sun': 2, 'Arise': 1, 'fair': 1, 'kill': 1, 'envious': 1, 'moon': 1, 'Who': 1, 'already': 1, 'sick': 1, 'pale': 1, 'with': 1, 'grief': 1}
is appears 3 times


## Copying Dictionaries: Shallow and Deep Copy

In [25]:
d1 = {'a': 1, 'b': {'c':3, 'd':4}, 'e':5}

In [26]:
d2 = d1  # assignment: d1 and d2 now refer to the same dictionary object in memory

In [27]:
d1 == d2

True

In [28]:
d1 is d2

True

In [29]:
# id() returns an integer that is unique to each object in Python.
# In the CPython implementation, it is the object's memory address.
print(id(d1))
print(id(d2))

140573599583744
140573599583744


In [30]:
d1['e'] = 55
print(d1)
print(d2)

{'a': 1, 'b': {'c': 3, 'd': 4}, 'e': 55}
{'a': 1, 'b': {'c': 3, 'd': 4}, 'e': 55}


In [31]:
d3 = d1.copy() # shallow copy 

In [32]:
d3 == d1

True

In [33]:
d3 is d1  # d1 and d3 are at different locations in memory

False

In [34]:
print(id(d1))
print(id(d2))
print(id(d3))

140573599583744
140573599583744
140573599585088


In [35]:
d3['a'] = 3
print(d1)
print(d2)
print(d3)

{'a': 1, 'b': {'c': 3, 'd': 4}, 'e': 55}
{'a': 1, 'b': {'c': 3, 'd': 4}, 'e': 55}
{'a': 3, 'b': {'c': 3, 'd': 4}, 'e': 55}


In [36]:
d3['b']['d'] 

4

In [37]:
# d1 and d2 are the same object, and d3 (a shallow copy) still shares the
# nested dictionary stored under 'b', so the new value shows up in all three.
d2['b']['d'] = 77
print(d1)
print(d2)
print(d3)

{'a': 1, 'b': {'c': 3, 'd': 77}, 'e': 55}
{'a': 1, 'b': {'c': 3, 'd': 77}, 'e': 55}
{'a': 3, 'b': {'c': 3, 'd': 77}, 'e': 55}


In [38]:
import copy
d4 = copy.deepcopy(d1)
d4

{'a': 1, 'b': {'c': 3, 'd': 77}, 'e': 55}

In [39]:
d1['b']['c']

3

In [40]:
# d4 was made with copy.deepcopy, so it has its own nested dictionary and
# keeps the old value; d1, d2 and d3 all show the change.
d1['b']['c'] = 99
print(d1)
print(d2)
print(d3)
print(d4)

{'a': 1, 'b': {'c': 99, 'd': 77}, 'e': 55}
{'a': 1, 'b': {'c': 99, 'd': 77}, 'e': 55}
{'a': 3, 'b': {'c': 99, 'd': 77}, 'e': 55}
{'a': 1, 'b': {'c': 3, 'd': 77}, 'e': 55}


back to slides

---
# Sorting Dictionaries

Starting with Python 3.7, dictionaries preserve insertion order: iterating over a dictionary yields its keys in the order in which they were first added.

In [58]:
word_counts = {'car': 1, 'drove': 2, "the": 4, "girl": 1}

In [60]:
list(word_counts.items())

[('car', 1), ('drove', 2), ('the', 4), ('girl', 1)]

In [41]:
word_counts = {'car': 1, 'drove': 2, "the": 4, "girl": 1}
for word in word_counts:
    print(word, word_counts[word])

car 1
drove 2
the 4
girl 1


__Sorting by key:__

In [42]:
word_counts = {'car': 1, 'drove': 2, "the": 4, "girl": 1}
# sorted() over a dictionary iterates its keys, so the words come out in alphabetical order
for word in sorted(word_counts):
    print(word, word_counts[word])

car 1
drove 2
girl 1
the 4


__Sorting by value:__

In [48]:
word_counts = {'car': 1, 'drove': 2, "the": 4, "girl": 1}
# key=word_counts.get ranks each word by its count; reverse=True puts the largest count first
for word in sorted(word_counts, key=word_counts.get, reverse=True):
    print(word, word_counts[word])

the 4
drove 2
car 1
girl 1


__Sorting by value using a lambda function__<br>
Using a _function_ as a _sorting key_ adds a lot of flexibility.<br>
A _lambda function_ is a small anonymous function, i.e. a function that has only a single expression and no name.

In the examples below, we sort over the word_counts.items(), i.e. (key, value) tuples.<br>
Therefore, the input _x_ of the lambda function is a (key, value) tuple,<br>
so _x[0]_ refers to the key and _x[1]_ refers to the value of the (key, value) tuple.

In [49]:
word_counts = {'car': 1, 'drove': 2, "the": 4, "girl": 1}
for word, count in sorted(word_counts.items(), key=lambda x: x[1], reverse=True):
    print(word, count)

the 4
drove 2
car 1
girl 1


__Sorting by length of key:__

In [50]:
word_counts = {'car': 1, 'drove': 2, "the": 4, "girl": 1}
for word, count in sorted(word_counts.items(), key=lambda x: len(x[0]), reverse=True):
    print(word, count)

drove 2
girl 1
car 1
the 4


__Same code, but with lambda function expanded to regular function:__

In [51]:
def x0_length(x):
    """Sort key: return the length of the first element of x.

    x is a (key, value) tuple, so this is the named-function form of
    ``lambda x: len(x[0])``.
    """
    first = x[0]
    return len(first)


word_counts = {'car': 1, 'drove': 2, "the": 4, "girl": 1}
# Longest key first; keys of equal length keep their dictionary order.
for word, count in sorted(
    word_counts.items(),
    key=x0_length,
    reverse=True,
):
    print(word, count)

drove 2
girl 1
car 1
the 4


In [52]:
sorted(word_counts.items(), key=x0_length, reverse=True)

[('drove', 2), ('girl', 1), ('car', 1), ('the', 4)]

In [53]:
sorted(word_counts.items(), key=x0_length)

[('car', 1), ('the', 4), ('girl', 1), ('drove', 2)]

In [54]:
??sorted

[0;31mSignature:[0m [0msorted[0m[0;34m([0m[0miterable[0m[0;34m,[0m [0;34m/[0m[0;34m,[0m [0;34m*[0m[0;34m,[0m [0mkey[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m [0mreverse[0m[0;34m=[0m[0;32mFalse[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Return a new list containing all items from the iterable in ascending order.

A custom key function can be supplied to customize the sort order, and the
reverse flag can be set to request the result in descending order.
[0;31mType:[0m      builtin_function_or_method

# Organize your data using appropriate data structures

Imagine you have the temperatures of several cities, recorded in different units: a list of dictionaries may be a good solution.

In [56]:
# One dictionary per city; every record uses the same three keys.
# Note that the temperatures are stored as strings, not numbers.
temp_records = [
    {'city': 'Los Angeles', 'temperature': '51', 'unit': 'Fahrenheit'},
    {'city': 'Madrid', 'temperature': '4', 'unit': 'Celsius'},
    {'city': 'New York', 'temperature': '18', 'unit': 'Fahrenheit'},
    {'city': 'New Delhi', 'temperature': '20', 'unit': 'Celsius'},
]

In [57]:
# Print one sentence per record, reading each field by its key.
for record in temp_records:
    print(f"The temperature in {record['city']} is {record['temperature']} {record['unit']}")

The temperature in Los Angeles is 51 Fahrenheit
The temperature in Madrid is 4 Celsius
The temperature in New York is 18 Fahrenheit
The temperature in New Delhi is 20 Celsius


Suggestion: Write a program that shows all the temperatures in temp_records in Celsius

# Tuples

In [None]:
# Count how often each word occurs in romeo.txt.
counts = {}
# The with-block closes the file once every line has been read.
with open('romeo.txt') as fhand:
    for line in fhand:
        words = line.split()
        # print(words)
        for word in words:
            counts[word] = counts.get(word, 0) + 1
            # print(word, counts[word])

print(counts)

# Build (count, word) tuples so that sorting compares the count first.
lst = []
for key, val in counts.items():
    newtup = (val, key)
    lst.append(newtup)

print(lst)

# Tuples compare element by element: largest count first, and equal counts
# are ordered by the word, also descending.
lst = sorted(lst, reverse=True)

print(lst)

# Show the five most frequent words.
for val, key in lst[:5]:
    print(key, val)

### Quick way to sort pairs by second element

In [None]:
c = {'a': 10, 'b': 1, 'c': 22}
# Swap each (key, value) pair to (value, key) so the sort compares values first.
c1 = sorted(((value, key) for key, value in c.items()), reverse=True)
print(c1)

# Tuples as keys in dictionaries
### Dictionary keys must be hashable (in practice: immutable)
* Allowed as keys: int, float, string, bool, __tuple__ (as long as every element of the tuple is itself hashable)
* Not allowed as keys: list, set, dict (but allowed as values)

In [63]:
# Each key is a (subject, attribute) tuple; the value is that attribute's value.
d = {
    ("John Smith", "email"): "jsmith@usc.edu",
    ("John Smith", "birthday"): "July 4, 1997",
    ("John Smith", "age"): 24,
    ("John Smith", "married"): True,
    ("John Smith", "wife"): "Alice Smith",
    ("Alice Smith", "email"): "asmith@ucla.edu",
    ("Alice Smith", "programming languages"): ["Python", "Java", "C++"],
    ("Alice Smith", "address"): ("Los Angeles", "824 Hilgard Avennue"),
    ("Los Angeles", "population"): 3898747,
    ("Los Angeles", "mayor"): "Eric Garcetti",
    ("Python", "latest version"): "3.10.2",
    ("Python", "creator"): "Guido van Rossum",
    ("Guido van Rossum", "twitter"): "@gvanrossum",
}

print(d)
# Two-step lookup: the inner get() finds who John Smith's wife is, and her
# name then becomes the subject of the outer get() for her email.
print("\nJohn Smith's wife's email:",
      d.get((d.get(("John Smith", "wife")), "email")))

{('John Smith', 'email'): 'jsmith@usc.edu', ('John Smith', 'birthday'): 'July 4, 1997', ('John Smith', 'age'): 24, ('John Smith', 'married'): True, ('John Smith', 'wife'): 'Alice Smith', ('Alice Smith', 'email'): 'asmith@ucla.edu', ('Alice Smith', 'programming languages'): ['Python', 'Java', 'C++'], ('Alice Smith', 'address'): ('Los Angeles', '824 Hilgard Avennue'), ('Los Angeles', 'population'): 3898747, ('Los Angeles', 'mayor'): 'Eric Garcetti', ('Python', 'latest version'): '3.10.2', ('Python', 'creator'): 'Guido van Rossum', ('Guido van Rossum', 'twitter'): '@gvanrossum'}

John Smith's wife's email: asmith@ucla.edu


### A dictionary with tuple keys can become a powerful web of information!

back to slides

# Sets

In [None]:
odds = {1, 3, 5, 7, 9}
evens = {2, 4, 6, 8}
primes = {2, 3, 5, 7}

# Each label names the method form and the equivalent operator form;
# the printed value is computed with the operator form.
print('odds.intersection(primes) == odds & primes == ', odds & primes)
print('odds.union(evens) == odds | evens == ', odds | evens)
# Intersection is &, not |: odds and evens share no element, so this prints the empty set.
print('odds.intersection(evens) == odds & evens == ', odds & evens)
print('odds.difference(primes) == odds - primes == ', odds - primes)
print('primes.difference(odds) == primes - odds ==', primes - odds)
print('odds.symmetric_difference(primes) == (odds - primes)|(primes - odds) == ', (odds - primes)|(primes - odds) )
# An empty set is falsy, so "not (odds & evens)" is True exactly when the sets are disjoint.
print('not (odds & evens) == odds.isdisjoint(evens) == ', not odds & evens )

In [64]:
d  = { 1 : 3 , 2 : 2 , 3 : 1}

In [65]:
d

{1: 3, 2: 2, 3: 1}

In [68]:
# Key with the smallest value: sort (value, key) pairs, take the first pair, and read its key
sorted( [ (v,k) for k,v in d.items() ] )[0][1]

3

In [73]:
# Key with the smallest value: sort the (key, value) items by value, then take the key of the first item
sorted( d.items(), key=lambda x : x[1] )[0][0]

3

In [74]:
!pwd

/Users/ambite/Documents/USC/classes/DSCI-510/DSCI-510-Spring-2024-Ambite/Week5-dictionaries-tuples-sets
