In [1]:
import pandas as pd
import numpy as np

## Common list object methods

In [2]:
my_list = []

In [3]:
# Build the to-do list one task at a time; append() always adds to the end,
# so the tasks end up in the order they are listed here.
for new_task in ("Pay bills", "Tidy up", "Walk the dog", "Cook dinner"):
    my_list.append(new_task)

In [4]:
my_list

['Pay bills', 'Tidy up', 'Walk the dog', 'Cook dinner']

In [5]:
print(my_list[0])

Pay bills


In [6]:
i = my_list.index('Cook dinner')

In [7]:
my_list.insert(i, 'Go to the pharmacy')

In [8]:
my_list

['Pay bills', 'Tidy up', 'Walk the dog', 'Go to the pharmacy', 'Cook dinner']

In [9]:
print(my_list.count('Tidy up'))

1


## Use Slice Notation

In [10]:
print(my_list[0:3])

['Pay bills', 'Tidy up', 'Walk the dog']


In [11]:
print(my_list[:3])

['Pay bills', 'Tidy up', 'Walk the dog']


In [12]:
print(my_list[3:])

['Go to the pharmacy', 'Cook dinner']


In [13]:
print(my_list[:])

['Pay bills', 'Tidy up', 'Walk the dog', 'Go to the pharmacy', 'Cook dinner']


In [14]:
my_list[len(my_list):]=["Mow the lawn","Water plants"]

In [15]:
print(my_list)

['Pay bills', 'Tidy up', 'Walk the dog', 'Go to the pharmacy', 'Cook dinner', 'Mow the lawn', 'Water plants']


In [16]:
del my_list[5:]

In [17]:
print(my_list)

['Pay bills', 'Tidy up', 'Walk the dog', 'Go to the pharmacy', 'Cook dinner']


## Using a list as a queue

In [18]:
from collections import deque

# FIFO processing: new tasks join at the right end, finished tasks leave
# from the left end.
queue = deque(my_list)            # the deque holds its own copy of the items
queue.append('Wash the car')      # enqueue a new task at the back
finished = queue.popleft()        # dequeue the task that has waited longest
print(finished, ' - Done!')
my_list_upd = [*queue]            # remaining tasks, back as a plain list

Pay bills  - Done!


In [19]:
my_list_upd

['Tidy up',
 'Walk the dog',
 'Go to the pharmacy',
 'Cook dinner',
 'Wash the car']

## Using a list as a stack

In [20]:
# LIFO demo: push every task onto a plain list, then pop until it is empty.
# Tasks therefore come back in the reverse of the order they were pushed.
my_list = [
    'Pay bills',
    'Tidy up',
    'Walk the dog',
    'Go to the pharmacy',
    'Cook dinner',
]
stack = []
for task in my_list:
    stack.append(task)            # push onto the top of the stack
while stack:
    finished = stack.pop()        # pop returns the most recently pushed task
    print(finished, ' - Done!')
print('\nThe stack is empty')

Cook dinner  - Done!
Go to the pharmacy  - Done!
Walk the dog  - Done!
Tidy up  - Done!
Pay bills  - Done!

The stack is empty


## Using lists and stacks for natural language processing

In [1]:
import spacy

In [2]:
nlp=spacy.load('en_core_web_sm')

In [3]:
txt = "list is a ubiquitous data structure in the Python programming language."
doc = nlp(txt)
NOUN_TAGS = ("NOUN", "PROPN")


def pop_chunk(stack):
    """Empty ``stack`` and return its words joined in the order they were pushed.

    Words are popped last-in-first-out and prepended to the result, so the
    returned phrase reads left to right. ``stack`` is empty afterwards.
    """
    chunk = ''
    while stack:
        chunk = stack.pop() + ' ' + chunk
    return chunk.strip()


stk = []
for w in doc:
    if w.pos_ in NOUN_TAGS:
        # A noun or proper noun always belongs to the chunk being collected.
        stk.append(w.text)
    elif w.head.pos_ in NOUN_TAGS and w in w.head.lefts:
        # A left-hand child of a noun (determiner, adjective, compound, ...)
        # is a modifier of that noun, so it belongs to the chunk as well.
        stk.append(w.text)
    elif stk:
        # Any other token ends the current chunk: print it and start over.
        print(pop_chunk(stk))

# The loop only prints a chunk when a later non-chunk token shows up. If the
# text ends inside a noun phrase (e.g. no closing punctuation), the last chunk
# is still on the stack and must be flushed here or it is silently lost.
if stk:
    print(pop_chunk(stk))

list
a ubiquitous data structure
the Python programming language


In [9]:
doc[2].pos_

'DET'

In [None]:
doc

In [10]:
nlp(txt)[2].pos_

'DET'

## Making improvements with List Comprehensions

In [30]:
txt = "list is a ubiquitous data structure in the Python programming language."
import spacy
nlp = spacy.load("en_core_web_sm")
doc = nlp(txt)
for t in doc:
    print(t.text, t.head.text)

list is
is is
a structure
ubiquitous structure
data structure
structure is
in structure
the language
Python language
programming language
language in
. is


In [31]:
txt = "list is a ubiquitous data structure in the Python programming language."
import spacy
nlp = spacy.load("en_core_web_sm")
doc = nlp(txt)
head_lefts = [t.text if t in t.head.lefts else 0 for t in doc]
print(head_lefts)

['list', 0, 'a', 'ubiquitous', 'data', 0, 0, 'the', 'Python', 'programming', 0, 0]


In [36]:
txt = "List is arguably the most useful type in the Python programming language."
import spacy
nlp = spacy.load("en_core_web_sm")
doc = nlp(txt)
# Whether a token is a left-hand child of its head does not depend on where
# the slice starts, so evaluate it once for the whole sentence and then show
# the suffix that begins at each token.
left_flags = [tok.text if tok in tok.head.lefts else 0 for tok in doc]
for w in doc:
    head_lefts = left_flags[w.i:]
    print(head_lefts)

['List', 0, 0, 'the', 'most', 'useful', 0, 0, 'the', 'Python', 'programming', 0, 0]
[0, 0, 'the', 'most', 'useful', 0, 0, 'the', 'Python', 'programming', 0, 0]
[0, 'the', 'most', 'useful', 0, 0, 'the', 'Python', 'programming', 0, 0]
['the', 'most', 'useful', 0, 0, 'the', 'Python', 'programming', 0, 0]
['most', 'useful', 0, 0, 'the', 'Python', 'programming', 0, 0]
['useful', 0, 0, 'the', 'Python', 'programming', 0, 0]
[0, 0, 'the', 'Python', 'programming', 0, 0]
[0, 'the', 'Python', 'programming', 0, 0]
['the', 'Python', 'programming', 0, 0]
['Python', 'programming', 0, 0]
['programming', 0, 0]
[0, 0]
[0]


In [38]:
txt = "List is arguably the most useful type in the Python programming language."
import spacy
nlp = spacy.load("en_core_web_sm")
doc = nlp(txt)
for w in doc:
    # Token text where the token is a left-hand child of its head, else 0.
    head_lefts = [t.text if t in t.head.lefts else 0 for t in doc[w.i:]]
    # i0 = number of consecutive left-hand children starting at w
    # (0 when w itself is not a left-hand child of its head).
    i0 = head_lefts.index(0)
    if i0 > 0:
        # Scan the span w .. first non-left-child token from its end backwards,
        # flagging nouns, to find the noun closest to the end of the span.
        noun = [1 if t.pos_ in ("NOUN", "PROPN") else 0
                for t in reversed(doc[w.i:w.i + i0 + 1])]
        try:
            i1 = noun.index(1) + 1
        except ValueError:
            # No noun in this span. Previously execution fell through and used
            # i1 anyway: a NameError on the first such span, or a stale value
            # from an earlier iteration (and thus a wrong token) later on.
            continue
        print(head_lefts[:i0 + 1])
        print(doc[w.i + i0 + 1 - i1])

['List', 0]
List
['the', 'most', 'useful', 0]
type
['most', 'useful', 0]
type
['useful', 0]
type
['the', 'Python', 'programming', 0]
language
['Python', 'programming', 0]
language
['programming', 0]
language


In [41]:
txt = "List is arguably the most useful type in the Python programming language."
import spacy
nlp = spacy.load("en_core_web_sm")
doc = nlp(txt)
stk = []

for w in doc:
    # 1 marks a token that is a left-hand child of its syntactic head.
    head_lefts = [1 if t in t.head.lefts else 0 for t in doc[w.i:]]
    # i0 = number of consecutive left-hand children starting at w; it stays 0
    # when w is not one (or when no 0 is found at all).
    try:
        i0 = head_lefts.index(0)
    except ValueError:
        i0 = 0
    # i1 > 0 means the span that starts at w contains a noun, i.e. w is a
    # modifier inside a noun phrase.
    i1 = 0
    if i0 > 0:
        noun = [1 if t.pos_ == "NOUN" or t.pos_ == "PROPN" else 0
                for t in reversed(doc[w.i:w.i + i0 + 1])]
        try:
            i1 = noun.index(1) + 1
        except ValueError:
            pass  # no noun in the span: w is not part of a noun chunk
    if w.pos_ == "NOUN" or w.pos_ == "PROPN":
        stk.append(w.text)
    elif i1 > 0:
        stk.append(w.text)
    elif stk:
        # A token outside any noun phrase ends the current chunk.
        chunk = ''
        while stk:
            chunk = stk.pop() + ' ' + chunk
        print(chunk.strip())

# A chunk is only printed when a later non-chunk token arrives, so a text that
# ends inside a noun phrase would leave its last chunk on the stack unprinted.
if stk:
    chunk = ''
    while stk:
        chunk = stk.pop() + ' ' + chunk
    print(chunk.strip())

List
the most useful type
the Python programming language


## A list of tuples

In [42]:
task_list = [
    'Pay bills',
    'Tidy up',
    'Walk the dog',
    'Go to the pharmacy',
    'Cook dinner',
]
tm_list = ['8:00', '8:30', '9:30', '10:00', '9:30']
# zip() pairs the i-th time with the i-th task and already yields
# (time, task) tuples, so the pairs only need to be collected into a list.
sched_list = list(zip(tm_list, task_list))

In [43]:
print(sched_list)

[('8:00', 'Pay bills'), ('8:30', 'Tidy up'), ('9:30', 'Walk the dog'), ('10:00', 'Go to the pharmacy'), ('9:30', 'Cook dinner')]


In [44]:
print(sched_list[1][0])

8:30


## Dictionaries

In [46]:
# One dictionary per scheduled task, each with a 'time' and a 'name' entry.
dict_list = [
    {'time': start, 'name': task}
    for start, task in (
        ('8:00', 'Pay bills'),
        ('8:30', 'Tidy up'),
        ('9:30', 'Walk the dog'),
        ('10:00', 'Go to the pharmacy'),
        ('10:30', 'Cook dinner'),
    )
]

In [47]:
dict_list

[{'time': '8:00', 'name': 'Pay bills'},
 {'time': '8:30', 'name': 'Tidy up'},
 {'time': '9:30', 'name': 'Walk the dog'},
 {'time': '10:00', 'name': 'Go to the pharmacy'},
 {'time': '10:30', 'name': 'Cook dinner'}]

In [48]:
dict_list[1]['time'] = '9:00'

In [49]:
dict_list

[{'time': '8:00', 'name': 'Pay bills'},
 {'time': '9:00', 'name': 'Tidy up'},
 {'time': '9:30', 'name': 'Walk the dog'},
 {'time': '10:00', 'name': 'Go to the pharmacy'},
 {'time': '10:30', 'name': 'Cook dinner'}]

## Adding to a dictionary with setdefault()

In [50]:
# Sample record used below to show how setdefault() treats existing
# and missing keys.
car = dict(brand="Volkswagon", style="Sedan", model="Jetta")

In [51]:
print(car.setdefault("model","Passat"))

Jetta


In [52]:
print(car.setdefault("year",2022))

2022


In [54]:
print(car)

{'brand': 'Volkswagon', 'style': 'Sedan', 'model': 'Jetta', 'year': 2022}


In [62]:
txt = """Python is one of the most promising programming languages today. Due to the simplicity of Python
syntax, many researchers and scientists prefer Python over many other languages."""

In [63]:
txt = txt.replace('.','').replace(',','')

In [64]:
lst = txt.split()
print(lst)

['Python', 'is', 'one', 'of', 'the', 'most', 'promising', 'programming', 'languages', 'today', 'Due', 'to', 'the', 'simplicity', 'of', 'Python', 'syntax', 'many', 'researchers', 'and', 'scientists', 'prefer', 'Python', 'over', 'many', 'other', 'languages']


In [65]:
# Count how often each word occurs. setdefault() returns the current count,
# inserting 0 first when the word has not been seen yet, so every word can be
# handled the same way without an explicit membership test.
dct = {}
for w in lst:
    dct[w] = dct.setdefault(w, 0) + 1

In [66]:
dct_sorted = dict(sorted(dct.items(), key=lambda x: x[1], reverse=True))

In [67]:
print(dct_sorted)

{'Python': 3, 'of': 2, 'the': 2, 'languages': 2, 'many': 2, 'is': 1, 'one': 1, 'most': 1, 'promising': 1, 'programming': 1, 'today': 1, 'Due': 1, 'to': 1, 'simplicity': 1, 'syntax': 1, 'researchers': 1, 'and': 1, 'scientists': 1, 'prefer': 1, 'over': 1, 'other': 1}


## Loading JSON into a dictionary

In [68]:
# Sample purchase order: a nested structure of dicts and a list, using only
# value types that JSON can represent.
d = {
    "PONumber": 2608,
    "ShippingInstructions": {
        "name": "John Silver",
        "Address": {
            "street": "426 Light Street",
            "city": "South San Francisco",
            "state": "CA",
            "zipCode": 99237,
            "country": "USA",
        },
        "Phone": [
            {"type": "Office", "number": "809-123-9309"},
            {"type": "Mobile", "number": "417-123-4567"},
        ],
    },
}

In [69]:
import json

# Serialize the purchase order to po.json in the current working directory.
# The encoding is given explicitly so the file does not depend on the
# platform's locale default; UTF-8 is the standard encoding for JSON.
with open("po.json", "w", encoding="utf-8") as outfile:
    json.dump(d, outfile)

In [72]:
# Read the purchase order back from po.json; json.load() rebuilds the nested
# dicts and lists. The encoding is explicit so decoding does not depend on the
# platform's locale default.
with open("po.json", encoding="utf-8") as fp:
    d = json.load(fp)

In [74]:
print(d)

{'PONumber': 2608, 'ShippingInstructions': {'name': 'John Silver', 'Address': {'street': '426 Light Street', 'city': 'South San Francisco', 'state': 'CA', 'zipCode': 99237, 'country': 'USA'}, 'Phone': [{'type': 'Office', 'number': '809-123-9309'}, {'type': 'Mobile', 'number': '417-123-4567'}]}}


## Sets

## Removing duplicates from sequences 

In [75]:
lst = ["John Silver","Tim Jemison","John Silver","Maya Smith"]
lst = list(set(lst))

In [76]:
print(lst)

['Tim Jemison', 'Maya Smith', 'John Silver']


In [77]:
lst = ["John Silver", "Tim Jemison", "John Silver", "Maya Smith"]
# set() removes the duplicates but forgets the order. Sorting the survivors by
# the position of their first occurrence in the original list restores it.
# sorted() already returns a list, so no extra list() call is needed.
# (For long lists, list(dict.fromkeys(lst)) gives the same result in one pass.)
first_position = lst.index
lst = sorted(set(lst), key=first_position)
print(lst)

['John Silver', 'Tim Jemison', 'Maya Smith']


## Performing common set operations

In [78]:
photo1_tags = {'coffee', 'breakfast', 'drink', 'table', 'tableware', 'cup', 'food'}
photo2_tags = {'food', 'dish', 'meat', 'meal', 'tableware', 'dinner', 'vegetable'}
# Tags present in both photos; & is the operator form of set.intersection().
intersection = photo1_tags & photo2_tags
# Two or more shared tags are treated as "similar content".
if len(intersection) >= 2:
    print("The photos contain similar objects")

The photos contain similar objects
