## List, Dictionary, Set comprehensions

### List comprehension

In [1]:
a = ["a", "list"]

In [2]:

# list comprehension: keep only those attribute names of `a` that do not start with a double underscore
[name for name in dir(a) if not name.startswith("__")]


['append',
 'clear',
 'copy',
 'count',
 'extend',
 'index',
 'insert',
 'pop',
 'remove',
 'reverse',
 'sort']

In [3]:
# The same result using an explicit for loop instead of a list comprehension

# accumulator for the names that pass the filter
buf = []

for name in dir(a):
    # skip every name that starts with a double underscore
    if not name.startswith("__"):
        buf.append(name)

# last expression of the cell, so the notebook displays it
buf

['append',
 'clear',
 'copy',
 'count',
 'extend',
 'index',
 'insert',
 'pop',
 'remove',
 'reverse',
 'sort']

### Dictionary comprehension

In [4]:
# keys for the dictionary we are about to build
names = ["Adam", "Betty"]

# dictionary comprehension: map every name to the same starting value, 1
res = {
    person: 1
    for person in names
}

In [5]:
res

{'Adam': 1, 'Betty': 1}

In [6]:
# The same dictionary built with an explicit for loop:

# start from an empty dictionary
res2 = {}

for key in names:
    # every key gets the same value, 1
    res2[key] = 1

# last expression of the cell, so the notebook displays it
res2

{'Adam': 1, 'Betty': 1}

In [7]:
# Another example - filtering a dictionary

data1 = {
    "John": 25,
    "Peter": 35,
    "Anna": 20,
    "Beatrice": 30,
}

# keep only the entries whose value is at most 30 (30 itself is kept)

data2 = {
    name: number
    for name, number in data1.items()
    if number <= 30
}

print(data2)

{'John': 25, 'Anna': 20, 'Beatrice': 30}


In [8]:
# The same filtering done with an explicit for loop:

# note: this rebinds data2 to a new, empty dictionary
data2 = {}

for key, value in data1.items():
    # copy the entry only if its value is at most 30
    if value <= 30:
        data2[key] = value

print(data2)

{'John': 25, 'Anna': 20, 'Beatrice': 30}


### Set comprehension

In [9]:
# note the duplicates: "a" and "bit" both appear twice
words = ["a", "little", "bit", "about", "nothing", "a", "bit"]

# set comprehension - a set keeps each distinct word only once
set1 = {
    item
    for item in words
}

set1

{'a', 'about', 'bit', 'little', 'nothing'}

In [10]:
# This was a simple set comprehension example. You could do the same using the set() function:

set1 = set(words)

set1

{'a', 'about', 'bit', 'little', 'nothing'}

## File Operations (continued)

In [11]:
%%writefile somefile.txt
This is an example file
that we can experiment
with.

Overwriting somefile.txt


In [12]:
fname = "somefile.txt"

# open the file for reading as UTF-8 text
file = open(fname, encoding="utf-8")

try:
    # iterating over a file object yields it one line at a time
    for line in file:
        # strip() removes the trailing newline (and any surrounding whitespace)
        print(line.strip())
finally:
    # when a file is opened by hand, close() must be called explicitly;
    # the finally clause makes sure that happens even if the loop raises an error
    file.close()

This is an example file
that we can experiment
with.


In [13]:
# we can avoid having to explicitly call close() by opening the file in a with statement

with open(fname, encoding="utf-8") as file:
    for line in file:
        print(line.strip())

# close() gets called automatically when the with block ends

This is an example file
that we can experiment
with.


In [14]:
# we can use seek() to go back to the start of the file (if needed)

with open(fname, encoding="utf-8") as file:
    # first pass over the file
    for line in file:
        print(line.strip())

    # empty line between the two printouts
    print()

    # go to beginning
    file.seek(0)

    # print the file again
    for line in file:
        print(line.strip())

This is an example file
that we can experiment
with.

This is an example file
that we can experiment
with.


### CSV - DictReader and DictWriter

In [15]:
# https://data.gov.lv/dati/lv/dataset/stacionaru-operativie-dati-par-covid19

fname_csv = "data/covidpatients.csv"

In [16]:
import csv

In [17]:
from itertools import islice

# newline="" is what the csv module documentation recommends for files handed to
# csv.reader, so that newlines embedded in quoted fields are handled correctly
with open(fname_csv, encoding="utf-8", newline="") as file_csv:

    # the fields in this file are separated by semicolons
    rdr = csv.reader(file_csv, delimiter=";")
    # take only the first 10 rows (the header row is one of them)
    rdr_10_rows = islice(rdr, 10)

    # list() consumes the iterator while the file is still open
    data = list(rdr_10_rows)

data

[['Datums',
  'ĀI kods',
  'ĀI nosaukums',
  'Kopā',
  'Jauni',
  'Pamata diagnoze',
  'Blakus diagnoze',
  'Smaga slimības gaita',
  't.sk. Invazīva MPV',
  'Vidēja slimības gaita',
  'Miruši',
  'Izrakstīti',
  'Pārvesti',
  't.sk. uz augstāka līmeņa',
  't.sk. uz zemāka līmeņa',
  't.sk. uz tāda paša līmeņa'],
 ['2022-02-01T00:00:00',
  '320200001-01',
  'Aizkraukles slimnīca',
  '1',
  '1',
  '0',
  '1',
  '0',
  '0',
  '0',
  '0',
  '0',
  '0',
  '0',
  '0',
  '0'],
 ['2022-02-01T00:00:00',
  '360200027-01',
  'Alūksnes slimnīca',
  '0',
  '0',
  '0',
  '6',
  '0',
  '0',
  '0',
  '0',
  '1',
  '0',
  '0',
  '0',
  '0'],
 ['2022-02-01T00:00:00',
  '500200052-02',
  'Balvu un Gulbenes slimnīcu apvienība',
  '18',
  '',
  '12',
  '6',
  '2',
  '0',
  '16',
  '1',
  '1',
  '0',
  '0',
  '0',
  '0'],
 ['2022-02-01T00:00:00',
  '400200024-01',
  'Bauskas slimnīca',
  '2',
  '0',
  '4',
  '0',
  '0',
  '0',
  '4',
  '0',
  '1',
  '0',
  '0',
  '0',
  '0'],
 ['2022-02-01T00:00:00',
  '01

In [18]:
data[0][2]

'ĀI nosaukums'

In [19]:
data[1][2]

'Aizkraukles slimnīca'

In [20]:
# how could we access columns by name?
#  - DictReader to the rescue

# newline="" is what the csv module documentation recommends for files read through the csv module
with open(fname_csv, encoding="utf-8", newline="") as file_csv:

    # DictReader takes the column names from the first row and yields one dictionary per data row
    rdr = csv.DictReader(file_csv, delimiter=";")
    # take only the first 10 data rows
    rdr_10_rows = islice(rdr, 10)

    # list() consumes the iterator while the file is still open
    data = list(rdr_10_rows)

data

[{'Datums': '2022-02-01T00:00:00',
  'ĀI kods': '320200001-01',
  'ĀI nosaukums': 'Aizkraukles slimnīca',
  'Kopā': '1',
  'Jauni': '1',
  'Pamata diagnoze': '0',
  'Blakus diagnoze': '1',
  'Smaga slimības gaita': '0',
  't.sk. Invazīva MPV': '0',
  'Vidēja slimības gaita': '0',
  'Miruši': '0',
  'Izrakstīti': '0',
  'Pārvesti': '0',
  't.sk. uz augstāka līmeņa': '0',
  't.sk. uz zemāka līmeņa': '0',
  't.sk. uz tāda paša līmeņa': '0'},
 {'Datums': '2022-02-01T00:00:00',
  'ĀI kods': '360200027-01',
  'ĀI nosaukums': 'Alūksnes slimnīca',
  'Kopā': '0',
  'Jauni': '0',
  'Pamata diagnoze': '0',
  'Blakus diagnoze': '6',
  'Smaga slimības gaita': '0',
  't.sk. Invazīva MPV': '0',
  'Vidēja slimības gaita': '0',
  'Miruši': '0',
  'Izrakstīti': '1',
  'Pārvesti': '0',
  't.sk. uz augstāka līmeņa': '0',
  't.sk. uz zemāka līmeņa': '0',
  't.sk. uz tāda paša līmeņa': '0'},
 {'Datums': '2022-02-01T00:00:00',
  'ĀI kods': '500200052-02',
  'ĀI nosaukums': 'Balvu un Gulbenes slimnīcu apvienī

In [21]:
data[0]["ĀI nosaukums"]

'Aizkraukles slimnīca'

In [22]:
rdr.fieldnames

['Datums',
 'ĀI kods',
 'ĀI nosaukums',
 'Kopā',
 'Jauni',
 'Pamata diagnoze',
 'Blakus diagnoze',
 'Smaga slimības gaita',
 't.sk. Invazīva MPV',
 'Vidēja slimības gaita',
 'Miruši',
 'Izrakstīti',
 'Pārvesti',
 't.sk. uz augstāka līmeņa',
 't.sk. uz zemāka līmeņa',
 't.sk. uz tāda paša līmeņa']

#### DictWriter

https://docs.python.org/3/library/csv.html#csv.DictWriter

In [23]:
# encoding is given explicitly so the file content does not depend on the platform default;
# newline='' lets the csv module control the line endings itself
with open('names.csv', 'w', newline='', encoding='utf-8') as csvfile:
    fieldnames = ['first_name', 'last_name']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    # first line of the file: the column names
    writer.writeheader()
    # one dictionary per row; the keys are the names listed in fieldnames
    writer.writerow({'first_name': 'John', 'last_name': 'Smith'})
    writer.writerow({'first_name': 'Adam', 'last_name': 'Strange'})
    writer.writerow({'first_name': 'Alice', 'last_name': 'Wonder'})

In [24]:
# read the file back as plain text; the encoding is stated explicitly,
# like in the text-file cells earlier in this notebook
with open('names.csv', encoding='utf-8') as csvfile:
    for line in csvfile:
        # strip() removes the line ending before printing
        print(line.strip())

first_name,last_name
John,Smith
Adam,Strange
Alice,Wonder


### JSON input / output

JSON (JavaScript Object Notation) files let us save Python data hierarchies (dictionaries, lists, ...) to a file / read them from a file.

https://www.json.org/json-en.html

To do this, we will use Python's `json` library:

- json.dump() – save structured data to a JSON file
- json.dumps() – return structured data as a JSON string
- json.load() – read structured data from a JSON file
- json.loads() – read structured data from a JSON string

https://docs.python.org/3/library/json.html

In [25]:
import json

[name for name in dir(json) if not name.startswith("__")]

['JSONDecodeError',
 'JSONDecoder',
 'JSONEncoder',
 '_default_decoder',
 '_default_encoder',
 'codecs',
 'decoder',
 'detect_encoding',
 'dump',
 'dumps',
 'encoder',
 'load',
 'loads',
 'scanner']

In [26]:
example = {"key1": "value1",
           "key2": ["list", "of", "values"],
           "key3": {"another": 1, "dict": 42}}

In [27]:
# saving data to a JSON file

example_fname = "example.json"

# state the encoding explicitly instead of relying on the platform default
with open(example_fname, "w", encoding="utf-8") as file:
    # json.dump() serialises `example` and writes it to the open file
    json.dump(example, file)

In [28]:
# loading data from a JSON file

# state the encoding explicitly instead of relying on the platform default
with open(example_fname, encoding="utf-8") as file:
    # json.load() parses the file content back into Python objects
    data = json.load(file)

In [29]:
data

{'key1': 'value1',
 'key2': ['list', 'of', 'values'],
 'key3': {'another': 1, 'dict': 42}}

#### Loading JSON from a URL (using requests)

Data source: https://jsonplaceholder.typicode.com/

In [30]:
json_url = "https://jsonplaceholder.typicode.com/posts"

In [31]:
import requests

In [32]:
# timeout (in seconds): without one, requests may wait indefinitely for an unresponsive server
req = requests.get(json_url, timeout=10)

# raise an HTTPError for 4xx / 5xx answers instead of trying to parse an error page as JSON
req.raise_for_status()

# parse the response body as JSON
json_data = req.json()

# show only the first three records
json_data[:3]

[{'userId': 1,
  'id': 1,
  'title': 'sunt aut facere repellat provident occaecati excepturi optio reprehenderit',
  'body': 'quia et suscipit\nsuscipit recusandae consequuntur expedita et cum\nreprehenderit molestiae ut ut quas totam\nnostrum rerum est autem sunt rem eveniet architecto'},
 {'userId': 1,
  'id': 2,
  'title': 'qui est esse',
  'body': 'est rerum tempore vitae\nsequi sint nihil reprehenderit dolor beatae ea dolores neque\nfugiat blanditiis voluptate porro vel nihil molestiae ut reiciendis\nqui aperiam non debitis possimus qui neque nisi nulla'},
 {'userId': 1,
  'id': 3,
  'title': 'ea molestias quasi exercitationem repellat qui ipsa sit aut',
  'body': 'et iusto sed quo iure\nvoluptatem occaecati omnis eligendi aut ad\nvoluptatem doloribus vel accusantium quis pariatur\nmolestiae porro eius odio et labore et velit aut'}]

In [33]:
# alternative approach (without requests)

from urllib.request import urlopen

# timeout (in seconds) keeps the cell from hanging if the server does not answer
with urlopen(json_url, timeout=10) as req:
    # read() returns the whole response body as bytes
    body = req.read()

# show the first 100 bytes
body[:100]

b'[\n  {\n    "userId": 1,\n    "id": 1,\n    "title": "sunt aut facere repellat provident occaecati excep'

In [34]:
# decode JSON data from the downloaded bytes (json.loads() accepts str as well as bytes input)

json_data = json.loads(body)

# show only the first two records
json_data[:2]

[{'userId': 1,
  'id': 1,
  'title': 'sunt aut facere repellat provident occaecati excepturi optio reprehenderit',
  'body': 'quia et suscipit\nsuscipit recusandae consequuntur expedita et cum\nreprehenderit molestiae ut ut quas totam\nnostrum rerum est autem sunt rem eveniet architecto'},
 {'userId': 1,
  'id': 2,
  'title': 'qui est esse',
  'body': 'est rerum tempore vitae\nsequi sint nihil reprehenderit dolor beatae ea dolores neque\nfugiat blanditiis voluptate porro vel nihil molestiae ut reiciendis\nqui aperiam non debitis possimus qui neque nisi nulla'}]

In [35]:
# you can also use dumps() to convert data to a JSON string

example

{'key1': 'value1',
 'key2': ['list', 'of', 'values'],
 'key3': {'another': 1, 'dict': 42}}

In [36]:
json.dumps(example)

'{"key1": "value1", "key2": ["list", "of", "values"], "key3": {"another": 1, "dict": 42}}'

### Saving Python objects - pickle module

https://docs.python.org/3/library/pickle.html

In [37]:
# example class

class Dog:
    """
    A minimal example class: a dog that has a name and an age.
    """
    def __init__(self, name, age):
        # remember both values on the instance
        self.name, self.age = name, age

    def say(self, text="Wow!"):
        # build the quoted phrase, then let print() join it to the name with a space
        speaker = self.name
        phrase = "says '" + text + "'"
        print(speaker, phrase)

    def __str__(self):
        # the text produced by str(dog) and print(dog)
        return "Dog: name={}, age={}".format(self.name, self.age)
    
rex = Dog("Rex", 2)

In [38]:
import pickle

# NOTE: this rebinds `fname`, which an earlier cell set to "somefile.txt"
fname = "data.pickle"

# the file is opened in binary write mode ("wb")
with open(fname, "wb") as outf:
    # serialise the `rex` object and write it to the open file
    pickle.dump(rex, outf)


In [39]:
# NOTE: unpickling can execute arbitrary code - only load pickle files
# that you created yourself or otherwise trust
# the file is opened in binary read mode ("rb")
with open(fname, "rb") as inf:
    data = pickle.load(inf)

data

<__main__.Dog at 0x10a48f580>

In [40]:
str(data)

'Dog: name=Rex, age=2'

### Other file-related operations

- https://docs.python.org/3/library/filesys.html
- https://realpython.com/working-with-files-in-python/

#### Listing files in a directory

In [41]:
import os

# current directory
cwd = os.getcwd()

cwd

'/Users/captsolo/_changed_stuff_/Code/LU_GeoPython_2023/notebooks'

In [42]:
main_dir = "."

# os.scandir() is used as a context manager here and yields one entry
# per item found in the directory
with os.scandir(main_dir) as dir_entries:
    for dir_entry in dir_entries:
        # .name is the bare file / directory name
        print(dir_entry.name)

nltk_example.ipynb
06 - Strings, File Input and Output.ipynb
pandas_cheatsheet.pdf
09 - Pandas.ipynb
FOLIUM.ipynb
somefile.txt
01 - Python Introduction.ipynb
data.pickle
04 - Python Dictionaries and Sets.ipynb
US_Unemployment_Oct2012.csv
test123.txt
10 - Matplotlib.ipynb
03 - Python Functions.ipynb
example.json
07 - Object Oriented Programming.ipynb
Colormaps_2.html
Pandas Exercises.ipynb
riga.json
README.md
img
Colormaps_1.html
names.csv
GeoPy.ipynb
Colormaps_0.html
.ipynb_checkpoints
test_pandas.csv
11 - NumPy.ipynb
us-states.json
08 - File Operations 2.ipynb
02 - Python Introduction.ipynb
program_example.py
data
05 - Python Libraries.ipynb
test_data.csv
02 - Examples.ipynb
covidpatients.csv


In [43]:
from pathlib import Path

# the current directory, as a Path object
my_path = Path(".")

# iterdir() returns a lazy iterator over the directory's entries
entries_in_path = my_path.iterdir()

# sorted() reads all entries and returns them as a sorted list
for entry in sorted(entries_in_path):
    print(entry.name)

.ipynb_checkpoints
01 - Python Introduction.ipynb
02 - Examples.ipynb
02 - Python Introduction.ipynb
03 - Python Functions.ipynb
04 - Python Dictionaries and Sets.ipynb
05 - Python Libraries.ipynb
06 - Strings, File Input and Output.ipynb
07 - Object Oriented Programming.ipynb
08 - File Operations 2.ipynb
09 - Pandas.ipynb
10 - Matplotlib.ipynb
11 - NumPy.ipynb
Colormaps_0.html
Colormaps_1.html
Colormaps_2.html
FOLIUM.ipynb
GeoPy.ipynb
Pandas Exercises.ipynb
README.md
US_Unemployment_Oct2012.csv
covidpatients.csv
data
data.pickle
example.json
img
names.csv
nltk_example.ipynb
pandas_cheatsheet.pdf
program_example.py
riga.json
somefile.txt
test123.txt
test_data.csv
test_pandas.csv
us-states.json


In [44]:
# listing only directories

# a fresh iterator is needed for every pass over the directory
entries_in_path = my_path.iterdir()

for entry in sorted(entries_in_path):
    # anything that is not a directory is skipped
    if not entry.is_dir():
        continue
    print(entry.name)

.ipynb_checkpoints
data
img


In [45]:
# listing only files

# a fresh iterator is needed for every pass over the directory
entries_in_path = my_path.iterdir()

for entry in sorted(entries_in_path):
    # anything that is not a regular file is skipped
    if not entry.is_file():
        continue
    print(entry.name)

01 - Python Introduction.ipynb
02 - Examples.ipynb
02 - Python Introduction.ipynb
03 - Python Functions.ipynb
04 - Python Dictionaries and Sets.ipynb
05 - Python Libraries.ipynb
06 - Strings, File Input and Output.ipynb
07 - Object Oriented Programming.ipynb
08 - File Operations 2.ipynb
09 - Pandas.ipynb
10 - Matplotlib.ipynb
11 - NumPy.ipynb
Colormaps_0.html
Colormaps_1.html
Colormaps_2.html
FOLIUM.ipynb
GeoPy.ipynb
Pandas Exercises.ipynb
README.md
US_Unemployment_Oct2012.csv
covidpatients.csv
data.pickle
example.json
names.csv
nltk_example.ipynb
pandas_cheatsheet.pdf
program_example.py
riga.json
somefile.txt
test123.txt
test_data.csv
test_pandas.csv
us-states.json


#### Searching for files

In [46]:
import glob

# find all files ending with ".csv" (in the current directory)
# note: the matches are not returned in alphabetical order (see the output of this cell)
glob.glob('*.csv')

['US_Unemployment_Oct2012.csv',
 'names.csv',
 'test_pandas.csv',
 'test_data.csv',
 'covidpatients.csv']

In [47]:
# look for files in sub-directories too:
# with recursive=True the "**" part matches the current directory as well as its sub-directories
glob.glob("**/*.csv", recursive=True)

['US_Unemployment_Oct2012.csv',
 'names.csv',
 'test_pandas.csv',
 'test_data.csv',
 'covidpatients.csv',
 'data/iedz_skaits_2018.csv',
 'data/covidpatients.csv']

#### More information

See the [Real Python article](https://realpython.com/working-with-files-in-python/) for info on how to:
- Create directories and directory trees
- Find patterns in filenames
- Create temporary files and directories
- Move, rename, copy, and delete files or directories
- Read and extract data from different types of archives



### Exercises