In [10]:
import pandas as pd
import json
import csv

# 1. JSON files

link: https://docs.python.org/3/library/json.html

## 1.1 read and save to JSON file (by pandas)
`pd.read_json()` and `DataFrame.to_json('path', orient=...)`

refer: https://pythonexamples.org/python-json-to-dict/

In [33]:
# Load the Berlin crimes table from CSV, then export it as JSON.
# orient='index' writes one JSON object per row, keyed by the row label.
df = pd.read_csv(
    '/home/sharma/Desktop/DeepLearning/SQL/datasets/Berlin_crimes.csv'
)
df.to_json(
    path_or_buf='/home/sharma/Desktop/DeepLearning/DataScience/IO/saved_datasets/Berlin_crimes.json',
    orient='index',
)

## 1.2 read and write JSON file (by json python lib)

**`json.dump(obj, file)`: `dump` serializes a Python object (dict / list) to JSON text and writes it to an open file object**

**`json.load(file)`: `load` reads JSON text from an open file object and parses it into a Python object (dict / list)**

In [34]:
# Parse the JSON file produced by pandas into a plain Python dict.
# Reading the text and handing it to loads() is what json.load() does internally.
with open('/home/sharma/Desktop/DeepLearning/DataScience/IO/saved_datasets/Berlin_crimes.json') as json_source:
    json_text = json_source.read()
    js_data = json.loads(json_text)

In [41]:
# Row labels are stored as string keys ('0', '1', ...); display the 'Year' field of row '0'.
js_data['0']['Year']

2012

In [35]:
# Print every record next to its row key.
for row_key in js_data:
    record = js_data[row_key]
    print(row_key, ':', record)

0 : {'Year': 2012, 'District': 'Mitte', 'Code': 10111, 'Location': 'Tiergarten Süd', 'Robbery': 70, 'Street_robbery': 46, 'Injury': 586, 'Agg_assault': 194, 'Threat': 118, 'Theft': 2263, 'Car': 18, 'From_car': 328, 'Bike': 120, 'Burglary': 68, 'Fire': 16, 'Arson': 4, 'Damage': 273, 'Graffiti': 26, 'Drugs': 171, 'Local': 1032}
1 : {'Year': 2012, 'District': 'Mitte', 'Code': 10112, 'Location': 'Regierungsviertel', 'Robbery': 65, 'Street_robbery': 29, 'Injury': 474, 'Agg_assault': 123, 'Threat': 142, 'Theft': 3203, 'Car': 10, 'From_car': 307, 'Bike': 170, 'Burglary': 37, 'Fire': 10, 'Arson': 4, 'Damage': 380, 'Graffiti': 124, 'Drugs': 98, 'Local': 870}
2 : {'Year': 2012, 'District': 'Mitte', 'Code': 10113, 'Location': 'Alexanderplatz', 'Robbery': 242, 'Street_robbery': 136, 'Injury': 1541, 'Agg_assault': 454, 'Threat': 304, 'Theft': 8988, 'Car': 81, 'From_car': 792, 'Bike': 822, 'Burglary': 275, 'Fire': 49, 'Arson': 27, 'Damage': 1538, 'Graffiti': 522, 'Drugs': 435, 'Local': 3108}
3 : {'Y

In [49]:
# Write the parsed data back to disk as tab-indented JSON.
with open('/home/sharma/Desktop/DeepLearning/DataScience/IO/saved_datasets/Berlin_crimes_dump.json', 'w') as json_target:
    json.dump(obj=js_data, fp=json_target, indent='\t')

**`json.dumps(obj)`: `dumps` serializes a Python object (dict / list) -> JSON `str`**

**`json.loads(s)`: `loads` parses a JSON `str` / `bytes` / `bytearray` -> Python object (dict / list)**

In [50]:
# dumps() serializes to an in-memory string instead of writing to a file.
serialized = json.dumps(js_data)
print(type(serialized))

<class 'str'>


In [37]:
# Pretty-print the whole document, one tab per nesting level.
pretty_json = json.dumps(js_data, indent='\t')
print(pretty_json)

{
	"0": {
		"Year": 2012,
		"District": "Mitte",
		"Code": 10111,
		"Location": "Tiergarten S\u00fcd",
		"Robbery": 70,
		"Street_robbery": 46,
		"Injury": 586,
		"Agg_assault": 194,
		"Threat": 118,
		"Theft": 2263,
		"Car": 18,
		"From_car": 328,
		"Bike": 120,
		"Burglary": 68,
		"Fire": 16,
		"Arson": 4,
		"Damage": 273,
		"Graffiti": 26,
		"Drugs": 171,
		"Local": 1032
	},
	"1": {
		"Year": 2012,
		"District": "Mitte",
		"Code": 10112,
		"Location": "Regierungsviertel",
		"Robbery": 65,
		"Street_robbery": 29,
		"Injury": 474,
		"Agg_assault": 123,
		"Threat": 142,
		"Theft": 3203,
		"Car": 10,
		"From_car": 307,
		"Bike": 170,
		"Burglary": 37,
		"Fire": 10,
		"Arson": 4,
		"Damage": 380,
		"Graffiti": 124,
		"Drugs": 98,
		"Local": 870
	},
	"2": {
		"Year": 2012,
		"District": "Mitte",
		"Code": 10113,
		"Location": "Alexanderplatz",
		"Robbery": 242,
		"Street_robbery": 136,
		"Injury": 1541,
		"Agg_assault": 454,
		"Threat": 304,
		"Theft": 8988,
		"Car": 81,
		"From_car": 792

# summary

### diff `load`, `loads`, `dump` and `dumps`

 `json` | from | to
 ---|---|---
`load(file)` | file | `json_dict`
`loads(str/bytes/bytearray)`| `str / bytes / bytearray` | `json_dict`
`dump(json_dict, f)` | `json_dict` / `list` | file
`dumps(json_dict)`| `json_dict` / `list` | `str`


# Two methods to read and write JSON

1. use `load` or `dump` combined with **context manager** `with (url)open`: **file <-> json_dict**

    + read:

    ```python
    with (url)open('pth/name', 'r') as file:
        json_dict = json.load(file)
    ```

    + write:
    ```python
    with (url)open('pth/name', 'w') as file:
        json.dump(json_dict, file)
    ```
    
    *Note: you can also `read` the file yourself, but the resulting string must then be parsed with `loads`, not `load`:*
    ```python
    with (url)open('pth/name') as f:
        f_bytes = f.read()
    json_dict = json.loads(f_bytes)
    ```

2. use `open` and `read` or `write` then `loads` or `dumps`: **file <-> str/bytes/bytearray <-> json_dict**

    + read:
    ```python
    file = (url)open('pth/name', 'r')
    file_bytes = file.read()
    json_dict = json.loads(file_bytes)
    file.close()
    ```
    
    + write:
    ```python
    file = open('pth/name', 'w')
    dumps_str = json.dumps(js_dict)
    file.write(dumps_str)
    file.close()
    ```
    
    *notice: `urlopen` takes no mode argument (the response is read-only), so pass only the URL*:
    ```python
    response = urlopen('url')
    res_bytes = response.read()
    json_dict = json.loads(res_bytes)
    ```

In [48]:
# Round trip: dict -> JSON text -> dict, then show the type at each end.
js_data_dumps = json.dumps(js_data, indent='\t')
js_data_loads = json.loads(js_data_dumps)
tuple(type(obj) for obj in (js_data_loads, js_data_dumps))

(dict, str)

# 2 open from URL
## 2.1 use `urllib.request.urlopen` combine with `json` lib

In [4]:
from urllib.request import urlopen

### difference between `load` and `loads`

+ `with open() ... load(file)`: `open()` produces a file object, which `json.load` reads and parses.
    
    ```python
    with open('pth/name', 'r') as f: # type(f): text file object (io.TextIOWrapper)
        json.load(f) # load(file object)
    ```

+ `urlopen()... loads(response.read())`:
    - `urlopen()` produces an `HTTPResponse`; `read()` returns `bytes`, which `loads(bytes)` can parse. Because the response is file-like, `json.load(response)` works as well.
    
    ```python
    with urlopen('url') as response: # type(response): HTTPResponse
        raw = response.read() # type(raw): bytes
        json.loads(raw) # loads(str/bytes)
    ```
    or
    
    ```python
    url = "url"
    response = urlopen(url)
    data = json.loads(response.read())
    ```    


## 2.2 use `requests` lib

+ in two steps: 
    1. `requests.get(url)` -> `requests.Response`
    2. `Response.json()` -> `dict`
    
```python
import requests
r = requests.get('url')
js_dict = r.json()
```

In [62]:
# json.load() only needs an object with a .read() method, so the HTTP
# response can be passed to it directly.
with urlopen("https://api.exchangerate.host/latest") as api_response:
    url_data = json.load(fp=api_response)
print(type(url_data))

<class 'dict'>


In [63]:
url = "https://api.exchangerate.host/latest"
# Open the response in a context manager so the connection is always closed.
with urlopen(url) as response:
    # The body can only be consumed once (a second read() returns b''),
    # so keep the payload and reuse it below.
    response_bytes = response.read()
# loads() accepts the raw bytes directly and returns the parsed dict.
url_data_loads = json.loads(response_bytes)
print(type(response_bytes), type(url_data_loads))

<class 'bytes'> <class 'dict'>


In [64]:
# Read the raw body inside the context manager, parse it after the
# connection has been closed.
rates_endpoint = "https://api.exchangerate.host/latest"
with urlopen(rates_endpoint) as http_resp:
    source = http_resp.read()
url_data_loads_1 = json.loads(s=source)
print(type(source))

<class 'bytes'>


In [75]:
# Display the 'rates' mapping (currency code -> rate); index it further, e.g. ['AED'], for a single value.
url_data_loads_1['rates']

{'AED': 4.363894,
 'AFN': 103.197685,
 'ALL': 121.895407,
 'AMD': 586.084936,
 'ANG': 2.132204,
 'AOA': 751.891161,
 'ARS': 116.296635,
 'AUD': 1.593787,
 'AWG': 2.139842,
 'AZN': 2.020403,
 'BAM': 1.95742,
 'BBD': 2.376866,
 'BDT': 101.11616,
 'BGN': 1.956058,
 'BHD': 0.448476,
 'BIF': 2357.374978,
 'BMD': 1.188444,
 'BND': 1.59556,
 'BOB': 8.199567,
 'BRL': 6.166873,
 'BSD': 1.187592,
 'BTC': 2.4e-05,
 'BTN': 86.713723,
 'BWP': 13.064527,
 'BYN': 2.970061,
 'BZD': 2.394724,
 'CAD': 1.488528,
 'CDF': 2358.455904,
 'CHF': 1.086454,
 'CLF': 0.033305,
 'CLP': 911.118007,
 'CNH': 7.649812,
 'CNY': 7.666086,
 'COP': 4502.603569,
 'CRC': 741.315587,
 'CUC': 1.18811,
 'CUP': 30.586947,
 'CVE': 110.530906,
 'CZK': 25.372273,
 'DJF': 211.40602,
 'DKK': 7.436054,
 'DOP': 67.498304,
 'DZD': 161.147235,
 'EGP': 18.654497,
 'ERN': 17.823269,
 'ETB': 54.298125,
 'EUR': 1,
 'FJD': 2.468738,
 'FKP': 0.857189,
 'GBP': 0.857571,
 'GEL': 3.686635,
 'GGP': 0.856758,
 'GHS': 7.1963,
 'GIP': 0.856829,
 'GM

# 3. csv files

link: https://docs.python.org/3/library/csv.html

youtube: https://www.youtube.com/watch?v=q5uM4VKywbA&list=PLCGI9XxqDKIpeIQAD9aDzstnZWTGEgtMX&index=4

## 3.1 read and write by `csv.reader` and `csv.writer`

In [60]:
# Read the CSV into a list of rows (each row is a list of strings).
# newline='' is what the csv docs require, so that newlines embedded in
# quoted fields are interpreted by the csv module rather than by open().
with open('/home/sharma/Desktop/DeepLearning/DataScience/Pandas/pandas_IO/Berlin_year_TK_Graffiti.csv', newline='') as f:
    csv_reader = csv.reader(f)
    # Skip the header row; the default avoids StopIteration on an empty file.
    next(csv_reader, None)
    # The reader is an iterator, so list() collects all remaining rows at once.
    berlin_y_TK_Graf_list = list(csv_reader)

print(type(csv_reader), type(berlin_y_TK_Graf_list))

<class '_csv.reader'> <class 'list'>


In [32]:
# Write the collected rows as a tab-separated file.
# newline='' prevents the extra blank line between rows on platforms that
# translate '\n', because csv.writer emits its own line terminator.
with open('/home/sharma/Desktop/DeepLearning/DataScience/IO/saved_datasets/berlin_y_TK_graf.csv', 'w', newline='') as f:
    csv_writer = csv.writer(f, delimiter='\t')
    # writerows() takes any iterable of rows; no explicit loop is needed.
    csv_writer.writerows(berlin_y_TK_Graf_list)

In [29]:
# Stream the comma-separated source straight into a tab-separated copy.
# Both files use newline='' so the csv module controls line endings itself.
with open('/home/sharma/Desktop/DeepLearning/DataScience/Pandas/pandas_IO/Berlin_year_TK_Graffiti.csv', newline='') as f1:
    csv_reader = csv.reader(f1)
    # The header row is deliberately kept here (no next(csv_reader) call).
    with open('/home/sharma/Desktop/DeepLearning/DataScience/IO/saved_datasets/berlin_y_TK_graf.csv', 'w', newline='') as f2:
        csv_writer = csv.writer(f2, delimiter='\t')
        # The reader is an iterator of rows, which writerows() consumes directly.
        csv_writer.writerows(csv_reader)

## 3.2 read and write by `csv.DictReader` and `csv.DictWriter`


`csv`| input | output | `writerow(s)` input
---|---|---|---
`reader`| file (any iterable of lines) | iterator of `list` rows |
`DictReader` | file (any iterable of lines) | iterator of `dict` rows (`OrderedDict` on Python 3.6–3.7) |
`writer`| file | writer obj | `writerow(list)` / `writerows(iterable of lists)`
`DictWriter` | file | `DictWriter` obj | `writerow(dict)` / `writerows(iterable of dicts)`

In [87]:
# DictReader uses the first row as field names and yields one dict per row.
# newline='' is what the csv docs require for files handed to the csv module.
with open('/home/sharma/Desktop/DeepLearning/DataScience/Pandas/pandas_IO/Berlin_year_TK_Graffiti.csv', newline='') as f:
    csv_dic_r = csv.DictReader(f)
    for line in csv_dic_r:
        print(line)
# The reader object itself has no useful repr; only iterating it yields rows.
print(csv_dic_r)

{'': '93', 'Year': '2012', 'District': 'Treptow-Köpenick', 'Graffiti': '69'}
{'': '94', 'Year': '2012', 'District': 'Treptow-Köpenick', 'Graffiti': '37'}
{'': '95', 'Year': '2012', 'District': 'Treptow-Köpenick', 'Graffiti': '82'}
{'': '96', 'Year': '2012', 'District': 'Treptow-Köpenick', 'Graffiti': '59'}
{'': '97', 'Year': '2012', 'District': 'Treptow-Köpenick', 'Graffiti': '58'}
{'': '98', 'Year': '2012', 'District': 'Treptow-Köpenick', 'Graffiti': '65'}
{'': '99', 'Year': '2012', 'District': 'Treptow-Köpenick', 'Graffiti': '53'}
{'': '100', 'Year': '2012', 'District': 'Treptow-Köpenick', 'Graffiti': '56'}
{'': '101', 'Year': '2012', 'District': 'Treptow-Köpenick', 'Graffiti': '36'}
{'': '102', 'Year': '2012', 'District': 'Treptow-Köpenick', 'Graffiti': '24'}
{'': '103', 'Year': '2012', 'District': 'Treptow-Köpenick', 'Graffiti': '32'}
{'': '104', 'Year': '2012', 'District': 'Treptow-Köpenick', 'Graffiti': '9'}
{'': '105', 'Year': '2012', 'District': 'Treptow-Köpenick', 'Graffiti': 

In [86]:
# Copy the CSV to a tab-separated file without the 'District' column.
# Separate names for the two handles keep the input file from being shadowed,
# and newline='' on both lets the csv module handle line endings itself.
with open('/home/sharma/Desktop/DeepLearning/DataScience/Pandas/pandas_IO/Berlin_year_TK_Graffiti.csv', newline='') as f_in:
    csv_dic_r = csv.DictReader(f_in)
    with open('/home/sharma/Desktop/DeepLearning/DataScience/IO/saved_datasets/berlin_y_TK_graf.csv', 'w', newline='') as f_out:
        # Columns to keep; '' is the unnamed index column of the source file.
        fn = ['', 'Year', 'Graffiti']
        # extrasaction='ignore' drops keys that are not in fieldnames
        # ('District'), so the input rows do not have to be mutated.
        csv_dic_w = csv.DictWriter(f_out, fieldnames=fn, delimiter='\t', extrasaction='ignore')
        csv_dic_w.writeheader()
        csv_dic_w.writerows(csv_dic_r)

In [116]:
# DictReader treats its argument as an iterable of text lines. Iterating a
# dict yields its keys, so the first key is taken as the header and every
# remaining key becomes a one-field row -- the dict's values are never read.
dict_reader = csv.DictReader(url_data_loads)

for key_row in dict_reader:
    print(key_row)

{'motd': 'success'}
{'motd': 'base'}
{'motd': 'date'}
{'motd': 'rates'}


In [51]:
# Both objects are csv.DictReader instances, whatever they were built from.
tuple(map(type, (dict_reader, csv_dic_r)))

(csv.DictReader, csv.DictReader)

In [110]:
# Build a DictReader over the 'rates' mapping. NOTE(review): iterating a dict
# yields only its keys, so this reader would see the keys as text lines and
# never the rate values -- confirm this is the intended experiment.
url_csv_r = csv.DictReader(url_data['rates'])
# Displaying the reader shows only its repr; no rows are consumed here.
url_csv_r

<csv.DictReader at 0x7fa2a84a8370>

In [117]:
# Write the exchange rates to CSV as one (currency, rate) row per entry.
# The rows are built from url_data_loads['rates'] directly: the earlier
# DictReader over the dict only saw its keys and is exhausted after being
# looped over once, so it cannot serve as a data source.
with open('/home/sharma/Desktop/DeepLearning/DataScience/IO/saved_datasets/currency_exchange.csv', 'w', newline='') as f:
    fn = ['currency', 'rate']
    csv_dict_writer = csv.DictWriter(f, fieldnames=fn)
    csv_dict_writer.writeheader()
    # writerows() accepts any iterable of dicts whose keys match fieldnames.
    csv_dict_writer.writerows(
        {'currency': currency, 'rate': rate}
        for currency, rate in url_data_loads['rates'].items()
    )

In [113]:
# Show the top-level keys of the response, then each (currency, rate) pair.
print(url_data_loads.keys())
rates = url_data_loads['rates']
for currency_rate in rates.items():
    print(currency_rate)

dict_keys(['motd', 'success', 'base', 'date', 'rates'])
('AED', 4.363894)
('AFN', 103.197685)
('ALL', 121.895407)
('AMD', 586.084936)
('ANG', 2.132204)
('AOA', 751.891161)
('ARS', 116.296635)
('AUD', 1.593787)
('AWG', 2.139842)
('AZN', 2.020403)
('BAM', 1.95742)
('BBD', 2.376866)
('BDT', 101.11616)
('BGN', 1.956058)
('BHD', 0.448476)
('BIF', 2357.374978)
('BMD', 1.188444)
('BND', 1.59556)
('BOB', 8.199567)
('BRL', 6.166873)
('BSD', 1.187592)
('BTC', 2.4e-05)
('BTN', 86.713723)
('BWP', 13.064527)
('BYN', 2.970061)
('BZD', 2.394724)
('CAD', 1.488528)
('CDF', 2358.455904)
('CHF', 1.086454)
('CLF', 0.033305)
('CLP', 911.118007)
('CNH', 7.649812)
('CNY', 7.666086)
('COP', 4502.603569)
('CRC', 741.315587)
('CUC', 1.18811)
('CUP', 30.586947)
('CVE', 110.530906)
('CZK', 25.372273)
('DJF', 211.40602)
('DKK', 7.436054)
('DOP', 67.498304)
('DZD', 161.147235)
('EGP', 18.654497)
('ERN', 17.823269)
('ETB', 54.298125)
('EUR', 1)
('FJD', 2.468738)
('FKP', 0.857189)
('GBP', 0.857571)
('GEL', 3.686635)


## 3.3 summary
## read and write csv (by `generator`)

+ read
    - `csv.reader(f)`: -> iterator of rows (each row is a `list` of `str`); wrap it in `list()` to keep all rows

    ```python
    with open('pth/name') as f_r:
        # list generator
        csv_reader = csv.reader(f_r)
        next(csv_reader)
        store = list()
        for line in csv_reader:
            store.append(line)
    ```
    - `csv.DictReader(f)`: -> iterator of `dict` rows keyed by the header fields
    
    ```python
    with open('pth/name') as f_r:
        # dict generator
        csv_dict_r = csv.DictReader(f_r)
        store = list()
        for line in csv_dict_r:
            store.append(line)
    ```
    
+ write
    - `csv.writer`
    ```python
    with open('pth/name', 'w') as f_w:
        csv_writer = csv.writer(f_w, delimiter='\t')
        for line in csv_reader:
            csv_writer.writerow(line)
    ```
    or
    ```python
    with open('pth/name', 'w') as f_w:
        csv_writer = csv.writer(f_w, delimiter='\t')
        csv_writer.writerows(csv_reader)
    ```
    - `csv.DictWriter`
    ```python
    with open('pth/name', 'w') as f_w:
        fn = ['remain_colnames']
        csv_writer = csv.DictWriter(f_w, fieldnames=fn, delimiter='\t')
        csv_writer.writeheader()
        for line in csv_dict_r:  # rows must come from a csv.DictReader
            del line['need_delete_cols']
            csv_writer.writerow(line)
    ```   

# 4. I/O and file system
https://www.youtube.com/watch?v=Uh2ebFW8OYM&list=PLCGI9XxqDKIpeIQAD9aDzstnZWTGEgtMX&index=2

```python
with open('filename', 'r') as f:
    read_size = 100  # number of characters to read per call
    f_contents = f.read(read_size)
    while len(f_contents)>0:
        print(f_contents, end='')
        f_contents = f.read(read_size)
```