In [4]:
cd /content/drive/MyDrive/100knock-process-visualization/chapter-8

/content/drive/MyDrive/100knock-process-visualization/chapter-8


### ノック91：大容量CSVデータを扱ってみよう

In [5]:
import pandas as pd
# Load the entire person-count CSV into memory with a single call and display it.
df = pd.read_csv(filepath_or_buffer='data/person_count_out_0001_2021011509.csv')
df

Unnamed: 0,id,place,receive_time,sensor_num,in1,out1,state1,in2,out2,state2
0,0,1,2021-01-15 09:00:00.144,2,508,73,0,73,508,0
1,1,1,2021-01-15 09:00:01.146,2,508,73,0,73,508,0
2,2,1,2021-01-15 09:00:02.161,2,508,73,0,73,508,0
3,3,1,2021-01-15 09:00:03.176,2,508,73,0,73,508,0
4,4,1,2021-01-15 09:00:04.192,2,508,73,0,73,508,0
...,...,...,...,...,...,...,...,...,...,...
3535,3535,1,2021-01-15 09:59:55.054,2,782,156,0,156,782,0
3536,3536,1,2021-01-15 09:59:56.07,2,782,156,0,156,782,0
3537,3537,1,2021-01-15 09:59:57.085,2,782,156,0,156,782,0
3538,3538,1,2021-01-15 09:59:58.101,2,782,156,0,156,782,0


In [6]:
# Stream the same CSV in 512-row chunks and print each chunk's (rows, columns).
chunk_reader = pd.read_csv('data/person_count_out_0001_2021011509.csv', chunksize=512)
for df in chunk_reader:
    n_rows, n_cols = df.shape
    print((n_rows, n_cols))

(512, 10)
(512, 10)
(512, 10)
(512, 10)
(512, 10)
(512, 10)
(468, 10)


In [7]:
# Process the CSV 64 rows at a time and write each processed chunk out
# immediately, so only one chunk is held in memory at any moment.
chunk_reader = pd.read_csv('data/person_count_out_0001_2021011509.csv', chunksize=64)
for chunk_index, chunk in enumerate(chunk_reader):
    is_first_chunk = chunk_index == 0
    chunk['processed_per_chunk'] = True
    chunk.to_csv(
        'data/processed_big_data.csv',
        # Truncate on the first chunk: with mode='a' for every chunk, re-running
        # this cell would append a second copy of the data (plus a stray header
        # row) after the previous run's output.
        mode='w' if is_first_chunk else 'a',
        index=False,
        # Only the first chunk writes the header row.
        header=is_first_chunk,
    )

In [8]:
# Read the chunk-written file back in one call to inspect the combined result.
df = pd.read_csv(filepath_or_buffer='data/processed_big_data.csv')
df

Unnamed: 0,id,place,receive_time,sensor_num,in1,out1,state1,in2,out2,state2,processed_per_chunk
0,0,1,2021-01-15 09:00:00.144,2,508,73,0,73,508,0,True
1,1,1,2021-01-15 09:00:01.146,2,508,73,0,73,508,0,True
2,2,1,2021-01-15 09:00:02.161,2,508,73,0,73,508,0,True
3,3,1,2021-01-15 09:00:03.176,2,508,73,0,73,508,0,True
4,4,1,2021-01-15 09:00:04.192,2,508,73,0,73,508,0,True
...,...,...,...,...,...,...,...,...,...,...,...
3535,3535,1,2021-01-15 09:59:55.054,2,782,156,0,156,782,0,True
3536,3536,1,2021-01-15 09:59:56.07,2,782,156,0,156,782,0,True
3537,3537,1,2021-01-15 09:59:57.085,2,782,156,0,156,782,0,True
3538,3538,1,2021-01-15 09:59:58.101,2,782,156,0,156,782,0,True


### ノック92：JSON形式のファイルを扱ってみよう

In [10]:
# Parse the JSON file with read_json's default settings and display the resulting frame.
column_oriented_df = pd.read_json('data/column_oriented.json')
column_oriented_df

Unnamed: 0,id,value
0,1,1
1,2,10
2,3,100


In [11]:
# Print the raw text of the JSON file that was just loaded.
!cat data/column_oriented.json

{"id":{"0":1,"1":2,"2":3},"value":{"0":1,"1":10,"2":100}}

In [12]:
# Print the raw text of the index-oriented JSON file for comparison.
!cat data/index_oriented.json

{"0":{"id":1,"value":1},"1":{"id":2,"value":10},"2":{"id":3,"value":100}}

In [13]:
# Apply the same default read_json call to the index-oriented file and display the result.
index_oriented_df = pd.read_json('data/index_oriented.json')
index_oriented_df

Unnamed: 0,0,1,2
id,1,2,3
value,1,10,100


In [15]:
# Read a JSON file with orient='index', i.e. treating the top-level keys as row labels.
# NOTE(review): the file read here is column_oriented.json, not index_oriented.json;
# presumably the intent was to apply orient='index' to the index-oriented file — confirm.
pd.read_json('data/column_oriented.json', orient='index')

Unnamed: 0,0,1,2
id,1,2,3
value,1,10,100


In [16]:
# Print the raw text of the table-oriented JSON file (schema plus data records).
!cat data/table_oriented.json

{"schema":{"fields":[{"name":"index","type":"integer"},{"name":"id","type":"integer"},{"name":"value","type":"integer"}],"primaryKey":["index"],"pandas_version":"0.20.0"},"data":[{"index":0,"id":1,"value":1},{"index":1,"id":2,"value":10},{"index":2,"id":3,"value":100}]}

In [18]:
# Parse the schema-carrying JSON file by explicitly selecting the 'table' orientation.
table_oriented_df = pd.read_json('data/table_oriented.json', orient='table')
table_oriented_df

Unnamed: 0,id,value
0,1,1
1,2,10
2,3,100


### ノック93：Webからデータを取得してみよう

In [19]:
import requests
# Fetch the current Tokyo time from the World Time API.
# The timeout keeps the cell from hanging indefinitely if the service does not
# answer, and raise_for_status() surfaces HTTP errors here rather than as a
# confusing JSON-decoding failure in a later cell.
response = requests.get('https://worldtimeapi.org/api/timezone/Asia/Tokyo', timeout=10)
response.raise_for_status()
response.content

b'{"abbreviation":"JST","client_ip":"35.221.143.248","datetime":"2022-09-21T23:17:29.242723+09:00","day_of_week":3,"day_of_year":264,"dst":false,"dst_from":null,"dst_offset":0,"dst_until":null,"raw_offset":32400,"timezone":"Asia/Tokyo","unixtime":1663769849,"utc_datetime":"2022-09-21T14:17:29.242723+00:00","utc_offset":"+09:00","week_number":38}'

In [20]:
# Decode the JSON response body into Python objects and display the result.
result = response.json()
result

{'abbreviation': 'JST',
 'client_ip': '35.221.143.248',
 'datetime': '2022-09-21T23:17:29.242723+09:00',
 'day_of_week': 3,
 'day_of_year': 264,
 'dst': False,
 'dst_from': None,
 'dst_offset': 0,
 'dst_until': None,
 'raw_offset': 32400,
 'timezone': 'Asia/Tokyo',
 'unixtime': 1663769849,
 'utc_datetime': '2022-09-21T14:17:29.242723+00:00',
 'utc_offset': '+09:00',
 'week_number': 38}

In [21]:
# Wrap the decoded response dict in a Series (keys become the index) and display it.
result_series = pd.Series(result)
result_series

abbreviation                                 JST
client_ip                         35.221.143.248
datetime        2022-09-21T23:17:29.242723+09:00
day_of_week                                    3
day_of_year                                  264
dst                                        False
dst_from                                    None
dst_offset                                     0
dst_until                                   None
raw_offset                                 32400
timezone                              Asia/Tokyo
unixtime                              1663769849
utc_datetime    2022-09-21T14:17:29.242723+00:00
utc_offset                                +09:00
week_number                                   38
dtype: object

In [22]:
import json

# Persist the decoded response as a JSON document, replacing any existing file.
with open('data/response.json', mode='w') as response_file:
    serialized_result = json.dumps(result)
    response_file.write(serialized_result)

In [23]:
import time

# Poll the World Time API four times, appending each response as one JSON
# document per line (append mode: re-running the cell adds further lines).
n_requests = 4
for attempt in range(n_requests):
    # Bound the wait and fail loudly on HTTP errors so that a failed request
    # does not end up in the file as if it were a valid sample.
    response = requests.get('https://worldtimeapi.org/api/timezone/Asia/Tokyo', timeout=10)
    response.raise_for_status()
    res = response.json()
    with open('data/responses.txt', mode='a') as f:
        f.write(f'{json.dumps(res)}\n')
    # Pause between requests only; sleeping after the final one is wasted time.
    if attempt < n_requests - 1:
        time.sleep(1)

In [24]:
# Print the accumulated responses file (one JSON document per line).
!cat data/responses.txt

{"abbreviation": "JST", "client_ip": "35.221.143.248", "datetime": "2022-09-21T23:21:17.547470+09:00", "day_of_week": 3, "day_of_year": 264, "dst": false, "dst_from": null, "dst_offset": 0, "dst_until": null, "raw_offset": 32400, "timezone": "Asia/Tokyo", "unixtime": 1663770077, "utc_datetime": "2022-09-21T14:21:17.547470+00:00", "utc_offset": "+09:00", "week_number": 38}
{"abbreviation": "JST", "client_ip": "35.221.143.248", "datetime": "2022-09-21T23:21:18.804227+09:00", "day_of_week": 3, "day_of_year": 264, "dst": false, "dst_from": null, "dst_offset": 0, "dst_until": null, "raw_offset": 32400, "timezone": "Asia/Tokyo", "unixtime": 1663770078, "utc_datetime": "2022-09-21T14:21:18.804227+00:00", "utc_offset": "+09:00", "week_number": 38}
{"abbreviation": "JST", "client_ip": "35.221.143.248", "datetime": "2022-09-21T23:21:20.060287+09:00", "day_of_week": 3, "day_of_year": 264, "dst": false, "dst_from": null, "dst_offset": 0, "dst_until": null, "raw_offset": 32400, "timezone": "Asia/To

### ノック94：configファイルを扱ってみよう

In [25]:
# Print the raw YAML config file.
!cat config.yml

dataset:
  name: pseudo
  path: data/images_by_py/
use_gpu: true


In [27]:
import yaml
# Load the YAML config into plain Python dicts and scalars.
# safe_load replaces yaml.load(..., Loader=yaml.Loader): the full Loader can
# construct arbitrary Python objects from tagged YAML, which is unnecessary for
# a config consisting of plain mappings, strings and booleans.
with open('config.yml', mode='r') as f:
    config = yaml.safe_load(f)

config

{'dataset': {'name': 'pseudo', 'path': 'data/images_by_py/'}, 'use_gpu': True}

In [28]:
# Print the raw TOML config file.
!cat config.toml

use_gpu = true

[dataset]
name = "pseudo"
path = "data/images_by_py/"


In [29]:
import toml
# Parse the TOML config from an open text handle into a nested dict, then display it.
with open('config.toml', mode='r') as toml_file:
    config = toml.load(toml_file)

config

{'use_gpu': True, 'dataset': {'name': 'pseudo', 'path': 'data/images_by_py/'}}

### ノック95：動画ファイルを音声ファイルへ変換してみよう

In [32]:
!pip install  imageio-ffmpeg

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting imageio-ffmpeg
  Downloading imageio_ffmpeg-0.4.7-py3-none-manylinux2010_x86_64.whl (26.9 MB)
[K     |████████████████████████████████| 26.9 MB 1.9 MB/s 
[?25hInstalling collected packages: imageio-ffmpeg
Successfully installed imageio-ffmpeg-0.4.7


In [34]:
!pip install imageio==2.4.1

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting imageio==2.4.1
  Downloading imageio-2.4.1.tar.gz (3.3 MB)
[K     |████████████████████████████████| 3.3 MB 2.1 MB/s 
Building wheels for collected packages: imageio
  Building wheel for imageio (setup.py) ... [?25l[?25hdone
  Created wheel for imageio: filename=imageio-2.4.1-py3-none-any.whl size=3303885 sha256=d52bbabb0286eb5b2bfe1e2a5176ee4758cbdebb23a28bc7ecc6dd5858f6a0f5
  Stored in directory: /root/.cache/pip/wheels/46/20/07/7bb9c8c44e6ec2efa60fd0e6280094f53f65f41767ef69a5ee
Successfully built imageio
Installing collected packages: imageio
  Attempting uninstall: imageio
    Found existing installation: imageio 2.9.0
    Uninstalling imageio-2.9.0:
      Successfully uninstalled imageio-2.9.0
Successfully installed imageio-2.4.1


In [36]:
from moviepy.editor import VideoFileClip

# Open the sample video and write its audio track out as an MP3 file.
# NOTE(review): the clip is not closed in this cell; confirm whether later cells
# reuse video_clip before adding an explicit close().
video_clip = VideoFileClip('data/sample_video.mp4')
audio_track = video_clip.audio
audio_track.write_audiofile('data/audio_by_py.mp3')

### ノック96：動画ファイルを画像ファイルへ分割してみよう