In [3]:
import os
from pprint import pprint
from pathlib import Path
from datetime import datetime, timedelta, timezone
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from PIL import Image

# Load Data

In [4]:
# Root of the project's data tree; every other path in this notebook is derived from it.
root_data_dirpath = Path('/workspace/deformation-prediction-multi-environment/data')
# The session table and the per-session camera folders are read from the 'raw' subdirectory.
raw_data_dirpath = root_data_dirpath.joinpath('raw')

In [4]:
# CSV listing every recording session (session id, location, JST start/end times).
session_data_filepath = raw_data_dirpath / 'session.csv'

In [5]:
# Read the session table into a DataFrame; column dtypes are inferred by pandas.
session_df = pd.read_csv(session_data_filepath)

In [6]:
# Show the session table as the cell output.
session_df

Unnamed: 0,session_id,location,start_time_JST,end_time_JST
0,session_20240926_151603,512,2024-09-26 15:17:00,2024-09-26 15:22:00
1,session_20240926_152330,512,2024-09-26 15:24:00,2024-09-26 15:29:00
2,session_20240926_153446,512,2024-09-26 15:35:00,2024-09-26 15:40:00
3,session_20240926_154624,512,2024-09-26 15:47:00,2024-09-26 15:52:00
4,session_20240926_165118,511,2024-09-26 16:52:00,2024-09-26 16:57:00
5,session_20240926_170021,511,2024-09-26 17:01:00,2024-09-26 17:06:00
6,session_20240926_171232,511,2024-09-26 17:13:00,2024-09-26 17:18:00
7,session_20240926_172237,511,2024-09-26 17:23:00,2024-09-26 17:28:00
8,session_20240926_175737,514B,2024-09-26 17:58:00,2024-09-26 18:03:00
9,session_20240926_180441,514B,2024-09-26 18:05:00,2024-09-26 18:10:00


In [7]:
def convert_jst_to_utc(jst_time_str):
    """Parse a JST wall-clock string and return the matching UTC time.

    Args:
        jst_time_str: Timestamp formatted as 'YYYY-MM-DD HH:MM:SS', expressed
            in Japan Standard Time.

    Returns:
        A naive ``datetime`` (no tzinfo attached) equal to the input shifted
        back by the fixed 9-hour JST offset.

    Raises:
        ValueError: If the string does not match the expected format.
    """
    parsed = datetime.strptime(jst_time_str, '%Y-%m-%d %H:%M:%S')
    # JST is UTC+9, so UTC is nine hours behind the parsed wall-clock time.
    return parsed - timedelta(hours=9)

def load_meta_data(file_path):
    """Read a frame metadata CSV and convert its timestamps to datetimes.

    Args:
        file_path: Path of a CSV file that contains a 'timestamp' column
            holding seconds since the Unix epoch.

    Returns:
        DataFrame with the same columns as the file, where 'timestamp' has
        been converted with ``pd.to_datetime(..., unit='s')``.

    Raises:
        KeyError: If the file has no 'timestamp' column.
    """
    return pd.read_csv(file_path).assign(
        # Epoch seconds -> pandas datetimes.
        timestamp=lambda frame: pd.to_datetime(frame['timestamp'], unit='s')
    )

def get_session_data(session_df, session_id):
    """Look up the location and recording window of one session.

    Args:
        session_df: Session table with 'session_id', 'location',
            'start_time_JST' and 'end_time_JST' columns.
        session_id: Identifier to search for in the 'session_id' column.

    Returns:
        Tuple ``(location, start_time_jst, end_time_jst)`` taken from the
        first row whose 'session_id' equals ``session_id``.

    Raises:
        IndexError: If no row matches ``session_id``.
    """
    matches = session_df[session_df['session_id'] == session_id]
    wanted_columns = ('location', 'start_time_JST', 'end_time_JST')
    # Take the first matching row's value for each requested column.
    location, start_time_jst, end_time_jst = (
        matches[column].values[0] for column in wanted_columns
    )
    return location, start_time_jst, end_time_jst

def load_session_image_meta_data(session_id, session_df, raw_data_dirpath):
    """Load the camera0 frame metadata recorded for one session.

    The session's UTC recording window is printed for reference only; the
    returned frame is NOT filtered to that window.

    Args:
        session_id: Identifier of the session to load.
        session_df: Session table with 'session_id', 'location',
            'start_time_JST' and 'end_time_JST' columns.
        raw_data_dirpath: ``pathlib.Path`` of the raw data root, laid out as
            ``<location>/<session_id>/camera0/meta_data.csv``.

    Returns:
        DataFrame produced by ``load_meta_data`` for the session's
        ``meta_data.csv``.

    Raises:
        IndexError: If ``session_id`` is not present in ``session_df``.
    """
    # Look up the session's location and recording window.
    location, start_time_jst_str, end_time_jst_str = get_session_data(session_df, session_id)

    # JST -> UTC conversion (only used for the log line below).
    start_time_utc = convert_jst_to_utc(start_time_jst_str)
    end_time_utc = convert_jst_to_utc(end_time_jst_str)

    print(f"Location: {location}, Start Time: {start_time_utc}, End Time: {end_time_utc}")

    # Build the file path. pandas infers an integer dtype when every location
    # in the CSV is numeric, and Path / int raises TypeError, so normalise the
    # location to str before joining.
    session_dirpath = raw_data_dirpath / str(location) / session_id / 'camera0'
    meta_data_filepath = session_dirpath / 'meta_data.csv'

    # Load the metadata.
    return load_meta_data(meta_data_filepath)

In [9]:
def resample(meta_data_df, start_time, end_time, frame_rate=None):
    """Resample frame metadata onto a fixed-rate time grid.

    Builds evenly spaced timestamps beginning at ``start_time`` and ending at
    least one frame period before ``end_time``, then attaches to each grid
    timestamp the most recent row of ``meta_data_df`` whose 'timestamp' is not
    later than it (backward as-of join).

    Args:
        meta_data_df: DataFrame with a datetime 'timestamp' column. It does
            not have to be pre-sorted.
        start_time: First grid timestamp (inclusive).
        end_time: End of the window (exclusive).
        frame_rate: Grid rate in frames per second. When omitted, the
            notebook-level ``fps`` variable is used, as before.

    Returns:
        DataFrame with one row per grid timestamp. Columns coming from
        ``meta_data_df`` are NaN for grid timestamps that precede every
        metadata row.

    Raises:
        ValueError: If the frame rate is not positive.
    """
    if frame_rate is None:
        # Backward-compatible default: fall back to the notebook global.
        frame_rate = fps
    if frame_rate <= 0:
        raise ValueError(f"frame_rate must be positive, got {frame_rate}")

    # Exact frame period. Truncating to whole milliseconds (e.g. 33 ms at
    # 30 fps) would make the grid run faster than the requested rate.
    frame_period = pd.Timedelta(seconds=1) / frame_rate

    # pd.date_range includes its end point, so stop one frame period early.
    new_timestamps = pd.date_range(
        start=pd.Timestamp(start_time),
        end=pd.Timestamp(end_time) - frame_period,
        freq=frame_period,
    )
    new_df = pd.DataFrame(new_timestamps, columns=['timestamp'])

    # merge_asof requires sorted keys; a stable sort keeps the original order
    # of rows that share a timestamp.
    ordered_meta_df = meta_data_df.sort_values('timestamp', kind='stable')
    resampled_df = pd.merge_asof(new_df, ordered_meta_df, on='timestamp', direction='backward')

    return resampled_df

In [11]:
# Target frame rate (frames per second) of the resampled timeline; resample() reads this global.
fps = 10

In [12]:
# Collect each session's resampled frames, grouped by location. The frames are
# concatenated once per location after the loop instead of calling pd.concat
# on every iteration, which would re-copy the accumulated frame each time.
frames_by_location = {}

# Process the sessions in table order.
for _, row in session_df.iterrows():
    session_id = row['session_id']
    # Normalise to str: pandas infers an integer dtype when every location in
    # the CSV is numeric, which would break the Path joins below and the
    # string-keyed lookups at the end of this cell.
    location = str(row['location'])
    start_time_jst_str = row['start_time_JST']
    end_time_jst_str = row['end_time_JST']

    # JST -> UTC conversion.
    start_time_utc = convert_jst_to_utc(start_time_jst_str)
    end_time_utc = convert_jst_to_utc(end_time_jst_str)
    print(f"Location: {location}, Start Time: {start_time_utc}, End Time: {end_time_utc}")

    # Location of this session's camera0 folder relative to the raw data root.
    session_relpath = Path(location) / session_id / 'camera0'
    session_dirpath = raw_data_dirpath / session_relpath
    meta_data_filepath = session_dirpath / 'meta_data.csv'
    meta_data_df = load_meta_data(meta_data_filepath)

    # Resample onto the fixed-rate grid covering the session window.
    resampled_df = resample(meta_data_df, start_time_utc, end_time_utc)

    resampled_df['location'] = location
    resampled_df['session_id'] = session_id
    # Path / Series is evaluated element-wise by pandas, giving one relative
    # Path per frame.
    resampled_df['full_filepath'] = session_relpath / resampled_df['filepath']

    frames_by_location.setdefault(location, []).append(resampled_df)

# One DataFrame per location, with a fresh 0..n-1 index.
location_dfs = {
    loc: pd.concat(frames, ignore_index=True)
    for loc, frames in frames_by_location.items()
}

# Fetch the per-location frames (empty DataFrame when a location has no sessions).
df_512 = location_dfs.get('512', pd.DataFrame())
df_511 = location_dfs.get('511', pd.DataFrame())
df_514B = location_dfs.get('514B', pd.DataFrame())

Location: 512, Start Time: 2024-09-26 06:17:00, End Time: 2024-09-26 06:22:00
Location: 512, Start Time: 2024-09-26 06:24:00, End Time: 2024-09-26 06:29:00
Location: 512, Start Time: 2024-09-26 06:35:00, End Time: 2024-09-26 06:40:00
Location: 512, Start Time: 2024-09-26 06:47:00, End Time: 2024-09-26 06:52:00
Location: 511, Start Time: 2024-09-26 07:52:00, End Time: 2024-09-26 07:57:00
Location: 511, Start Time: 2024-09-26 08:01:00, End Time: 2024-09-26 08:06:00
Location: 511, Start Time: 2024-09-26 08:13:00, End Time: 2024-09-26 08:18:00
Location: 511, Start Time: 2024-09-26 08:23:00, End Time: 2024-09-26 08:28:00
Location: 514B, Start Time: 2024-09-26 08:58:00, End Time: 2024-09-26 09:03:00
Location: 514B, Start Time: 2024-09-26 09:05:00, End Time: 2024-09-26 09:10:00
Location: 514B, Start Time: 2024-09-26 09:12:00, End Time: 2024-09-26 09:17:00
Location: 514B, Start Time: 2024-09-26 09:19:00, End Time: 2024-09-26 09:24:00


In [13]:
# Preview the resampled frames for location 512.
df_512

Unnamed: 0,timestamp,filepath,location,session_id,full_filepath
0,2024-09-26 06:17:00.000,2024-09-26/06/16/2024-09-26_06-16-59_29.jpg,512,session_20240926_151603,512/session_20240926_151603/camera0/2024-09-26...
1,2024-09-26 06:17:00.100,2024-09-26/06/17/2024-09-26_06-17-00_2.jpg,512,session_20240926_151603,512/session_20240926_151603/camera0/2024-09-26...
2,2024-09-26 06:17:00.200,2024-09-26/06/17/2024-09-26_06-17-00_5.jpg,512,session_20240926_151603,512/session_20240926_151603/camera0/2024-09-26...
3,2024-09-26 06:17:00.300,2024-09-26/06/17/2024-09-26_06-17-00_8.jpg,512,session_20240926_151603,512/session_20240926_151603/camera0/2024-09-26...
4,2024-09-26 06:17:00.400,2024-09-26/06/17/2024-09-26_06-17-00_9.jpg,512,session_20240926_151603,512/session_20240926_151603/camera0/2024-09-26...
...,...,...,...,...,...
11995,2024-09-26 06:51:59.500,2024-09-26/06/51/2024-09-26_06-51-59_14.jpg,512,session_20240926_154624,512/session_20240926_154624/camera0/2024-09-26...
11996,2024-09-26 06:51:59.600,2024-09-26/06/51/2024-09-26_06-51-59_17.jpg,512,session_20240926_154624,512/session_20240926_154624/camera0/2024-09-26...
11997,2024-09-26 06:51:59.700,2024-09-26/06/51/2024-09-26_06-51-59_20.jpg,512,session_20240926_154624,512/session_20240926_154624/camera0/2024-09-26...
11998,2024-09-26 06:51:59.800,2024-09-26/06/51/2024-09-26_06-51-59_23.jpg,512,session_20240926_154624,512/session_20240926_154624/camera0/2024-09-26...


In [14]:
# Preview the resampled frames for location 511.
df_511

Unnamed: 0,timestamp,filepath,location,session_id,full_filepath
0,2024-09-26 07:52:00.000,2024-09-26/07/51/2024-09-26_07-51-59_28.jpg,511,session_20240926_165118,511/session_20240926_165118/camera0/2024-09-26...
1,2024-09-26 07:52:00.100,2024-09-26/07/52/2024-09-26_07-52-00_2.jpg,511,session_20240926_165118,511/session_20240926_165118/camera0/2024-09-26...
2,2024-09-26 07:52:00.200,2024-09-26/07/52/2024-09-26_07-52-00_5.jpg,511,session_20240926_165118,511/session_20240926_165118/camera0/2024-09-26...
3,2024-09-26 07:52:00.300,2024-09-26/07/52/2024-09-26_07-52-00_8.jpg,511,session_20240926_165118,511/session_20240926_165118/camera0/2024-09-26...
4,2024-09-26 07:52:00.400,2024-09-26/07/52/2024-09-26_07-52-00_11.jpg,511,session_20240926_165118,511/session_20240926_165118/camera0/2024-09-26...
...,...,...,...,...,...
11995,2024-09-26 08:27:59.500,2024-09-26/08/27/2024-09-26_08-27-59_14.jpg,511,session_20240926_172237,511/session_20240926_172237/camera0/2024-09-26...
11996,2024-09-26 08:27:59.600,2024-09-26/08/27/2024-09-26_08-27-59_17.jpg,511,session_20240926_172237,511/session_20240926_172237/camera0/2024-09-26...
11997,2024-09-26 08:27:59.700,2024-09-26/08/27/2024-09-26_08-27-59_20.jpg,511,session_20240926_172237,511/session_20240926_172237/camera0/2024-09-26...
11998,2024-09-26 08:27:59.800,2024-09-26/08/27/2024-09-26_08-27-59_23.jpg,511,session_20240926_172237,511/session_20240926_172237/camera0/2024-09-26...


In [15]:
# Preview the resampled frames for location 514B.
df_514B

Unnamed: 0,timestamp,filepath,location,session_id,full_filepath
0,2024-09-26 08:58:00.000,2024-09-26/08/57/2024-09-26_08-57-59_29.jpg,514B,session_20240926_175737,514B/session_20240926_175737/camera0/2024-09-2...
1,2024-09-26 08:58:00.100,2024-09-26/08/58/2024-09-26_08-58-00_2.jpg,514B,session_20240926_175737,514B/session_20240926_175737/camera0/2024-09-2...
2,2024-09-26 08:58:00.200,2024-09-26/08/58/2024-09-26_08-58-00_5.jpg,514B,session_20240926_175737,514B/session_20240926_175737/camera0/2024-09-2...
3,2024-09-26 08:58:00.300,2024-09-26/08/58/2024-09-26_08-58-00_8.jpg,514B,session_20240926_175737,514B/session_20240926_175737/camera0/2024-09-2...
4,2024-09-26 08:58:00.400,2024-09-26/08/58/2024-09-26_08-58-00_11.jpg,514B,session_20240926_175737,514B/session_20240926_175737/camera0/2024-09-2...
...,...,...,...,...,...
11995,2024-09-26 09:23:59.500,2024-09-26/09/23/2024-09-26_09-23-59_14.jpg,514B,session_20240926_181815,514B/session_20240926_181815/camera0/2024-09-2...
11996,2024-09-26 09:23:59.600,2024-09-26/09/23/2024-09-26_09-23-59_17.jpg,514B,session_20240926_181815,514B/session_20240926_181815/camera0/2024-09-2...
11997,2024-09-26 09:23:59.700,2024-09-26/09/23/2024-09-26_09-23-59_20.jpg,514B,session_20240926_181815,514B/session_20240926_181815/camera0/2024-09-2...
11998,2024-09-26 09:23:59.800,2024-09-26/09/23/2024-09-26_09-23-59_23.jpg,514B,session_20240926_181815,514B/session_20240926_181815/camera0/2024-09-2...


# 保存

In [16]:
def load_image_to_numpy(filepath):
    """Read one image file into a NumPy array, tolerating failures.

    Args:
        filepath: Location of the image, passed to ``Image.open``.

    Returns:
        ``np.array`` built from the opened image, or ``None`` when anything
        goes wrong while opening or converting it (the error is printed).
    """
    pixels = None
    try:
        # Open the image and convert it while the file handle is still open.
        with Image.open(filepath) as img:
            pixels = np.array(img)
    except Exception as e:
        # Best effort: report the failure and fall through to return None.
        print(f"Error loading image {filepath}: {e}")
    return pixels

def load_images_to_numpy(df, raw_data_dirpath):
    """Load every image referenced by a resampled frame table.

    Args:
        df: DataFrame with a 'full_filepath' column holding paths relative to
            ``raw_data_dirpath``.
        raw_data_dirpath: ``pathlib.Path`` that the relative paths are joined
            onto.

    Returns:
        List with one entry per row of ``df``, in row order: the image array,
        or ``None`` where ``load_image_to_numpy`` failed.
    """
    image_arrays = []
    loaded_count = 0
    for image_path in raw_data_dirpath / df['full_filepath']:
        pixels = load_image_to_numpy(image_path)
        if pixels is not None:
            loaded_count += 1
        image_arrays.append(pixels)

    # Report how many loads succeeded.
    print(f"Number of images successfully loaded: {loaded_count}")

    return image_arrays

In [17]:
resampled_dirpath = root_data_dirpath / 'preprocessed' / 'resampled'
# np.save does not create missing parent directories. Create the output folder
# up front so a long image-loading pass cannot end in FileNotFoundError.
resampled_dirpath.mkdir(parents=True, exist_ok=True)

for df, location in zip([df_512, df_511, df_514B], ['512', '511', '514B']):
    # A location without sessions is represented by an empty DataFrame that has
    # no 'full_filepath' column; skip it instead of failing with KeyError.
    if df.empty:
        print(f"No resampled frames for location {location}; skipping.")
        continue
    images = load_images_to_numpy(df, raw_data_dirpath)
    save_filepath = resampled_dirpath / f'images_{location}_resampled.npy'
    # One .npy file per location, holding the images in row order of df.
    np.save(save_filepath, images)

Number of images successfully loaded: 12000
Number of images successfully loaded: 12000
Number of images successfully loaded: 12000
