IMPORT LIBRARIES

In [1]:
import os
import pandas as pd
from functions import *
from datetime import datetime, timedelta, time
import numpy as np
from scipy.interpolate import interp1d

from datetime import datetime, timedelta
import math
import zipfile

DEFINE PARAMETERS

In [3]:
# --- Session selection --------------------------------------------------------
# Name of the session folder to process, and the directory that contains it.
# ROOT keeps its trailing "/" because the session path is built by plain
# string concatenation (ROOT + SESSION_NAME).
SESSION_NAME = "session_2022_10_19_aldabra_ARM01_plancha_body_v1A_00"
ROOT = "/media/clrintz/plancha_drive_3/plancha_session/data/"

# RGP base station identifier -- La Reunion only, leave disabled elsewhere.
# rgp_station = "lepo"

# --- Timing -------------------------------------------------------------------
# Offset (in seconds) added to the first-frame time to match GPS time.
leap_sec = 18

# Frame extraction rate; deliberately a string, converted with float() where a
# number is needed.
frames_per_second = "3"

In [3]:
# Derived parameters: everything below is computed from ROOT, SESSION_NAME and
# frames_per_second, or points at the workflow's configuration files.

# --- External tool configuration ---
PPK_CONFIG_PATH = "/home/clrintz/Documents/workflow_plancha/"  # folder holding the PPK config files
ppk_cfgs = ["ppk_config_file"]  # config files to run, given without their .conf extension
exiftool_config_path = "/home/clrintz/Documents/workflow_plancha/exiftool_roll_pitch_config_file.config"

# --- Session folder layout ---
SESSION_PATH = f"{ROOT}{SESSION_NAME}"

IMAGES_PATH = f"{SESSION_PATH}/DCIM/images"
VIDEOS_PATH = f"{SESSION_PATH}/DCIM/videos"
FRAMES_PATH = f"{VIDEOS_PATH}/frames"

GPS_PATH = f"{SESSION_PATH}/GPS"
GPS_BASE_PATH = f"{GPS_PATH}/BASE"
GPS_DEVICE_PATH = f"{GPS_PATH}/DEVICE"

BATHY_PATH = f"{SESSION_PATH}/BATHY"

# --- Metadata outputs ---
METADATA_PATH = f"{SESSION_PATH}/METADATA"
CSV_EXIFTOOL_FRAMES = f"{METADATA_PATH}/metadata.csv"
CSV_EXIFTOOL_VIDEO = f"{METADATA_PATH}/csv_exiftool_video.csv"

# Time between two consecutive frames, in seconds, stored as a string.
delta_time = str(1 / float(frames_per_second))

TRIM VIDEOS

In [None]:
# Run the video-splitting step on the session videos.
# NOTE(review): split_videos is not defined in this notebook (presumably it comes
# from the star-import of `functions`); judging by its arguments it extracts
# frames from VIDEOS_PATH into FRAMES_PATH at `frames_per_second` -- confirm
# against its implementation, including whether it creates METADATA_PATH.
split_videos(VIDEOS_PATH, FRAMES_PATH, frames_per_second, SESSION_NAME, METADATA_PATH)

ADJUST IMAGE COLORS

# Apply a per-channel auto-contrast (1 % cutoff) to every frame, in place.
# WARNING: frames are overwritten, so running this step twice applies the
# correction twice.
from PIL import Image, ImageOps

PIL_PATH = FRAMES_PATH
for file in sorted(os.listdir(PIL_PATH)):
    im_path = PIL_PATH + "/" + file
    # skip sub-folders: only regular files can be opened as images
    if not os.path.isfile(im_path):
        continue
    # the context manager releases the file handle before the frame is
    # overwritten; convert("RGB") guarantees exactly three bands, so grayscale
    # or RGBA inputs no longer break the 3-way unpacking below
    with Image.open(im_path) as source_image:
        rgb_image = source_image.convert("RGB")
    r, g, b = (ImageOps.autocontrast(channel, cutoff=1) for channel in rgb_image.split())
    im = Image.merge("RGB", [r, g, b])
    im.save(im_path)

TIME CALIBRATE AND GEOTAG IMAGES

In [4]:
# parameter definition
# N.B. insert the date and time following the format "YYYY:MM:DD HH:MM:SS.000"
# N.B. insert the time in UTC+0 format
time_first_frame = "2022:10:19 11:56:00.400"

# correct time by adding leap seconds to match GPS time (in 2023 it is 18 s)
d_date = datetime.strptime(time_first_frame, '%Y:%m:%d %H:%M:%S.%f') + pd.Timedelta(seconds=leap_sec)
time_first_frame = d_date.strftime('%Y:%m:%d %H:%M:%S.%f')

print("The date and time of the first frame are : ", time_first_frame)
print("##############################################################################")
print("WRITE SESSION_INFO CSV")
print("##############################################################################\n")

# make sure the destination folder exists before writing into it
os.makedirs(METADATA_PATH, exist_ok=True)

# open csv file if exist, if not create it
SESSION_INFO_PATH = METADATA_PATH + "/session_info.csv"
if os.path.exists(SESSION_INFO_PATH):
    session_info = pd.read_csv(SESSION_INFO_PATH)
    # Earlier versions of this cell built the frame from a bare list, which
    # left unnamed columns "0", "1", "2" in the csv; drop them if present.
    session_info = session_info.drop(columns=["0", "1", "2"], errors="ignore")
else:
    session_info = pd.DataFrame(index=[0])

# a frame without rows cannot receive the scalar values below
if len(session_info) == 0:
    session_info = pd.DataFrame(index=[0])

# add parameters to df (scalars are broadcast to every row)
session_info["frames_per_second"] = frames_per_second
session_info["time_first_frame"] = time_first_frame
session_info["leap_sec"] = leap_sec

# save df
session_info.to_csv(SESSION_INFO_PATH, sep=',', index=False)

The date and time of the first frame are :  2022:10:19 11:56:00.400000
##############################################################################
WRITE SESSION_INFO CSV
##############################################################################



In [5]:
# Locate (or compute) the navigation file of the session.
#   flag_gps    : 1 once a usable .LLH file is known (its path is in LLH_PATH)
#   flag_device : 1 if the device folder contains RINEX data
#   flag_base   : 0 = no base data ; 1 = RINEX ; 2 = RGP
flag_gps = 0
flag_device = 0   # old name = flag_gps
flag_base = 0   # 1 : RINEX ; 2 : RGP

# ----- Check if we need to process PPK solution

for file in os.listdir(GPS_DEVICE_PATH) :
    if ("ppk_solution" in file) and (file.endswith(".LLH")):
        print("We already have a GPS file with PPK solution")
        LLH_PATH = GPS_DEVICE_PATH + "/" + file
        flag_gps = 1


if (flag_gps == 0) :
# ----- Check if we can process PPK solution

    # 1- If we have a device RINEX folder
    for folder in os.listdir(GPS_DEVICE_PATH) :
        if ("RINEX" in folder) :
            flag_device = 1


    # 2- If we have base GPS data
    if os.path.exists(GPS_BASE_PATH) :
        # 2.a- If we have a base RINEX folder
        for folder in os.listdir(GPS_BASE_PATH) :
            if ("RINEX" in folder) :
                flag_base = 1

        # 2.b- If we don't have a RINEX folder, look for RGP files
        if (flag_base == 0) :
            for file in os.listdir(GPS_BASE_PATH) :
                # 2.b.a- If we have data from RGP station
                if file.endswith("d") :
                    flag_base = 2

        # 2.b.b- If we don't have data from RGP station, download it
        if (flag_base == 0) :
            # rgp_station is an optional parameter (La Reunion only); when it is
            # not defined, skip the download instead of failing with a NameError
            station = globals().get("rgp_station")
            if station is None :
                print("No base GPS data and no 'rgp_station' defined : RGP download skipped")
            else :
                print("Downloading RGP data from", station, "station :")
                flag_base = download_rgp(SESSION_NAME, time_first_frame, FRAMES_PATH, GPS_BASE_PATH, station, delta_time)

    if (flag_base != 0) and (flag_device != 0) :
        print("we can do PPK on our data !")
        LLH_PATH = ppk(SESSION_NAME, GPS_BASE_PATH, GPS_DEVICE_PATH, PPK_CONFIG_PATH, ppk_cfgs, flag_base)
        flag_gps = 1


# ----- If we cannot process PPK solution
if (flag_gps == 0) :
    print("we cannot do PPK on our data at the moment !")

    # Look for the LLH folder (None until one is found)
    LLH_FOLDER_PATH = None
    for folder in os.listdir(GPS_DEVICE_PATH) :
        if ("LLH" not in folder) :
            continue

        # If it is a zipped folder, unzip it next to the archive and stop searching
        if folder.endswith(".zip") :
            LLH_FOLDER_PATH = GPS_DEVICE_PATH + "/" + folder.replace(".zip", "") + "/"
            with zipfile.ZipFile(GPS_DEVICE_PATH + "/" + folder, 'r') as zip_ref:
                zip_ref.extractall(LLH_FOLDER_PATH)
            break

        # "LLH" also matches loose *.LLH files : only keep real folders
        if os.path.isdir(GPS_DEVICE_PATH + "/" + folder) :
            LLH_FOLDER_PATH = GPS_DEVICE_PATH + "/" + folder + "/"

    # Get the gps file
    if LLH_FOLDER_PATH is not None :
        for file in os.listdir(LLH_FOLDER_PATH):
            if file.endswith(".LLH"):
                flag_gps = 1
                # join() avoids the doubled "/" (LLH_FOLDER_PATH already ends with one)
                LLH_PATH = os.path.join(LLH_FOLDER_PATH, file)
                break

# ----- Get the final GPS file with or without PPK solution
# Check if we have a GPS file
if flag_gps == 1 :
    # os.path.join() must not be used here : LLH_PATH is absolute, so join()
    # would discard the message and return the path alone
    print("We have the following navigation file : ", LLH_PATH)
    TXT_PATH  = llh_to_txt(LLH_PATH)
    print("The NEW navigation file will be : ", TXT_PATH)
else :
    print("We do not have a navigation file")

We already have a GPS file with PPK solution
/media/clrintz/plancha_drive_3/plancha_session/data/session_2022_10_19_aldabra_ARM01_plancha_body_v1A_00/GPS/DEVICE/ppk_solution_session_2022_10_19_aldabra_ARM01_plancha_body_v1A_00.LLH
The NEW navigation file will be :  /media/clrintz/plancha_drive_3/plancha_session/data/session_2022_10_19_aldabra_ARM01_plancha_body_v1A_00/GPS/DEVICE/ppk_solution_session_2022_10_19_aldabra_ARM01_plancha_body_v1A_00.txt


In [6]:
# Look for a pre-processed bathymetry export ("*bathy_preproc.csv") in the BATHY
# folder of the session; flag_bathy becomes 1 as soon as one is found.
flag_bathy = 0

bathy_entries = os.listdir(BATHY_PATH) if os.path.isdir(BATHY_PATH) else []
for file in bathy_entries:
    if not file.endswith("bathy_preproc.csv"):
        continue
    flag_bathy = 1
    BATHY_PREPROC_PATH = BATHY_PATH + "/" + file
    # the value returned by the conversion is what the geotagging step reads later
    CSV_BATHY_PREPOC = bathy_preproc_to_txt(BATHY_PREPROC_PATH)

In [7]:
# Build the per-frame metadata table and write it back into the frames:
#   1. export frame + video metadata with exiftool
#   2. give every frame a timestamp derived from the first-frame time and the frame rate
#   3. interpolate position (GPS) and attitude/depth (bathy) at the frame timestamps
#   4. import the table into the frames with exiftool, then save a filtered csv
print("##############################################################################")
print("1 of 4 : EXPORT VIDEO & FRAME METADATA TO CSV")
print("##############################################################################\n")
CSV_EXIFTOOL_FRAMES = METADATA_PATH + "/metadata.csv"
CSV_EXIFTOOL_VIDEO = METADATA_PATH + "/csv_exiftool_video.csv"
export_frame_metadata = "exiftool -csv -fileorder filename " + FRAMES_PATH + " > " + CSV_EXIFTOOL_FRAMES
os.system(export_frame_metadata)

# import frames metadata
csv_exiftool_frames = pd.read_csv(CSV_EXIFTOOL_FRAMES)

# import video metadata : the first .MP4/.mp4 file found is used for the whole session
if os.path.isdir(VIDEOS_PATH) :
    for file in os.listdir(VIDEOS_PATH):
        if file.endswith((".MP4", ".mp4")):
            export_video_metadata = "exiftool -csv  " + VIDEOS_PATH + "/" + file + " > " + CSV_EXIFTOOL_VIDEO
            os.system(export_video_metadata)
            break

if os.path.exists(CSV_EXIFTOOL_VIDEO):
    csv_exiftool_video = pd.read_csv(CSV_EXIFTOOL_VIDEO)
else:
    # no video available : continue without the GoPro video tags instead of crashing
    print("No video metadata found : video tags will not be added to the frames")
    csv_exiftool_video = pd.DataFrame()

# filter video metadata
useful_video_metadata_names = [
    'LensSerialNumber', 'CameraSerialNumber', 'Model', 'AutoRotation', 'DigitalZoom', 'ProTune',
    'WhiteBalance', 'Sharpness', 'ColorMode', 'MaximumShutterAngle', 'AutoISOMax', 'AutoISOMin',
    'ExposureCompensation', 'Rate', 'FieldOfView', 'ElectronicImageStabilization', 'ImageWidth',
    'ImageHeight', 'SourceImageHeight', 'XResolution', 'VideoFrameRate', 'ImageSize', 'Megapixels',
    'AvgBitrate',
]
# keep the order of the list above (a set intersection would give a random column order)
video_col_names = set(csv_exiftool_video.columns)
video_intersection_list = [name for name in useful_video_metadata_names if name in video_col_names]
csv_exiftool_video = csv_exiftool_video[video_intersection_list]

# write video's metadata to frame csv (values are looked up by tag name, not by position)
if len(csv_exiftool_video) > 0:
    useful_video_metadata_values = csv_exiftool_video.iloc[0]
    for tag_name in video_intersection_list:
        csv_exiftool_frames[tag_name] = useful_video_metadata_values[tag_name]

# concat session_info csv and csv_exiftool_video csv ; video columns already present in
# session_info (from a previous run) are dropped first so they are not duplicated
session_info_without_video = session_info.drop(columns=video_intersection_list, errors="ignore")
result = pd.concat([session_info_without_video, csv_exiftool_video], axis=1)
result.to_csv(SESSION_INFO_PATH, sep=',', index=False)
# then remove csv_exiftool_video csv
if os.path.exists(CSV_EXIFTOOL_VIDEO):
    os.remove(CSV_EXIFTOOL_VIDEO)

print("##############################################################################")
print("2 of 4 : ADD DATE AND TIME TO CSV METADATA")
print("##############################################################################\n")
# convert "time_first_frame" ("YYYY:MM:DD HH:MM:SS.ffffff") to the ISO form numpy expects
time_first_frame_np = time_first_frame.replace(" ", "T").replace(":", "-", 2)
start = np.datetime64(time_first_frame_np, "us")
nb_of_frames = csv_exiftool_frames.shape[0]
# Offset of each frame in microseconds, rounded per frame. A fixed integer step in
# milliseconds (333 ms at 3 fps) loses 1/3 ms per frame, i.e. about 3 s over 9000 frames.
frame_offsets_us = np.round(np.arange(nb_of_frames) * 1e6 / float(frames_per_second)).astype("int64")
datetime_vec_np = start + frame_offsets_us.astype("timedelta64[us]")
# convert "datetime_vec_np" to "datetime_vec" : same instants, in Exiftool format
datetime_vec = [
    str(curr_datetime).replace("T", " ").replace("-", ":", 2)
    for curr_datetime in datetime_vec_np
]
csv_exiftool_frames["SubSecDateTimeOriginal"] = datetime_vec
csv_exiftool_frames["SubSecDateTimeOriginal_np"] = datetime_vec_np

if (flag_gps == 1) or (flag_bathy == 1) :
    # Integer timestamps used as the interpolation axis. Every time series is converted
    # explicitly to nanoseconds so that frames, GPS and bathy share the same unit whatever
    # resolution pandas/numpy picked for the datetime columns.
    csv_exiftool_frames['datetime_unix'] = datetime_vec_np.astype('datetime64[ns]').astype('int64')

if flag_gps == 1 :
    print("##############################################################################")
    print("3 of 4 : ADD POSITION, ROLL, PITCH, YAW, DEPTH TO CSV METADATA")
    print("##############################################################################\n")
    #############################
    # import lat and lon from LLH
    #############################
    csv_llh = pd.read_csv(TXT_PATH)
    # create datetime col, adapted to the exiftool format ("/" -> "-"), then parse it
    llh_datetime = (csv_llh['GPSDateStamp'] + ' ' + csv_llh['GPSTimeStamp']).str.replace("/", "-")
    csv_llh['SubSecDateTimeOriginal_np'] = pd.to_datetime(llh_datetime)
    csv_llh['datetime_unix'] = csv_llh['SubSecDateTimeOriginal_np'].values.astype('datetime64[ns]').astype('int64')
    # linear interpolation, if different interpolation needed :
    # please see :
    # https://docs.scipy.org/doc/scipy/tutorial/interpolate/1D.html
    # NOTE: np.interp expects increasing GPS times and repeats the first/last fix for
    # frames that fall outside the GPS time range.
    csv_exiftool_frames['GPSLatitude'] = np.interp(csv_exiftool_frames['datetime_unix'], csv_llh['datetime_unix'], csv_llh['GPSLatitude'])
    csv_exiftool_frames['GPSLongitude'] = np.interp(csv_exiftool_frames['datetime_unix'], csv_llh['datetime_unix'], csv_llh['GPSLongitude'])
    # we want to write lat and lon in "Composite" family tags, because in "Exif" family tags we cannot assign "-" sign to lat and lon
    csv_exiftool_frames = csv_exiftool_frames.rename(columns={"GPSLatitude": "Composite:GPSLatitude", "GPSLongitude": "Composite:GPSLongitude"})

if flag_bathy == 1 :
    ######################################################
    # import roll, pitch, yaw and depth from bathy_preproc
    ######################################################
    csv_bathy_preproc = pd.read_csv(CSV_BATHY_PREPOC)
    # delete 3 last digit of "GPS_time", then parse it
    csv_bathy_preproc['GPS_time'] = pd.to_datetime(csv_bathy_preproc['GPS_time'].str[:-3])
    csv_bathy_preproc['datetime_unix'] = csv_bathy_preproc['GPS_time'].values.astype('datetime64[ns]').astype('int64')

    csv_exiftool_frames['XMP:GPSRoll'] = np.interp(csv_exiftool_frames['datetime_unix'], csv_bathy_preproc['datetime_unix'], csv_bathy_preproc['GPSRoll'])
    csv_exiftool_frames['XMP:GPSPitch'] = np.interp(csv_exiftool_frames['datetime_unix'], csv_bathy_preproc['datetime_unix'], csv_bathy_preproc['GPSPitch'])
    csv_exiftool_frames['XMP:GPSTrack'] = np.interp(csv_exiftool_frames['datetime_unix'], csv_bathy_preproc['datetime_unix'], csv_bathy_preproc['GPSTrack'])
    csv_exiftool_frames['GPSAltitude'] = np.interp(csv_exiftool_frames['datetime_unix'], csv_bathy_preproc['datetime_unix'], csv_bathy_preproc['GPSAltitude'])

    # set altitude below sea level
    csv_exiftool_frames['GPSAltitudeRef'] = "Below Sea Level"

# add useful GoPro metadata (only when the video reported its field of view)
if (nb_of_frames > 0) and ('FieldOfView' in csv_exiftool_frames.columns) and (csv_exiftool_frames['FieldOfView'].iloc[0] == "Linear") :
    csv_exiftool_frames['EXIF:FocalLength'] = "2.92"
    csv_exiftool_frames["EXIF:FocalLengthIn35mmFormat"] = "15"

print("##############################################################################")
print("4 of 4 : IMPORT EXIF METADATA")
print("##############################################################################\n")
# save frame csv, before import metadata
csv_exiftool_frames.to_csv(CSV_EXIFTOOL_FRAMES, index=False)

# CLR 07/07/2023 ---- Comment the next 2 lines so that we don't geotag the frames --> faster ! ----------
import_csv_metadata = "exiftool -config " + exiftool_config_path + " -csv=" + CSV_EXIFTOOL_FRAMES + " -fileorder filename " + FRAMES_PATH + " -overwrite_original"
os.system(import_csv_metadata)

# once we have imported all metadata, remove useless columns from metadata csv
# EXIF metadata we want to keep, please check :
# https://docs.google.com/spreadsheets/d/1iSKDvFrh-kP9wOU9bt9H7lcZKOnF7pe9n-8t15pOrmw/edit?usp=sharing
keep_param_list = ["ApertureValue", "Compression", "Contrast", "CreateDate", "DateCreated", "DateTimeDigitized", "DateTimeOriginal", "DigitalZoomRatio", "ExifImageHeight", "ExifImageWidth",
                    "ExifToolVersion", "ExifVersion", "ExposureCompensation", "ExposureMode", "ExposureProgram", "FileName", "FileSize", "FileType", "FileTypeExtension", "FNumber",
                    "FocalLength", "FocalLength35efl", "FocalLengthIn35mmFormat", "FOV", "GPSAltitude", "GPSAltitudeRef", "GPSDateTime", "GPSDate", "GPSTime", "GPSLatitude", "GPSLatitudeRef", "GPSLongitude",
                    "GPSLongitudeRef", "GPSMapDatum", "GPSPosition", "GPSTimeStamp", "GPSRoll", "GPSPitch", "GPSTrack", "ImageHeight", "ImageWidth", "LightValue", "Make", "MaxApertureValue",
                    "MaximumShutterAngle", "Megapixels", "MeteringMode", "MIMEType", "Model", "Saturation", "ScaleFactor35efl", "SceneCaptureType", "SceneType", "SensingMethod", "Sharpness",
                    "ShutterSpeed", "Software", "SubSecDateTimeOriginal", "ThumbnailImage", "ThumbnailLength", "ThumbnailOffset", "WhiteBalance", "XResolution", "YResolution", "Composite:GPSLatitude", "Composite:GPSLongitude",
                    # roll / pitch / yaw are stored under their "XMP:" column names above ; without
                    # these entries the interpolated attitude was dropped from the final csv
                    "XMP:GPSRoll", "XMP:GPSPitch", "XMP:GPSTrack"]
# intersection between metadata we want to keep and EXIF metadata, in the csv column order
keep_param_set = set(keep_param_list)
intersection_list = [col_name for col_name in csv_exiftool_frames.columns if col_name in keep_param_set]
# filter df, then delete every column that contains a missing value
# NOTE(review): dropna() removes columns with ANY missing value, not only fully empty ones ; kept as is
csv_exiftool_frames = csv_exiftool_frames[intersection_list].dropna(axis=1)
# delete all zero columns
csv_exiftool_frames = csv_exiftool_frames.loc[:, (csv_exiftool_frames != 0).any(axis=0)]

# save filtered frame csv, after import metadata
csv_exiftool_frames.to_csv(CSV_EXIFTOOL_FRAMES, index=False)

# end message
os.system('spd-say "geotagging frames is done"')

##############################################################################
1 of 4 : EXPORT VIDEO & FRAME METADATA TO CSV
##############################################################################



    1 directories scanned
 9002 image files read


##############################################################################
2 of 4 : ADD DATE AND TIME TO CSV METADATA
##############################################################################

##############################################################################
3 of 4 : ADD POSITION, ROLL, PITCH, YAW, DEPTH TO CSV METADATA
##############################################################################

##############################################################################
4 of 4 : IMPORT EXIF METADATA
##############################################################################



    1 directories scanned
 9002 image files updated


0

In [None]:
# MOVE THE CURRENT SESSION INTO THE NEW DIRECTORY TREE (FOLDER HIERARCHY) FORMAT

In [None]:
# Create the session folder in the new directory tree, then re-export and filter the
# EXIF metadata of images that are already annotated.
ORIGIN_ROOT = "/media/mcontini/plancha_drive_32/plancha_session/data"
DEST_ROOT = "/media/mcontini/Data_Paper_Hard_Disk/data"
# The new root is a sibling of DEST_ROOT, named "<DEST_ROOT folder name>_new"
data_folder_name = os.path.basename(os.path.normpath(DEST_ROOT))
NEW_DEST_ROOT = os.path.dirname(DEST_ROOT) + "/" + data_folder_name + "_new"

# If it does not already exist, create the new root folder
if not os.path.exists(NEW_DEST_ROOT):
    # the message must use NEW_DEST_ROOT : the name NEW_ROOT does not exist
    print("we are creating the following folder : \n", NEW_DEST_ROOT)
    os.makedirs(NEW_DEST_ROOT)


NEW_SESSION_PATH = NEW_DEST_ROOT + "/" + SESSION_NAME

# if NEW_SESSION_PATH folder does not exist, create it with all subfolders
if not os.path.exists(NEW_SESSION_PATH):
    create_new_session_folder(ORIGIN_ROOT, NEW_DEST_ROOT, SESSION_NAME)
else :
    print("we have already treated the session : \n", NEW_SESSION_PATH)


def _keep_useful_exif_columns(frames_df, keep_names):
    """Return `frames_df` restricted to the useful EXIF columns.

    Keeps the columns listed in `keep_names` (in the order they appear in
    `frames_df`), then removes every column that contains a missing value and
    every column made only of zeros. The input frame is not modified.
    """
    wanted = set(keep_names)
    kept = frames_df[[col_name for col_name in frames_df.columns if col_name in wanted]]
    kept = kept.dropna(axis=1)
    return kept.loc[:, (kept != 0).any(axis=0)]


# EXIF metadata we want to keep, please check :
# https://docs.google.com/spreadsheets/d/1iSKDvFrh-kP9wOU9bt9H7lcZKOnF7pe9n-8t15pOrmw/edit?usp=sharing
keep_param_list = ["ApertureValue", "Compression", "Contrast", "CreateDate", "DateCreated", "DateTimeDigitized", "DateTimeOriginal", "DigitalZoomRatio", "ExifImageHeight", "ExifImageWidth",
                    "ExifToolVersion", "ExifVersion", "ExposureCompensation", "ExposureMode", "ExposureProgram", "FileName", "FileSize", "FileType", "FileTypeExtension", "FNumber",
                    "FocalLength", "FocalLength35efl", "FocalLengthIn35mmFormat", "FOV", "GPSAltitude", "GPSAltitudeRef", "GPSDateTime", "GPSDate", "GPSTime", "GPSLatitude", "GPSLatitudeRef", "GPSLongitude",
                    "GPSLongitudeRef", "GPSMapDatum", "GPSPosition", "GPSTimeStamp", "GPSRoll", "GPSPitch", "GPSTrack", "ImageHeight", "ImageWidth", "LightValue", "Make", "MaxApertureValue",
                    "MaximumShutterAngle", "Megapixels", "MeteringMode", "MIMEType", "Model", "Saturation", "ScaleFactor35efl", "SceneCaptureType", "SceneType", "SensingMethod", "Sharpness",
                    "ShutterSpeed", "Software", "SubSecDateTimeOriginal", "ThumbnailImage", "ThumbnailLength", "ThumbnailOffset", "WhiteBalance", "XResolution", "YResolution"]

# rename images old names
os.system("rename 's/pascal_20151210_PAER/hermitage_mask_v1A/g' " + IMAGES_PATH + "/*.JPG")
CSV_EXIFTOOL_FRAMES = METADATA_PATH + "/metadata.csv"
export_frame_metadata = "exiftool -csv -fileorder filename " + IMAGES_PATH + " > " + CSV_EXIFTOOL_FRAMES
os.system(export_frame_metadata)

# reload the exported metadata, keep the useful columns only and save the filtered csv
csv_exiftool_frames = pd.read_csv(CSV_EXIFTOOL_FRAMES)
csv_exiftool_frames = _keep_useful_exif_columns(csv_exiftool_frames, keep_param_list)
csv_exiftool_frames.to_csv(CSV_EXIFTOOL_FRAMES, index=False)


# IMAGES ALREADY ANNOTATED
# set correct time
os.system("exiftool -m '-SubSecTimeOriginal<GPSDateTime'  '-SubSecTime<GPSDateTime' '-SubSecTimeDigitized<GPSDateTime' '-datetimeoriginal<GPSDateTime' " + FRAMES_PATH + " -fileorder filename -overwrite_original")
# rename images old names
os.system("rename 's/_image_/_/g' " + FRAMES_PATH + "/*.jpg")
print("##############################################################################")
print("1 of 4 : EXPORT VIDEO & FRAME METADATA TO CSV")
print("##############################################################################\n")
CSV_EXIFTOOL_FRAMES = METADATA_PATH + "/metadata.csv"
# The video/frame metadata export is intentionally skipped here : the table
# produced above from IMAGES_PATH is reused as is.

print("##############################################################################")
print("4 of 4 : IMPORT EXIF METADATA")
print("##############################################################################\n")
# save frame csv, before import metadata
csv_exiftool_frames.to_csv(CSV_EXIFTOOL_FRAMES, index=False)

# once we have imported all metadata, remove useless columns from metadata csv
csv_exiftool_frames = _keep_useful_exif_columns(csv_exiftool_frames, keep_param_list)
# save filtered frame csv, after import metadata
csv_exiftool_frames.to_csv(CSV_EXIFTOOL_FRAMES, index=False)

# end message
os.system('spd-say "lets gooooooooooooooooooooooooooooooo"')

In [None]:
# DISABLED CELL: everything below is a single string literal, so none of it runs.
# It appears to be an earlier version of the GPS / bathymetry detection cells
# (here ppk() is called without a base-type flag) -- kept for reference only.
'''
#RINEX_PATH = GPS_PATH + "/reach_raw_202210211131_RINEX_3_03"
#RINEX_PATH = GPS_PATH + "/reach_raw_202210231231_RINEX_3_03"
flag_gps = 0
flag_bathy = 0
flag_rinex = 0
GPS_DEVICE_PATH = GPS_PATH + "/DEVICE"
GPS_BASE_PATH = GPS_PATH + "/BASE"

# check if we can do ppk
for folder in os.listdir(GPS_DEVICE_PATH) :
    # Look for the unzipped RINEX folder
    if "RINEX" in folder and folder.endswith(".zip") :
        flag_rinex = 1
if os.path.exists(GPS_BASE_PATH):
    if len(os.listdir(GPS_BASE_PATH)) != 0 and flag_rinex == 1:
        flag_gps = 1
        print("we can do PPK on our data !")
        LLH_PATH = ppk(SESSION_NAME, GPS_BASE_PATH, GPS_DEVICE_PATH, PPK_CONFIG_PATH, ppk_cfgs)

# if we cannot do ppk
if flag_gps == 0 :
    print("we cannot do PPK on our data at the moment !")
    for folder in os.listdir(GPS_DEVICE_PATH) :
        # if we have an unzipped LLH folder
        if "LLH" in folder and not folder.endswith(".zip") :
            LLH_FOLDER_PATH = GPS_DEVICE_PATH + "/" + folder
            for file in os.listdir(LLH_FOLDER_PATH):
                if file.endswith(".LLH"):
                    flag_gps = 1
                    LLH_PATH = LLH_FOLDER_PATH + "/" + file
                    break
        if "LLH" in folder and folder.endswith(".zip") and flag_gps == 0:
            LLH_FOLDER_PATH = GPS_DEVICE_PATH + "/" + folder.replace(".zip", "")
            with zipfile.ZipFile(GPS_DEVICE_PATH + "/" + folder, 'r') as zip_ref:
                zip_ref.extractall(LLH_FOLDER_PATH)
            for file in os.listdir(LLH_FOLDER_PATH):
                if file.endswith(".LLH"):
                    flag_gps = 1
                    LLH_PATH = LLH_FOLDER_PATH + "/" + file
                    break            
        
# check if we have a navigation file
if flag_gps == 1 :
    print(os.path.join("we have the following navigation file : ", LLH_PATH))
else :
    print("we do not have a navigation file")

# if we have a navigation file
if flag_gps :
    TXT_PATH  = llh_to_txt(LLH_PATH)
    print("the NEW navigation file will be : ", TXT_PATH)

if os.path.isdir(BATHY_PATH) :
    # for each file in the videos folder
    for file in os.listdir(BATHY_PATH):
        if file.endswith("bathy_preproc.csv"):
            flag_bathy = 1
            BATHY_PREPOC_PATH = BATHY_PATH + "/" + file
            CSV_BATHY_PREPOC = bathy_preproc_to_txt(BATHY_PREPOC_PATH)
'''

In [None]:
'''
# prova per sunnith roll pitch yaw
my_time = time_first_frame
DIR = "/media/mcontini/Data_Paper_Hard_Disk/session_2022_10_21_aldabra_DUBOIS_plancha_body_v1A_01/DCIM/videos/frames/"
if os.path.isdir(BATHY_PATH) :
    # for each file in the videos folder
    for file in os.listdir(BATHY_PATH):
        if file.endswith("bathy_preproc.csv"):
            flag_bathy = 1
            BATHY_PREPOC_PATH = BATHY_PATH + "/" + file
txt_path = bathy_preproc_to_txt(BATHY_PREPOC_PATH)
txt_path = txt_path[0]
print("##############################################################################")
print("1 of 6 : We are setting the following date and time ", my_time, " to all the frames")
print("##############################################################################\n")
change_date_and_time =  "exiftool -m '-SubSecDateTimeOriginal= " + my_time + "' " + DIR + " -fileorder filename -overwrite_original"
os.system(change_date_and_time)
#print_date_and_time = "exiftool -m -SubSecDateTimeOriginal " + DIR + " -fileorder filename"
#os.system(print_date_and_time)
# First, copy SubSecDateTimeOriginal to XMP:DateTimeOriginal
print("##############################################################################")
print("2 of 6 : We are copying the EXIF time to XMP times")
print("##############################################################################\n")
change_tag_date_and_time = "exiftool -m '-XMP:DateTimeOriginal<SubSecDateTimeOriginal' " + DIR + " -fileorder filename -overwrite_original"
os.system(change_tag_date_and_time)
# Then, run your microsecond change on the XMP:DateTimeOriginal.  This works because XMP timestamps are more flexible than EXIF timestamps.
print("##############################################################################")
print("3 of 6 : We are incrementing the XMP time")
print("##############################################################################\n")
increment_time = "exiftool -m '-XMP:DateTimeOriginal+<0:0:${filesequence;$_*=" + str(delta_time) + "}' " + DIR + " -fileorder filename -overwrite_original"
os.system(increment_time)
#print_date_and_time = "exiftool -m -XMP:DateTimeOriginal " + DIR + " -fileorder filename"
#os.system(print_date_and_time)
# Finally, copy the XMP:DateTimeOriginal to the other timestamps.
print("##############################################################################")
print("4 of 6 : Weare updating EXIF times")
print("##############################################################################\n")
update_date_and_time =  "exiftool -m '-SubSecTimeOriginal<XMP:DateTimeOriginal'  '-SubSecTime<XMP:DateTimeOriginal' '-SubSecTimeDigitized<XMP:DateTimeOriginal' '-datetimeoriginal<XMP:DateTimeOriginal' " + DIR + " -fileorder filename -overwrite_original"
os.system(update_date_and_time)
# Add the metadata in DIR_GPS to the images in DIR. Since Roll and Pitch are not standard tags, we need to create them in the config file. Then synchronize Latitude, Longitude, Yaw, Roll and Pitch
# thanks to the geotag command; Yaw is stored in the XMP-exif:GPSTrack tag.
# For more information, please refer to:
# https://exiftool.org/forum/index.php?topic=14155.0
print("##############################################################################")
print("5 of 6 : We are adding XMP metadata to frames")
print("##############################################################################\n")
write_gps_roll_pitch_yaw = "exiftool -config " + exiftool_config_path + " -m -geotag " + txt_path + " '-xmp:geotime<${XMP:DateTimeOriginal}+00:00' " + DIR + " -overwrite_original"
os.system(write_gps_roll_pitch_yaw)
print("##############################################################################")
print("6 of 6 : We are copying XMP metadata to EXIF metadata")
print("##############################################################################\n")
update_geotag_metadata = "exiftool -r -overwrite_original '-gps:all<xmp-exif:all' " + DIR
os.system(update_geotag_metadata)
print("##############################################################################")
print("############################ FINISHED ########################################")
print("##############################################################################\n")
'''

CREATE METADATA MATISSE 2D

In [None]:
print("##################")
print("image metadata will be written in a few seconds")

# Destination of the Matisse metadata file. It first receives the raw exiftool
# CSV dump and is overwritten later with the Matisse-formatted table.
MATISSE_PATH = SESSION_PATH + "/METADATA/" + SESSION_NAME + "_matisse_metadata.txt"

# Dump the EXIF metadata of every .jpeg frame as CSV
os.system("exiftool -T -n -csv -ext jpeg " + FRAMES_PATH + "> " + MATISSE_PATH)

# df with all EXIF metadata: load the dump exiftool just wrote
# (METADATA_PATH is the METADATA directory, not a CSV file)
df = pd.read_csv(MATISSE_PATH)
col_names = df.columns

# EXIF metadata we want to keep, please check :
# https://docs.google.com/spreadsheets/d/1iSKDvFrh-kP9wOU9bt9H7lcZKOnF7pe9n-8t15pOrmw/edit?usp=sharing
keep_param_list = ["GPSDateTime", "GPSAltitude", "GPSLatitude", "GPSLongitude",
                   "GPSRoll", "GPSPitch", "GPSTrack"]

# Intersection between the metadata we want to keep and the EXIF metadata that
# is present, listed in keep_param_list order so that the column order is the
# same on every run (a set has no stable order)
intersection_list = [name for name in keep_param_list if name in col_names]

# Filter df; the copy makes the frame independent from the raw dump so the
# assignments below write into it for sure
df = df[intersection_list].copy()

# Standardize time to have 3 millisecond digits. The lengths are measured once,
# before any padding, so that a value is never padded twice.
stamp_length = df["GPSDateTime"].str.len()
# If there is only one millisecond digit (21 characters), add two '0'
df.loc[stamp_length == 21, "GPSDateTime"] += "00"
# If there are two millisecond digits (22 characters), add one '0'
df.loc[stamp_length == 22, "GPSDateTime"] += "0"

# Divide date and time into 2 columns, splitting on the first space only
# (n must be passed by keyword with recent pandas versions)
df[["date_yyyy/MM/dd", "time"]] = df["GPSDateTime"].str.split(" ", n=1, expand=True)
df = df.drop(columns="GPSDateTime")

# Rename the columns for Matisse file
df = df.rename(columns={
    "GPSAltitude": "depth",
    "GPSLatitude": "latitude",
    "GPSLongitude": "longitude",
    "GPSRoll": "roll",
    "GPSPitch": "pitch",
    "GPSTrack": "heading",
})

# Convert date from yyyy:MM:dd to yyyy/MM/dd (Matisse format)
df["date_yyyy/MM/dd"] = df["date_yyyy/MM/dd"].str.replace(":", "/", regex=False)

# Sort the dataframe by time order
df = df.sort_values(by="time", ascending=True)

In [None]:
# Write the Matisse table as CSV, then rewrite the file with "##" in front of
# its content (i.e. in front of the header line).
df.to_csv(MATISSE_PATH, sep=",", index=False, header=True)

header_prefix = "##"
with open(MATISSE_PATH, "r") as matisse_file:
    csv_content = matisse_file.read()
with open(MATISSE_PATH, "w") as matisse_file:
    matisse_file.write(header_prefix + csv_content)

RENAMING FRAMES FOR MATISSE 2D

In [None]:
# Duplicate the extracted frames into a dedicated "frames_matisse" folder.

print("##################")
print("Creating a new folder called 'frames_matisse' and copying all frames inside")

FRAMES_PATH_M = f"{VIDEOS_PATH}/frames_matisse"

# Create the destination folder unless something already exists at that path
destination_exists = os.path.exists(FRAMES_PATH_M)
if not destination_exists:
    os.makedirs(FRAMES_PATH_M)

# Copy everything found in the frames folder into the destination
copy_command = f"cp -r {FRAMES_PATH}/* {FRAMES_PATH_M}"
os.system(copy_command)

In [None]:
print("##################")
print("Getting the right name for all frames in Matisse format")

# Build one YYYYMMDDThhmmss.fffZ name per metadata row: concatenate date and
# time around "T"/"Z", then strip the "/" and ":" separators.
df = pd.read_csv(MATISSE_PATH)
FrameName = (
    (df["date_yyyy/MM/dd"] + "T" + df["time"] + "Z")
    .str.replace("/", "")
    .str.replace(":", "")
)

# Natural-order sort helper (ex : 1, 2, 10 instead of the plain string order 1, 10, 2)
import re
def sorted_alphanumeric(data):
    """Return the items of `data` as a new list sorted in natural order.

    Each item is split into runs of ASCII digits and runs of other characters;
    digit runs are compared as integers and the remaining text is compared
    case-insensitively.
    """
    def natural_key(name):
        parts = []
        for chunk in re.split('([0-9]+)', name):
            parts.append(int(chunk) if chunk.isdigit() else chunk.lower())
        return parts

    return sorted(data, key=natural_key)


print("Renaming all frames...")

# Rename each frame following the time order: the i-th file (natural order)
# receives the i-th Matisse timestamp.
folder = FRAMES_PATH_M
frame_files = sorted_alphanumeric(os.listdir(folder))

# Check the pairing before touching any file: with more files than timestamps
# the loop would fail halfway and leave the folder partially renamed.
if len(frame_files) > len(FrameName):
    raise ValueError(
        f"{len(frame_files)} files found in {folder} but only "
        f"{len(FrameName)} timestamps are available in the Matisse metadata"
    )

for oldname, timestamp in zip(frame_files, FrameName):
    oldpath = f"{folder}/{oldname}"
    newname = timestamp + ".jpeg"
    newpath = f"{folder}/{newname}"
    os.rename(oldpath, newpath)
 

NN PREDICTIONS AND EXPORT

In [None]:
import fiftyone as fo
import fiftyone.utils.coco as fouc
import fiftyone.utils.data as foud
import os
import torch
import pandas as pd
import json
from tqdm import tqdm
from datetime import datetime
import yaml
import sys

sys.path.insert(0, '/home/mcontini/Desktop/PhD/Git_projects/fiftyone/seatizen/dataset_engineering/')
from fo_dataset_creation import create_or_load_dataset
from custom_csv_exporter import CSVImageClassificationDatasetExporter
from darwincore import DarwinCoreExporter, match_taxa_in_worms_database

sys.path.insert(0, '/home/mcontini/Desktop/PhD/Git_projects/fiftyone/segmentation_predictions/')
from add_predictions import build_predictor, add_predictions_to_dataset

# Show the names of all datasets known to fiftyone, then delete every one of them
existing_dataset_names = fo.list_datasets()
print(existing_dataset_names)
for existing_name in existing_dataset_names:
    dataset = fo.load_dataset(existing_name)
    dataset.delete()

In [None]:
### Set paths and variables
# The session name is assigned first and immediately replaced by a hard-coded
# dataset directory, which is also the value SESSION_PATH takes from here on.
dataset_name = SESSION_NAME
SESSION_PATH = dataset_name = "/home/mcontini/Desktop/Ifremer/Seatizen/data/session_2022_10_20_aldabra_ARM01_plancha_body_v1A_01_*0/"

In [None]:
# Inputs of the prediction step, all taken from the dataset directory.
# (Disabled alternative: images_path = FRAMES_PATH, metadata_df = METADATA_PATH)
images_path = dataset_name
metadata_df = f"{dataset_name}metadata.csv"  # path of the metadata CSV
# Weights file handed to build_predictor
checkpoint_path = "/home/mcontini/Desktop/Ifremer/Seatizen/Git/seatizen/instance_segmentation/outputs/2022-06-15_LR_0.00025_BATCH_2_ITER_150000_1655286559.4163053/model_final.pth"

In [None]:
# Run on the GPU when torch can see one, otherwise on the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("The current device is : ", device)

# Class labels passed to the predictor; the order of the entries is kept as is
thing_classes = [
    'sea cucumber',
    'Syringodium isoetifolium',
    'Sand',
    'Scrap',
    'Rock',
    'Trample',
    'Waste',
    'Acropore Branched',
    'Acropore Digitised',
    'Acropore Sub-massive',
    'Acropore Tabular',
    'No acropore Branched',
    'No acropore Encrusting',
    'No acropore Foliaceous',
    'No acropore Massive',
    'No acropore Sub massive',
    'No acropore Solitary',
    'Millepore',
    'Dead coral',
    'fish',
    'Sponge',
    'Sea urchins',
    'Clam',
    'Algae Limestone',
    'Algae Drawn up',
    'Algae assembly',
    'Soft coral',
    'Living Coral',
    'Bleached coral',
]

In [None]:
### Create or load the fiftyone dataset holding the unlabeled images
dataset = create_or_load_dataset(
    images_path=images_path,
    dataset_type='unlabeled',
    dataset_name=dataset_name,
)

print(dataset)

In [None]:
### Add metadata to the fiftyone dataset
# `metadata_df` holds the CSV path until this cell rebinds it to the loaded
# DataFrame. Only read the file while it is still a path, so that running the
# cell a second time does not fail on the already loaded table.
if isinstance(metadata_df, (str, os.PathLike)):
    metadata_df = pd.read_csv(metadata_df, low_memory = False, na_values = ['-', ' '])

In [None]:
def _is_filled(rows, column):
    """Return True when `column` exists in `rows` and none of its values is missing."""
    return column in rows.columns and not rows[column].hasnans


for sample in tqdm(dataset):

    # select the metadata rows whose FileName matches the sample's file
    image_metadata = metadata_df[metadata_df['FileName'] == os.path.basename(sample['filepath'])]

    # nothing to add when the CSV has no row for this sample
    if image_metadata.empty:
        continue

    # columns stored through a dedicated field below, hence not copied verbatim
    handled_columns = ['GPSLatitude', 'GPSLongitude']

    # if lat and long are filled in, add the GPS position as a geolocation field in the 51 dataset
    if _is_filled(image_metadata, 'GPSLatitude') and _is_filled(image_metadata, 'GPSLongitude'):
        # fo.GeoLocation expects its point as [longitude, latitude]
        sample['locations'] = fo.GeoLocation(point=[
            image_metadata['GPSLongitude'].iloc[0],
            image_metadata['GPSLatitude'].iloc[0],
        ])

    # if the sub-second datetime is filled in, use it as the sample datetime.
    # Otherwise, fall back to the DateTimeOriginal column (if provided).
    if _is_filled(image_metadata, 'SubSecDateTimeOriginal'):
        sample['datetime'] = datetime.strptime(
            str(image_metadata['SubSecDateTimeOriginal'].iloc[0]), '%Y:%m:%d %H:%M:%S.%f')
        handled_columns.append('SubSecDateTimeOriginal')
    elif _is_filled(image_metadata, 'DateTimeOriginal'):
        sample['datetime'] = datetime.strptime(
            str(image_metadata['DateTimeOriginal'].iloc[0]), '%Y:%m:%d %H:%M:%S')
        handled_columns.append('DateTimeOriginal')

    # Add every other metadata variable that has a value to the sample.
    for exif_variable in image_metadata.drop(columns=handled_columns, errors='ignore'):
        if not image_metadata[exif_variable].hasnans:
            sample[exif_variable] = image_metadata[exif_variable].iloc[0]

    # a single save per sample persists all the fields set above
    sample.save()

dataset.save()

In [None]:
### Predict species on images
# Build the predictor from the checkpoint, the device, the number of classes
# and the model configuration name.
predictor = build_predictor(
    checkpoint_path,
    device,
    len(thing_classes),
    "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml",
)

In [None]:
# Run the predictor over the dataset; the returned dataset replaces `dataset`
# and the results are stored under the 'nms_predictions' field.
dataset = add_predictions_to_dataset(
    dataset, predictor, device, thing_classes,
    nms_threshold=0.5,
    predictions_field='nms_predictions',
)

In [None]:
### Launch fiftyone to admire the results
# Opens the fiftyone App on `dataset`; the call is the last expression of the
# cell, so its return value is what the cell displays.
fo.launch_app(dataset)

In [None]:
### Export results in folders
# Every export below is written under the session's METADATA folder
export_results_directory = f"{SESSION_PATH}/METADATA/"

In [None]:
#### Export in darwincore
# YAML mapping files used by the DarwinCore export
fields_darwincore_mapping = '/home/mcontini/Desktop/Ifremer/Seatizen/Git/seatizen/fiftyone/import_export_dataset/darwincore_mapping/fields_darwincore_mapping.yaml'
taxon_mapping = '/home/mcontini/Desktop/Ifremer/Seatizen/Git/seatizen/fiftyone/import_export_dataset/darwincore_mapping/taxon_mapping_matteo.yaml'
datetime_mapping = '/home/mcontini/Desktop/Ifremer/Seatizen/Git/seatizen/fiftyone/import_export_dataset/darwincore_mapping/darwincore_datetime_mapping.yaml'

# only with internet
with open(taxon_mapping) as mapping_file:
    config = yaml.load(mapping_file, Loader=yaml.FullLoader)

# one 'taxon_research' entry per class declared in the taxon mapping
scinames = [config['CLASSES'][label]['taxon_research'] for label in config['CLASSES']]

taxon_information_df = match_taxa_in_worms_database(scinames)

In [None]:
# Load the taxon information from a CSV file saved on disk
taxon_information_df = pd.read_csv(
    "/home/mcontini/Desktop/Ifremer/Monaco/Monaco_scripts/taxon_information_df.csv"
)

In [None]:
# Exporter writing into the 'Darwincore_export' sub-folder of the export
# directory, configured with the three mapping files and the taxon table.
darwincore_export_dir = os.path.join(export_results_directory, 'Darwincore_export')
darwincore_exporter = DarwinCoreExporter(
    darwincore_export_dir,
    fields_darwincore_mapping,
    taxon_mapping,
    datetime_mapping,
    taxon_information_df,
    "polylines",
)

In [None]:
# Export the 'nms_predictions' labels through the DarwinCore exporter
dataset.export(
    label_field='nms_predictions',
    export_media='manifest',
    dataset_exporter=darwincore_exporter,
)

In [None]:
#### Export coco
# Target folder and class list of the COCO export; the classes are the
# distinct labels found in the predicted polylines.
coco_export_dir = os.path.join(export_results_directory, 'COCODataset_export')
predicted_labels = dataset.distinct('nms_predictions.polylines.label')

coco_exporter = fouc.COCODetectionDatasetExporter(
    export_dir=coco_export_dir,
    data_path='manifest.json',
    labels_path='coco_labels.json',
    export_media='manifest',
    classes=predicted_labels,
    tolerance=2,
    extra_attrs=False,
)
dataset.export(dataset_exporter=coco_exporter)

In [None]:
#### Export the FiftyoneDataset in json
# Written to the 'FiftyoneDataset_export' sub-folder without the media files;
# an existing export is overwritten. The label field is the one used by the
# prediction step and the other exports ('nms_predictions').
dataset.export(
    export_dir=os.path.join(export_results_directory, 'FiftyoneDataset_export'),
    dataset_type=fo.types.FiftyOneDataset,
    export_media=False,
    label_field='nms_predictions',
    overwrite=True
)

In [None]:
# Load the fiftyone dataset named 'seatizen', print it and open it in the App.
# NOTE(review): an earlier cell deletes every existing dataset and creates one
# named after `dataset_name` — confirm that a 'seatizen' dataset exists.
import fiftyone as fo
dataset = fo.load_dataset('seatizen')
print(dataset)
fo.launch_app(dataset)