In [6]:
import numpy as np
import os
import subprocess
from fnmatch import fnmatch
import ffmpegSplit
# importlib is imported (optional) so the notebook can reload external scripts. Without the reload, changes made to a script are not registered in the notebook
import importlib
importlib.reload(ffmpegSplit)

from weatherGet import getWeather
from weatherDenmarkGet import getClosestStation, getWeatherDK

import pandas as pd
from datetime import datetime,timedelta
import re

# Video Manipulation Class

The class contains a number of functions for working on video files. The class calls functions from the helper class ffmpegSplit, which is based on the work from https://github.com/ToastyMallows/ffmpegSplit

All functions require the installation of ffmpeg https://ffmpeg.org/download.html on the system and use subprocesses to call it from the python code

Each part of the manipulation class works on a different type of data:

- **fixClips** requires the input to be large raw-data video files. The files need to have a specific naming convention - YYYYMMDDHHMM_YYYYMMDDHHMM, where the first part signifies the starting date and time, while the second part signifies the end date and time. The function re-encodes the file and removes any possible problems with a segmented internal timeline or missing clip parts. The re-encoded videos are saved in the same subdirectory schema as the input videos
- **splitIntoDays** takes the output directories from **fixClips** and separates the input files into single days. If a file contains more than a single day of data, it is split at 0:00 every day. If a file contains less than one day of data, it is renamed with the correct beginning and end time. The output from the function has the same naming convention as the previous function
- **cutIntoClips** takes the output directories from **splitIntoDays**. For each day the algorithm extracts a 2-minute long clip for every 30 min of footage. If the input file contains less than 2 minutes of data, the clip is saved directly and renamed. The clips are named with the convention clip_{number of clip}_{HHMM}, where the number of the clip is reset to 0 for each day at 0:00 or the starting time. The hour and minutes are calculated from the starting date time of the input video, the length of the clips (in our case 2 minutes) and the number of the clip
- **desaturateClips** - as the clips show thermal data, storing them in YUV format does not provide any additional information and only makes them larger. We desaturate the clips by zeroing out the U and V components of each clip and re-encoding it. The input and output clips have the same naming convention and position schema in the sub-folders.


In [7]:
class VideoManipulation():
    """Batch driver around the ``ffmpegSplit`` helper module.

    Every public method walks ``inputFileDir`` recursively, selects the files
    whose name matches ``self.pattern`` and forwards each of them to one
    ``ffmpegSplit`` function. Results are written to a sub-folder of the given
    output directory named after the folder the input file was found in.
    """

    def __init__(self, pattern, excludeDirs = [""]):
        """Store the file-selection settings.

        Args:
            pattern: fnmatch-style glob (e.g. ``"*.mp4"``) a file name has to
                match to be processed.
            excludeDirs: iterable of directory names. It is only stored; none
                of the methods of this class consult it at the moment.
        """
        self.pattern = pattern
        # Copy the argument so that instances never share (or mutate) the
        # list object used as the default value.
        self.excludeDirs = list(excludeDirs)

    @staticmethod
    def _lastDirName(path):
        """Return the last component of ``path`` independent of the OS separator."""
        return os.path.basename(os.path.normpath(path))

    def _walkMatching(self, inputFileDir):
        """Yield ``(directory, sorted matching file names)`` for every directory
        below ``./inputFileDir`` that contains at least one matching file."""
        for path, subdirs, files in os.walk(os.path.join(".", inputFileDir), topdown=True):
            # os.walk gives no ordering guarantee; sorting keeps the running
            # clip counters reproducible between runs and platforms.
            subdirs.sort()
            matching = sorted(name for name in files if fnmatch(name, self.pattern))
            if matching:
                yield path, matching

    def desaturateClips(self, inputFileDir, saveFileDir, overwrite = False):
        """Call ``ffmpegSplit.desaturate_video`` for every matching clip.

        Args:
            inputFileDir: root directory (relative to the working directory)
                that is searched recursively.
            saveFileDir: output root; one sub-folder per input folder is created.
            overwrite: forwarded unchanged to ``ffmpegSplit.desaturate_video``.
        """
        for path, names in self._walkMatching(inputFileDir):
            curr_saveFilePath = os.path.join(saveFileDir, self._lastDirName(path))
            os.makedirs(curr_saveFilePath, exist_ok=True)

            for name in names:
                curr_videoFilePath = os.path.join(path, name)
                ffmpegSplit.desaturate_video(curr_videoFilePath, curr_saveFilePath, name, overwrite)

    def fixClips(self, inputFileDir, saveFileDir, overwrite = False):
        """Call ``ffmpegSplit.fix_video`` for every matching raw video.

        Args:
            inputFileDir: root directory (relative to the working directory)
                that is searched recursively.
            saveFileDir: output root; one sub-folder per input folder is created.
            overwrite: forwarded unchanged to ``ffmpegSplit.fix_video``.
        """
        for path, names in self._walkMatching(inputFileDir):
            curr_saveFilePath = os.path.join(saveFileDir, self._lastDirName(path))
            os.makedirs(curr_saveFilePath, exist_ok=True)

            for name in names:
                curr_videoFilePath = os.path.join(path, name)

                print(curr_videoFilePath)
                print(curr_saveFilePath)
                print("-----")

                ffmpegSplit.fix_video(curr_videoFilePath, curr_saveFilePath, name, overwrite)

    def splitIntoDays(self,inputFileDir, outputFileDir, day_time_regex = '%Y%m%d%H%M', day_regex = '%Y%m%d', overwrite = False):
        """Call ``ffmpegSplit.split_into_days`` for every matching video.

        Args:
            inputFileDir: root directory (relative to the working directory)
                that is searched recursively.
            outputFileDir: output root; one sub-folder per input folder is created.
            day_time_regex: forwarded to ``ffmpegSplit.split_into_days``; the
                default is a strptime-style date-time format, not a regex.
            day_regex: forwarded to ``ffmpegSplit.split_into_days``; the
                default is a strptime-style date format.
            overwrite: forwarded unchanged to ``ffmpegSplit.split_into_days``.
        """
        # The counter is threaded through every call and is never reset, so it
        # keeps counting across files and directories.
        clip_counter = 0

        for path, names in self._walkMatching(inputFileDir):
            curr_saveFilePath = os.path.join(outputFileDir, self._lastDirName(path))
            os.makedirs(curr_saveFilePath, exist_ok=True)

            for name in names:
                # Files are selected with the instance pattern; the notebook
                # global `pattern` must not be relied upon here.
                clip_counter = ffmpegSplit.split_into_days(curr_saveFilePath, path, name, clip_counter, day_time_regex, day_regex, overwrite)

    def cutIntoClips(self, skip, splitLength, inputFileDir, saveDir = "",overwrite = False):
        """Call ``ffmpegSplit.split_by_seconds`` for every matching daily video.

        Args:
            skip: forwarded as ``skip`` to ``ffmpegSplit.split_by_seconds``.
            splitLength: forwarded as ``split_length``.
            inputFileDir: root directory (relative to the working directory)
                that is searched recursively.
            saveDir: output root; one sub-folder per input folder is created.
            overwrite: forwarded unchanged.
        """
        for path, names in self._walkMatching(inputFileDir):
            outputDirName = os.path.join(saveDir, self._lastDirName(path))
            os.makedirs(outputDirName, exist_ok=True)

            # Clip numbering restarts in every input directory and continues
            # across the files inside that directory.
            clip_counter = 0

            for name in names:
                filename, _ = os.path.splitext(name)
                # The part before the first "_" is handed over as the start
                # time string of the video.
                filename_str = filename.split("_")[0]

                print(outputDirName)
                print(filename_str)
                print(name)
                print("------")

                clip_counter = ffmpegSplit.split_by_seconds(
                    os.path.join(path, name),
                    skip = skip,
                    output = outputDirName,
                    split_length = splitLength,
                    startTime_str = filename_str,
                    clip_counter = clip_counter,
                    overwrite = overwrite,
                )

                    

# Weather Gather Class

The class contains functions for gathering weather information for each produced clip and combining them with the clip's folder structure and the timestamp into a metadata file.

The class also contains a natural sorting algorithm taken from the cited work. 

The weather data can be gathered from two different weather APIs:
- Meteostat - https://dev.meteostat.net/ - which contains weather data on an hourly basis
- Danish Meteorological Institute (DMI) - https://confluence.govcloud.dk/display/FDAPI - which contains weather data in 10-minute segments

Each of the two APIs returns slightly different weather data features:
- Meteostat - 'Temperature', 'Humidity','Precipitation', 'Dew Point', 'Wind Direction', 'Wind Speed', 'Pressure'
- DMI - 'Temperature', 'Humidity','Precipitation latest 10 min', 'Dew Point', 'Wind Direction', 'Wind Speed', 'Sun Radiation Intensity', 'Min of sunshine latest 10 min'

For the Meteostat API, a free python library is required, while the DMI API requires an additional registration and obtaining a free unique API key. Two helper functions are used: **getWeather** for Meteostat and **getWeatherDK** for DMI. In addition, the DMI function requires a further function, **getClosestStation**, for determining the closest station to the provided latitude/longitude. Both functions require a given position.

Each clip is processed by taking the day and time data and finding the closest returned weather forecast data to it. This data is saved into a *csv* file together with the timestamp and the folder structure of the clip, representing the date it was taken. If no weather data is present for the required date and time of the clip a list of NaNs is returned. If a specific data feature is not present it is set to 0. The finished csv is saved in the same outer directory as the date folders for the clips.

In [8]:
class WeatherGather():
    """Create a per-clip weather metadata CSV for a tree of dated clip folders.

    Clips are expected in folders named ``YYYYMMDD`` below ``inputDir`` and the
    last ``_``-separated part of each clip name is read as its ``HHMM`` start
    time. Weather is fetched with ``getWeather`` (``"meteostat"``) or
    ``getClosestStation`` + ``getWeatherDK`` (``"dmi"``).
    """

    # CSV columns shared by both weather systems.
    _BASE_COLUMNS = ['Folder name', 'Clip Name', 'DateTime']

    # CSV columns holding the weather features, per weather system.
    _WEATHER_COLUMNS = {
        "meteostat": ['Temperature', 'Humidity', 'Precipitation', 'Dew Point',
                      'Wind Direction', 'Wind Speed', 'Pressure'],
        "dmi": ['Temperature', 'Humidity', 'Precipitation latest 10 min', 'Dew Point',
                'Wind Direction', 'Wind Speed', 'Sun Radiation Intensity',
                'Min of sunshine latest 10 min'],
    }

    # Fields read from the fetched weather frame, in the order of the columns above.
    _WEATHER_FIELDS = {
        "meteostat": ['temp', 'rhum', 'prcp', 'dwpt', 'wdir', 'wspd', 'pres'],
        "dmi": ['temp_dry', 'humidity', 'precip_past10min', 'temp_dew', 'wind_dir',
                'wind_speed', 'radia_glob', 'sun_last10min_glob'],
    }

    def __init__(self, pattern, inputDir, saveDir, weatherSystem):
        """Store the settings.

        Args:
            pattern: fnmatch-style glob selecting the clip files.
            inputDir: root directory (relative to the working directory)
                containing the dated clip folders.
            saveDir: path of the CSV file that ``createWeatherData`` writes
                (a file path, despite the name).
            weatherSystem: ``"meteostat"`` or ``"dmi"``.
        """
        self.pattern = pattern
        self.inputDir = inputDir
        self.saveDir = saveDir

        self.weatherSystem = weatherSystem

    def atoi(self, text):
        """Return ``text`` as an int when it consists of digits only, else unchanged."""
        return int(text) if text.isdigit() else text

    def natural_keys(self, text):
        '''
        alist.sort(key=natural_keys) sorts in human order
        http://nedbatchelder.com/blog/200712/human_sorting.html
        (See Toothy's implementation in the comments)
         '''
        return [ self.atoi(c) for c in re.split(r'(\d+)', text) ]

    def hour_rounder(self, t):
        """Round ``t`` to the nearest full hour (minute >= 30 rounds up)."""
        return (t.replace(second=0, microsecond=0, minute=0, hour=t.hour)
                   +timedelta(hours=t.minute//30))

    @staticmethod
    def _clipStart(day, name):
        """Combine the date of ``day`` with the trailing ``HHMM`` part of a clip file name."""
        stem, _ = os.path.splitext(name)
        clip_h = datetime.strptime(stem.split("_")[-1], '%H%M')
        return datetime.combine(day.date(), clip_h.time())

    def createWeatherData(self):
        """Collect one row per clip and write them to ``self.saveDir`` as CSV.

        For every folder named ``YYYYMMDD`` that contains matching clips, the
        weather between the earliest and latest clip start is fetched once and
        the record nearest to each clip start is stored. When the fetched
        frame is empty the weather columns of the row are NaN; missing values
        inside a non-empty frame are replaced by 0.

        Raises:
            ValueError: if ``self.weatherSystem`` is not ``"meteostat"`` or
                ``"dmi"``, or if a matching clip name does not end in ``_HHMM``.
        """
        if self.weatherSystem not in self._WEATHER_FIELDS:
            raise ValueError(
                f"Unsupported weather system {self.weatherSystem!r}; "
                f"expected one of {sorted(self._WEATHER_FIELDS)}"
            )
        fields = self._WEATHER_FIELDS[self.weatherSystem]
        columns = self._BASE_COLUMNS + self._WEATHER_COLUMNS[self.weatherSystem]

        results = []
        for path, subdirs, files in os.walk( os.path.join(".", self.inputDir), topdown=True):

            # Deterministic traversal and row order.
            subdirs.sort(key=self.natural_keys)
            clips = sorted((name for name in files if fnmatch(name, self.pattern)),
                           key=self.natural_keys)
            if not clips:
                continue

            # A date folder is recognised by its name, not by its depth in the
            # tree or by the path separator of the platform.
            start_d_str = os.path.basename(os.path.normpath(path))
            try:
                start_d = datetime.strptime(start_d_str, '%Y%m%d')
            except ValueError:
                print(f"Skipping {path}: folder name '{start_d_str}' is not a %Y%m%d date")
                continue

            clip_starts = [self._clipStart(start_d, name) for name in clips]
            startDT = min(clip_starts)
            endDT = max(clip_starts)

            print(f"{path}   {startDT}    {endDT}" )

            if self.weatherSystem == "meteostat":
                weatherData = getWeather(startDT, endDT,"hourly")
            else:
                closestStation = getClosestStation()
                weatherData = getWeatherDK(startDT,endDT,closestStation)

            # fillna returns a new frame; the result has to be kept.
            weatherData = weatherData.fillna(0)

            for name, date_object_clip_start in zip(clips, clip_starts):
                filename, _ = os.path.splitext(name)

                if weatherData.empty:
                    print("Weather DF empty")
                    values = [np.nan] * len(fields)
                else:
                    # Meteostat data is hourly, so the lookup time is rounded
                    # to the full hour first.
                    if self.weatherSystem == "meteostat":
                        lookup_time = self.hour_rounder(date_object_clip_start)
                    else:
                        lookup_time = date_object_clip_start

                    # Index.get_loc(method='nearest') no longer exists in
                    # pandas >= 2.0; get_indexer is the supported equivalent.
                    nearest_pos = weatherData.index.get_indexer([lookup_time], method='nearest')[0]
                    currWeather = weatherData.iloc[nearest_pos]

                    if self.weatherSystem == "meteostat":
                        print(f"video clip start: {date_object_clip_start}, rounded to: {self.hour_rounder(date_object_clip_start)},  temp: {currWeather['temp']}, humidity: {currWeather['rhum']}, precipitation: {currWeather['prcp']}, wind dir: {currWeather['wdir']}, wind speed: {currWeather['wspd']}, presure: {currWeather['pres']}")

                    values = [currWeather[field] for field in fields]

                results.append([start_d_str, filename, date_object_clip_start] + values)

        # Passing the columns to the constructor also works for zero rows.
        df = pd.DataFrame(results, columns=columns)
        df.to_csv(self.saveDir,index=False)

# Initialize the video manipulator class

In [17]:
# Report where relative paths will be resolved from.
print(f"Current working directory detect: {os.getcwd()}")

# Glob selecting the video files every later step works on.
pattern = "*.mp4"
vidManipulator = VideoManipulation(pattern)

# Root folder holding the raw recordings and root folder for all outputs.
dir_raw_files = "raw_files_directory"
dir_processed_files = "main_output_directory"

# Every immediate sub-directory of the raw folder is one input to process.
input_paths = [entry.path for entry in os.scandir(dir_raw_files) if entry.is_dir()]
print(input_paths)

# Get input video files and recode them to remove fragmentation

In [11]:
# The destination is the same for every raw sub-directory, so it is defined
# once, before the loop. This also keeps the name available to the following
# cell when `input_paths` is empty.
output_fixed_video_dir = os.path.join(dir_processed_files, "inputs_fixed")

for path in input_paths:
    print(path)

    input_video_dir = path

    vidManipulator.fixClips(input_video_dir,output_fixed_video_dir)


# Get recoded video files and split them into days. Put them in separate directories

In [12]:
# The re-encoded videos of the previous step are the input of this one.
input_fixed_video_dir = output_fixed_video_dir
output_day_segmented_video_dir = os.path.join(dir_processed_files, "inputs_fixed_daily")

# exist_ok=True already makes the call idempotent, so no separate (and racy)
# existence check is needed; a non-directory at this path now fails here
# instead of later inside the split step.
os.makedirs(output_day_segmented_video_dir, exist_ok=True)

vidManipulator.splitIntoDays(input_fixed_video_dir, output_day_segmented_video_dir)


# Get the daily split video files and extract clip segments from them

In [13]:
# The daily videos of the previous step are the input of this one.
input_day_segmented_video_dir = output_day_segmented_video_dir
output_segmented_clips_dir = os.path.join(dir_processed_files, "output_clips")

# Length of every extracted clip in seconds (2 minutes).
splitLength = 120
# Forwarded as `skip` to ffmpegSplit.split_by_seconds; presumably the number of
# clip-length segments skipped between two kept clips (14 x 2 min skipped +
# 2 min kept = one clip per 30 min) - TODO confirm against ffmpegSplit.
skip = 14

vidManipulator.cutIntoClips(
    skip=skip,
    splitLength=splitLength,
    inputFileDir=input_day_segmented_video_dir,
    saveDir=output_segmented_clips_dir,
)

# Get the clip segments and remove color information from them leaving only a grayscale

In [15]:
# The extracted clips of the previous step are the input of this one.
input_segmented_clips_dir = output_segmented_clips_dir
output_grayscaled_clips_dir = os.path.join(dir_processed_files, "output_desat_clips")

vidManipulator.desaturateClips(
    inputFileDir=input_segmented_clips_dir,
    saveFileDir=output_grayscaled_clips_dir,
)

# Initialize the weather gather class and create a metadata file containing weather information and timestamps for each clip

In [16]:
# The desaturated clips are the ones described by the metadata file.
input_clips_dir = output_grayscaled_clips_dir
metadata_filename = "metadata.csv"
# Note: despite its name this is the full path of the CSV file, placed next to
# the date folders of the clips.
output_metadata_dir = os.path.join(input_clips_dir, metadata_filename)

weatherGather = WeatherGather(
    pattern=pattern,
    inputDir=input_clips_dir,
    saveDir=output_metadata_dir,
    weatherSystem='dmi',
)
weatherGather.createWeatherData()