# Combine classification data

The classification process used to label the drone footage in this project resulted in ~1000 individual classification JSON files. To perform pre-processing (including validating that images classified by multiple users were classified in a similar manner), the files are all combined into a single dataframe.

This takes a long time (approximately 1 hour on a desktop machine), so the resulting dataframe is pickled to a file, allowing validation and inspection work to resume without rerunning this time-consuming process.

In [1]:
import pandas as pd
import json

In [None]:
from os import listdir
from os.path import isfile, join

def load_classification_file(file_name):
    """Load one classification JSON file and flatten its 'classifiedData' column.

    Each cell of the 'classifiedData' column is converted to a dictionary and
    its keys are expanded into separate columns of the returned frame.

    Args:
        file_name: Path of the JSON file; passed straight to ``pd.read_json``.

    Returns:
        A DataFrame holding the file's original columns (minus
        'classifiedData') followed by one column per key found in the
        'classifiedData' dictionaries. Keys missing from a row are NaN.

    Raises:
        KeyError: If the file has no 'classifiedData' column.

    NOTE(review): the expansion used to go through ``apply(pd.Series)``, which
    builds one Series per row. Dtypes are now inferred per column across all
    rows instead of per row first -- confirm nothing downstream relied on the
    per-row numeric upcasting.
    """
    data = pd.read_json(file_name)
    # dict(...) normalises every cell to a plain dictionary before expansion.
    records = [dict(cell) for cell in data['classifiedData']]
    # A single constructor call is far cheaper than creating a Series per row;
    # reusing data.index keeps the expanded columns aligned with their rows.
    expanded_cols = pd.DataFrame(records, index=data.index)
    return pd.concat([data, expanded_cols], axis=1).drop('classifiedData', axis=1)

# Folder holding the individual classification JSON files.
classified_dir = '../../Texture_Repo/Donegal_Rural_Terrain_Textures/classified'

# Only pick up regular *.json files: the combined pickle (all_data.pkl) is
# written into this same folder, so without the filter a re-run would try to
# parse it as JSON and fail.
filenames = [f for f in listdir(classified_dir)
             if f.lower().endswith('.json') and isfile(join(classified_dir, f))]

# Collect the per-file frames and concatenate once at the end; concatenating
# inside the loop re-copies every previously loaded row on each iteration.
loaded_frames = []
for f in filenames:
    print('Processing ' + f)
    frame = load_classification_file(join(classified_dir, f))
    loaded_frames.append(frame)

# pd.concat raises on an empty list, so keep the previous "nothing loaded"
# value of None when the folder contains no classification files.
frames = pd.concat(loaded_frames) if loaded_frames else None

Processing user1~DJI_00991600.json
Processing user1~DJI_00991700.json
Processing user1~DJI_00991800.json
Processing user1~DJI_00991900.json
Processing user1~DJI_0099200.json
Processing user1~DJI_00992000.json
Processing user1~DJI_00992100.json
Processing user1~DJI_00992200.json
Processing user1~DJI_00992300.json
Processing user1~DJI_00992400.json
Processing user1~DJI_00992500.json
Processing user1~DJI_00992600.json
Processing user1~DJI_00992700.json
Processing user1~DJI_00992800.json
Processing user1~DJI_00992900.json
Processing user1~DJI_0099300.json
Processing user1~DJI_00993000.json
Processing user1~DJI_00993100.json
Processing user1~DJI_00993200.json
Processing user1~DJI_00993300.json
Processing user1~DJI_00993400.json
Processing user1~DJI_00993500.json
Processing user1~DJI_00993700.json
Processing user1~DJI_0099400.json
Processing user1~DJI_00994100.json
Processing user1~DJI_00994300.json
Processing user1~DJI_0099800.json
Processing user1~DJI_01060.json
Processing user1~DJI_010610

In [None]:
# Persist the combined DataFrame as a pickle inside the classified folder so
# that later sessions can reload it instead of re-parsing every JSON file.
pickle_path = '../../Texture_Repo/Donegal_Rural_Terrain_Textures/classified/all_data.pkl'
frames.to_pickle(pickle_path)