# Pool together Results

In [None]:
# get list of files recursively
import os
import pandas as pd
import numpy as np

import warnings

data_dir = '/bask/homes/f/fspo1218/amber/projects/object-store-scripts/data'
cols = ['path', 'inference datetime', 'country', 'deployment name', 'binary class', 'binary confidence',
        'xmin', 'ymin', 'xmax',  'ymax', 'species class', 'species confidence']


def read_results_csv(csv_path, columns):
    """Read one headerless ``results.csv`` file into a DataFrame.

    The files carry no header row, so the first line is data. Passing
    ``header=None`` with explicit ``names`` keeps that first record as a
    normal, correctly typed row instead of letting pandas consume it as
    column labels (which stringifies it, renames repeated values such as
    ``1`` -> ``1.1`` and turns blank cells into ``Unnamed: N``).

    Args:
        csv_path: Path of the CSV file to read.
        columns: Column names to assign, in file order.

    Returns:
        A DataFrame with one row per line of the file.
    """
    # Pin the text columns to str so they remain string-like even when every
    # value in a file is missing; later cells use the ``.str`` accessor on them.
    text_columns = ('path', 'country', 'deployment name', 'binary class', 'species class')
    text_dtypes = {name: str for name in text_columns if name in columns}
    return pd.read_csv(csv_path, header=None, names=columns, dtype=text_dtypes)


# One DataFrame per results.csv found anywhere below data_dir.
results = [
    read_results_csv(os.path.join(root, file), cols)
    for root, _dirs, files in os.walk(data_dir)
    for file in files
    if file == 'results.csv'
]

# Pool all per-file frames into a single table.
if results:
    all_results = pd.concat(results, ignore_index=True)
else:
    # pd.concat([]) raises an opaque ValueError; say what actually went wrong.
    warnings.warn(f"No 'results.csv' files found under {data_dir!r}; all_results is empty.")
    all_results = pd.DataFrame(columns=cols)

In [None]:
# Species labels containing 'Unnamed' are not real predictions (presumably
# pandas' placeholder for a blank header cell -- confirm against the loader).
# Blank out both the label and its confidence for those rows.
placeholder_species = (
    all_results['species class'].notna()
    & all_results['species class'].str.contains('Unnamed')
)
all_results.loc[placeholder_species, 'species confidence'] = np.nan
all_results.loc[placeholder_species, 'species class'] = np.nan

# Split each path once and reuse the pieces below.
path_parts = all_results['path'].str.split('/')

# The deployment id is the third-from-last path component.
all_results['deployment_id'] = path_parts.str[-3]

# The second-to-last '-'-separated token of the file name is used as the
# capture stamp (assumes a YYYYMMDD...HHMMSS layout -- TODO confirm).
capture_stamp = path_parts.str[-1].str.split('-').str[-2]
all_results['image_time'] = (
    capture_stamp.str[-6:-4] + ':' + capture_stamp.str[-4:-2] + ':' + capture_stamp.str[-2:]
)
all_results['image_date'] = (
    capture_stamp.str[:4] + '-' + capture_stamp.str[4:6] + '-' + capture_stamp.str[6:8]
)

# Keep only the last three path components as a shorter path.
all_results['path'] = path_parts.str[-3:].str.join('/')

In [None]:
# Display the pooled results table.
all_results

In [None]:
# List the column names currently present in the pooled table.
all_results.columns

In [None]:
# Put the columns in reading order: where/when first, then the detection
# fields, then the image path and inference time.
column_order = [
    'country', 'deployment name', 'deployment_id', 'image_time', 'image_date',
    'binary class', 'binary confidence', 'xmin', 'ymin', 'xmax', 'ymax',
    'species class', 'species confidence', 'path', 'inference datetime',
]

# .copy() makes the reordered table an independent frame, so adding columns
# to it later (e.g. 'image_timestamp') does not raise SettingWithCopyWarning.
all_results = all_results[column_order].copy()

In [None]:
# Preview the first rows after reordering the columns.
all_results.head()

In [None]:
# Count how many rows fall into each 'binary class' value.
all_results['binary class'].value_counts()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
# Use seaborn's "paper" plotting context for the figures in this notebook.
sns.set_context("paper")

# Bar chart with one bar per predicted binary class, height = number of rows.
fig, ax = plt.subplots(figsize=(3, 5))
sns.countplot(x='binary class', data=all_results, ax=ax)
ax.set_title('Predicted Object Classes')
plt.xticks(rotation=45)
plt.show()
# NOTE(review): if the save below is re-enabled, it likely needs to run before
# plt.show(), otherwise the saved figure may be blank -- confirm.
#plt.savefig('/bask/homes/f/fspo1218/amber/projects/object-store-scripts/outputs/binary_class.png', dpi=500, bbox_inches='tight', pad_inches=0.1)

In [None]:
# Rows per deployment, as a two-column frame ('deployment name', 'count').
temp = all_results['deployment name'].value_counts().reset_index()

# Bar chart of the number of detected objects for each deployment.
fig, ax = plt.subplots(figsize=(3, 3))
sns.barplot(data=temp, x='deployment name', y='count', ax=ax)
ax.set_xlabel('Deployment Name')
ax.set_ylabel('Number of Objects Detected')
ax.set_title('Number of Objects Detected per Deployment')
plt.xticks(rotation=45, ha='right')
plt.show()


In [None]:
# Rows per image path, i.e. how many objects were detected in each image.
temp = all_results['path'].value_counts().reset_index()

# Histogram of those per-image counts, in 10 bins.
fig, ax = plt.subplots(figsize=(3, 3))
sns.histplot(data=temp, x='count', bins=10, ax=ax)
ax.set_xlabel('Number of Objects per Image')
ax.set_ylabel('Number of Images')
ax.set_title('The number of Objects Detected per Image')
plt.show()

In [None]:
# Join the time and date strings and parse them into one datetime column.
all_results['image_timestamp'] = pd.to_datetime(
    all_results['image_time'] + ' ' + all_results['image_date'],
    format='%H:%M:%S %Y-%m-%d',
)

In [None]:
# Preview the table again, now including the parsed 'image_timestamp' column.
all_results.head()


In [None]:
import datetime

# One row per distinct (deployment name, timestamp, path): several detections
# in the same image collapse into a single row. No sorting happens here.
subset = all_results[['deployment name', 'image_timestamp', 'path']].drop_duplicates()

def subset_by_interval(deployment, interval, images=None):
    """Keep the images of one deployment that fall exactly on a regular time grid.

    The grid starts at the deployment's earliest timestamp and advances by
    ``interval`` up to the latest timestamp. Only rows whose
    ``image_timestamp`` equals a grid point are kept; an image that is a few
    seconds off the grid is dropped (matching is exact, not nearest).

    Args:
        deployment: Value of the 'deployment name' column to select.
        interval: pandas frequency string for the grid spacing, e.g. '1min'.
        images: DataFrame with 'deployment name' and 'image_timestamp'
            columns. Defaults to the module-level ``subset`` table, which is
            what the original two-argument call used.

    Returns:
        The matching rows sorted by 'image_timestamp'. Empty (same columns)
        when the deployment has no rows with a valid timestamp.
    """
    if images is None:
        images = subset

    per_deployment = images.loc[images['deployment name'] == deployment]

    # Rows without a timestamp can never sit on the grid, and a NaT start/end
    # would make pd.date_range raise, so drop them up front.
    per_deployment = per_deployment.dropna(subset=['image_timestamp'])
    per_deployment = per_deployment.sort_values('image_timestamp')

    # Unknown deployment (or a NaN name coming from .unique()): nothing to
    # sample; return the empty frame rather than failing on .iloc[0].
    if per_deployment.empty:
        return per_deployment

    timestamps = per_deployment['image_timestamp']
    grid = pd.date_range(start=timestamps.iloc[0], end=timestamps.iloc[-1], freq=interval)

    return per_deployment.loc[timestamps.isin(grid)]

# Sample every deployment's images on a 1-minute grid and stack the results
# into a single table.
interval_images = pd.concat(
    [
        subset_by_interval(deployment_name, '1min')
        for deployment_name in all_results['deployment name'].unique()
    ],
    ignore_index=True,
)

# Shape of the full detections table versus the sampled image table.
print(all_results.shape)
print(interval_images.shape)

In [None]:
# Number of interval-sampled rows per deployment.
interval_images['deployment name'].value_counts()

In [None]:
# Rows per deployment in the interval-sampled table. Each row of
# interval_images is one unique image, not one detected object.
temp = interval_images['deployment name'].value_counts().reset_index()

# Bar chart of the number of sampled images for each deployment.
fig, ax = plt.subplots(figsize=(3, 3))

sns.barplot(data=temp, x='deployment name', y='count', ax=ax)
plt.xticks(rotation=45, ha='right')
# The labels previously said "Objects Detected" (copied from the earlier
# per-deployment plot), which misdescribed what is counted here.
ax.set_ylabel('Number of Images')
ax.set_xlabel('Deployment Name')
ax.set_title('Number of Interval-Sampled Images per Deployment')
plt.show()

In [None]:
# Display the per-deployment counts table used for the plot above.
temp

In [None]:
# Inspect the time gaps between consecutive images of a single deployment.
temp2 = all_results[all_results['deployment name'] == 'Wetland - DC049A28']

# One row per image, ordered chronologically within each deployment_id so
# that "the row above" really is the previous image in time. Without the
# sort, shift(1) follows file order and the gaps are meaningless.
# NOTE: this rebinds `subset`, which subset_by_interval() reads by default.
subset = (
    temp2[['deployment_id', 'image_timestamp', 'path']]
    .drop_duplicates()
    .sort_values(['deployment_id', 'image_timestamp'])
)

# for each deployment, add column for the timestamp of the row above
subset['prev_timestamp'] = subset.groupby('deployment_id')['image_timestamp'].shift(1)

# add column for time difference between the current and previous timestamp
# (NaT for the first image of each deployment_id)
subset['time_diff'] = subset['image_timestamp'] - subset['prev_timestamp']

# subset to only rows where the time difference is greater than the threshold
# (1 minute in the code below), or NaT
# subset = subset[(subset['time_diff'] > datetime.timedelta(minutes=1)) | (subset['time_diff'].isna())]

# subset = all_results.loc[all_results['path'].isin(subset['path'])]

subset


# temp2#['path'].value_counts()