In [None]:
import pandas as pd
import numpy as np

import seaborn as sns

In [None]:
# Load the metadata table from a local pickle.
# NOTE: unpickling can execute arbitrary code; only load pickle files from a trusted source.
meta = pd.read_pickle("./data/circadian_metadata.pkl")

In [None]:
# Load the tracking data from a local pickle.
# NOTE: unpickling can execute arbitrary code; only load pickle files from a trusted source.
data = pd.read_pickle("./data/circadian.pkl")

In [None]:
data

In [None]:
data.describe()

In [None]:
data[data['id'] == '2017-01-16 08:00:00|circadian.txt|01']

In [None]:
# Absolute change in x_pos between consecutive rows, computed within each 'id'
# so the first row of a series is NaN instead of being differenced against the
# last row of the previous series.
data['x_diff'] = data.groupby('id')['x_pos'].diff().abs()

In [None]:
# Elapsed time between consecutive rows, computed within each 'id' so the first
# row of a series is NaN instead of being differenced against the last row of
# the previous series.
data['t_diff'] = data.groupby('id')['t'].diff()

In [None]:
# Speed per row: absolute displacement divided by the elapsed time since the previous row.
# Rows whose diffs are NaN get a NaN velocity.
data['velocity'] = data['x_diff'] / data['t_diff']

In [None]:
# Flag rows whose speed is strictly above 2; NaN velocities compare as False.
data['moving'] = data['velocity'].gt(2)

In [None]:
def find_velocity(df, velocity_value = 2):
    """Add per-row speed and a movement flag to one time series.

    Consecutive rows are differenced, so ``df`` is expected to hold a single
    series with its rows already in time order.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``'x_pos'`` and ``'t'``. It is not modified.
    velocity_value : float, default 2
        Speed threshold; rows strictly above it are flagged as moving.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with four added columns: ``'x_diff'`` (absolute change
        in ``x_pos``), ``'t_diff'`` (change in ``t``), ``'velocity'``
        (``x_diff / t_diff`` rounded to 3 decimals) and ``'moving'`` (bool).
        The first row has NaN diffs and velocity, and ``moving`` is False.
    """
    # Work on a copy: the caller may pass a groupby sub-frame or a slice, and
    # writing new columns into it would mutate (or warn about) the caller's data.
    out = df.copy()

    out['x_diff'] = out['x_pos'].diff().abs()
    out['t_diff'] = out['t'].diff()
    out['velocity'] = (out['x_diff'] / out['t_diff']).round(3)
    # NaN > threshold evaluates to False, so undefined velocities are "not moving".
    out['moving'] = out['velocity'] > velocity_value

    return out

In [None]:
from functools import partial

In [None]:
# Run find_velocity on every 'id' series separately; the keyword argument is
# forwarded to find_velocity by GroupBy.apply.
mov_df = (
    data
    .groupby('id', group_keys=False)[['id', 't', 'x_pos']]
    .apply(find_velocity, velocity_value=2)
)

In [None]:
# Loop-based computation of the per-series velocity, built on a copy of `data`.
result_df = data.copy()
result_df['velocity'] = np.nan

for series_id in result_df['id'].unique():
    # Rows belonging to the current series
    mask = result_df['id'] == series_id
    series_data = result_df.loc[mask].copy()

    # Absolute displacement over elapsed time between consecutive rows
    pos_diff = series_data['x_pos'].diff().abs()
    time_diff = series_data['t'].diff()
    velocity_calc = pos_diff.div(time_diff)

    result_df.loc[mask, 'velocity'] = velocity_calc.round(3)

# Speed threshold above which a row counts as moving
velocity_value = 2

result_df['moving'] = result_df['velocity'].gt(velocity_value)


In [None]:
velocity_calc.index

In [None]:
result_df

In [None]:
mov_df

In [None]:
# Sanity check: both implementations should give the same velocities for one series.
# Each frame is filtered with its own 'id' mask (no reliance on the two frames
# sharing an index), and NaNs compare equal so the leading NaN need not be sliced off.
check_id = '2017-01-16 08:00:00|circadian.txt|01'
np.array_equal(
    mov_df.loc[mov_df['id'] == check_id, 'velocity'].to_numpy(),
    result_df.loc[result_df['id'] == check_id, 'velocity'].to_numpy(),
    equal_nan=True,
)

### Plotting attempts

In [None]:
sns.lineplot(mov_df, x = 't', y = 'velocity', hue = 'id');

In [None]:
sns.boxplot(mov_df, x = 'id', y = 'velocity', hue = 'id');

### But we're actually interested in their experimental groups, not the individuals

In [None]:
# Dictionary from meta's index labels to the corresponding 'exp_group' value.
# NOTE(review): presumably meta is indexed by the same id strings as data['id'] — confirm.
map_dict = meta['exp_group'].to_dict()

In [None]:
# Attach the experimental group to every row via its 'id'; ids missing from map_dict become NaN.
mov_df['group'] = mov_df['id'].map(map_dict)

In [None]:
mov_df.head()

In [None]:
sns.boxplot(mov_df, x = 'group', y = 'velocity', hue = 'group');

### Be careful of averaging all data points and not the average per specimen

In [None]:
# One row per specimen: the mean of every other column within each 'id'.
# 'group' is averaged too; since it was mapped from 'id' it is constant within a
# specimen, so the mean returns the label itself (as a float).
avg_df = mov_df.groupby('id').mean()

In [None]:
avg_df

In [None]:
# Cast the averaged 'group' values back to integer labels; astype(int) raises if any value is NaN.
avg_df['group'] = avg_df['group'].astype(int)

In [None]:
sns.boxplot(avg_df, x = 'group', y = 'velocity', hue = 'group')

## Plotting as a time series

In [None]:
from math import floor
import matplotlib.pyplot as plt 

In [None]:
bin_time = 40 * 60  # bin width in seconds (40 minutes)

# Floor every timestamp to the start of its bin. Vectorised equivalent of
# applying `bin_time * math.floor(t / bin_time)` row by row; the result is an
# integer number of seconds.
mov_df['t_round'] = (np.floor(mov_df['t'] / bin_time) * bin_time).astype('int64')

In [None]:
# Convert the binned time from seconds to hours.
mov_df['t_hours'] = mov_df['t_round'].div(60 * 60)

In [None]:
def start_0(data, offset_hours = 24):
    """Drop the first ``offset_hours`` hours and shift the time axis to start at 0.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``'t_hours'`` column. It is not modified.
    offset_hours : float, default 24
        Rows with ``t_hours`` below this value are discarded; the value is then
        subtracted from ``t_hours`` of the remaining rows.

    Returns
    -------
    pandas.DataFrame
        A new frame holding only the rows with ``t_hours >= offset_hours``, with
        ``t_hours`` reduced by ``offset_hours``. The original index labels are kept.
    """
    # Explicit copy so the assignment below writes to an independent frame
    # rather than to a slice of the input (avoids SettingWithCopyWarning).
    filt = data.loc[data['t_hours'] >= offset_hours].copy()
    filt['t_hours'] = filt['t_hours'] - offset_hours

    return filt

In [None]:
# start_0 filters and shifts each row independently of the others, so it can be
# applied to the whole frame at once; no per-'id' groupby is needed.
# This also avoids relying on GroupBy.apply passing the grouping column ('id')
# to the function, which pandas 2.2+ deprecates. Rows keep mov_df's order.
filt_df = start_0(mov_df)

In [None]:
filt_df

In [None]:
# Mean velocity and fraction of rows flagged as moving for each specimen in each
# time bin; the result is indexed by ('id', 't_hours').
# Note: this rebinds avg_df, replacing the per-specimen frame built earlier.
avg_df = filt_df.groupby(['id', 't_hours'])[['velocity', 'moving']].mean()

In [None]:
avg_df

In [None]:
sns.lineplot(avg_df, x = 't_hours', y = 'velocity', hue = 'id')

In [None]:
# Index level 0 is 'id'; map it to the experimental group (ids missing from map_dict become NaN).
avg_df['group'] = avg_df.index.get_level_values(0).map(map_dict)

In [None]:
avg_df

In [None]:
sns.lineplot(avg_df, x = 't_hours', y = 'velocity', hue = 'group', palette = 'Set2', linewidth = 0.8)

In [None]:
filt_df

In [None]:
fig, axes = plt.subplots(figsize=(14, 10))

# Colour dictionary
colors = {1 : 'red', 2 : 'blue', 3 : 'green'}

# Collapse to one value per specimen per time bin first, so the mean and SEM
# below are taken across individuals rather than across pooled raw samples
# (pooling inflates `count` and understates the SEM).
# Rows whose group is NaN are dropped by the groupby.
per_specimen = (
    filt_df
    .groupby(['group', 'id', 't_hours'], as_index=False)['moving']
    .mean()
)

# Sorted so the legend order is stable from run to run
for group in sorted(per_specimen['group'].unique()):

    group_data = per_specimen[per_specimen['group'] == group]

    # Average across individuals in the group
    avg_activity = group_data.groupby('t_hours')['moving'].agg(['mean', 'std', 'count']).reset_index()

    # Calculate standard error (NaN when a bin holds a single specimen)
    avg_activity['sem'] = avg_activity['std'] / np.sqrt(avg_activity['count'])

    # Plot line; a group without an entry in `colors` falls back to matplotlib's colour cycle
    line, = axes.plot(avg_activity['t_hours'], avg_activity['mean'],
                      label=f'Group {group}', linewidth=1, color=colors.get(group))

    # Fill between with the same colour as the line, clipped to the valid [0, 1] range
    axes.fill_between(avg_activity['t_hours'],
                      np.maximum(0, avg_activity['mean'] - avg_activity['sem']),
                      np.minimum(1, avg_activity['mean'] + avg_activity['sem']),
                      alpha=0.3, color=line.get_color())

axes.set_xlabel('Time (hours)')
axes.set_ylabel('Fraction of time moving')
axes.set_title('Activity Patterns Over Time')
axes.legend()
axes.grid(True, alpha=0.3)
plt.show()