<a href="https://colab.research.google.com/github/MScEcologyAndDataScienceUCL/BIOS0032_AI4Environment/blob/main/8_AI_and_Movement_Data/TrackingAnalysis_R.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Mapping tracking data with R (and a bit of Python) in Colab 


(redeveloped from an R script also available here: https://www.dropbox.com/sh/5wd1fkwcompvsyr/AAAzQH3bOHDK-eCrUXaNTQkVa?dl=0)

# Setup

First we need to make sure the right version of rpy2 is installed.



In [None]:
# Install rpy2 pinned to version 3.5.1 (the version this notebook expects).
!pip install rpy2==3.5.1

Then we can load the rpy2 extension, which allows us to use R in Colab.

In [None]:
# Plain load of the extension, left commented out in favour of %reload_ext below.
#%load_ext rpy2.ipython

# (Re)load the rpy2 IPython extension; it provides the %%R cell magic used below.
%reload_ext rpy2.ipython
# Set IPython's completer to greedy mode.
%config IPCompleter.greedy=True
# Request the 'retina' figure format from the inline plotting backend.
%config InlineBackend.figure_format = 'retina'

# Get Data

Download the tracking data to the colab environment

* We're downloading a CSV file from my Dropbox using wget; we could also upload a file directly to map different data

In [None]:
!wget -O Mres_TrackingData_2022.csv https://www.dropbox.com/s/8x1kvzkgd9rv2hi/Mres_TrackingData_2022.csv?dl=0

# Quick exploratory plot

Load the tracking data and make a quick plot of all the tracks..

**Remember to include '%%R' at the start of a chunk when we're using R code**

In [None]:
%%R

# Plotting library used by the R cells in this notebook
library(ggplot2)

# Read the downloaded tracking CSV; as.is = TRUE keeps character columns
# as character vectors instead of converting them to factors
dat <- read.csv("Mres_TrackingData_2022.csv", as.is = TRUE)
# Optional: exclude one individual before plotting
# dat <- subset(dat, name != "CL")

# Overview of every track: one panel per individual, coloured by individual
ggplot(
  data = dat,
  mapping = aes(x = long, y = lat, group = name, colour = name)
) +
  geom_point() +
  facet_wrap(vars(name))

# Explore the data

Let's have a quick look at the top of the dataframe

In [None]:
%%R
# Show the first six rows of the tracking data
head(dat, n = 6L)

In [None]:
%%R
# Column-by-column summary of the tracking data
summary(object = dat)

Which individuals do we have?

In [None]:
%%R

# Distinct values of the name column, i.e. the individuals present
unique(dat[["name"]])

Let's plot one of them...

In [None]:
%%R

library(ggplot2)

# Keep only the rows recorded for the individual named "AS"
one_individual <- subset(dat, name == "AS")

# Plot this single individual's positions with equal x/y scaling
ggplot(
  data = one_individual,
  mapping = aes(x = long, y = lat, group = name, colour = name)
) +
  geom_point() +
  theme_bw() +
  coord_equal()

# Speed

We can explore the speed of individuals, comparing males and females

In [None]:
%%R
# Split the tracking data by the recorded gender
female <- subset(dat, gender == "F")
male <- subset(dat, gender == "M")

# Summary stats. A %%R cell only displays the value of its last expression
# automatically, so each summary is printed explicitly.
# summary() counts NAs on its own, so no na.rm argument is passed.
print(summary(female$speed_ms))
print(summary(male$speed_ms))

# Histograms (100 breaks requested): first each one as its own plot ...
hist(male$speed_ms, breaks = 100)
hist(female$speed_ms, breaks = 100)

# ... then both stacked in a 2-row, 1-column layout for direct comparison
par(mfrow = c(2, 1))
hist(male$speed_ms, breaks = 100)
hist(female$speed_ms, breaks = 100)

These distributions are often very skewed and log-transforming may let us see the distribution better

In [None]:
%%R

# Histograms of log10(speed); note that log10(0) is -Inf, so zero speeds
# have no finite position on this scale.

# Each gender as its own plot first
hist(log10(male$speed_ms), breaks = 100)
hist(log10(female$speed_ms), breaks = 100)

# Then the same two histograms stacked in a 2-row, 1-column layout
par(mfrow = c(2, 1))
hist(log10(male$speed_ms), breaks = 100)
hist(log10(female$speed_ms), breaks = 100)

We can also do these kinds of plots in ggplot to make them a bit fancier...

In [None]:
%%R

# Speed histogram split by gender; bins are 0.1 wide in the units of speed_ms
m <- ggplot(
  data = dat,
  mapping = aes(x = speed_ms, group = gender, colour = gender, fill = gender)
) +
  geom_histogram(binwidth = 0.1, alpha = 0.6)
m

Again, log-transforming can make things easier to see... and it's easy to make the log-axis labels clearer in ggplot

In [None]:
%%R 

# Same histogram on a log10 x-axis, which spreads out the skewed distribution
# **Note - where did the zeros go? **
m <- ggplot(
  data = dat,
  mapping = aes(x = speed_ms, group = gender, colour = gender, fill = gender)
) +
  geom_histogram(binwidth = 0.1, alpha = 0.6) +
  scale_x_log10()
m

# Speeds per individual

Get some summary stats for each individual

In [None]:
%%R 

# Mean speed for each individual, sorted from slowest to fastest.
# A %%R cell only displays its last value automatically, so print explicitly.
print(sort(tapply(dat$speed_ms, dat$name, mean, na.rm = TRUE)))

# Who's in the data
individuals <- unique(dat$name)
print(individuals)

# Pull out two individuals to compare
n <- subset(dat, name == "AS")
k <- subset(dat, name == "BH")

# Get their speed
n_speed <- n$speed_ms
k_speed <- k$speed_ms

# Get mean speed (ignoring missing values)
print(mean(n_speed, na.rm = TRUE))
print(mean(k_speed, na.rm = TRUE))

Use ggplot to do fancy speed plots for each person...

In [None]:
%%R
# Speed distribution for each person: a density-scaled histogram with a
# kernel density curve on top, one facet row per individual.
# after_stat(density) replaces the deprecated ..density.. notation.
m <- ggplot(dat, aes(x = speed_ms, colour = name)) +
  geom_histogram(
    aes(y = after_stat(density)),
    binwidth = 0.1,
    position = "identity",
    alpha = 0.5
  ) +
  geom_density(alpha = 0.6) +
  facet_grid(name ~ .)
m

In [None]:
%%R

# Per-individual speed histograms (100 bins) on a log10 x-axis,
# one facet row per individual
m <- ggplot(data = dat, mapping = aes(x = speed_ms, fill = name)) +
  geom_histogram(bins = 100) +
  scale_x_log10() +
  facet_grid(rows = vars(name))
m

In [None]:
%%R

# Keep only rows with a strictly positive speed; subset() also drops rows
# where the comparison evaluates to NA
dat_non_stationary <- subset(dat, speed_ms > 0)

# Per-individual histograms of the moving-only data, x-axis limited to 0-5
m <- ggplot(
  data = dat_non_stationary,
  mapping = aes(x = speed_ms, colour = name, fill = name)
) +
  geom_histogram(bins = 100) +
  facet_grid(rows = vars(name), scales = "free") +
  scale_x_continuous(limits = c(0, 5))
m

What was the mean speed for each individual? (Now working with only moving data)

In [None]:
%%R

# Mean speed per individual, computed on the moving-only data
library(dplyr)

# Group by name, then compute per-individual summary statistics:
# row count, mean and median speed, and mean distance.
# Rows are ordered from highest to lowest mean speed.
summary_dat <- dat_non_stationary %>%
  group_by(name) %>%
  summarise(
    N = n(),
    mean_speed = mean(speed_ms, na.rm = TRUE),
    median_speed = median(speed_ms, na.rm = TRUE),
    mean_distance = mean(distance_km, na.rm = TRUE)
  ) %>%
  arrange(desc(mean_speed))

summary_dat

Plot who's the fastest

In [None]:
%%R

# Bar chart of mean speed per individual, bars ordered fastest to slowest
ggplot(
  data = summary_dat,
  mapping = aes(x = reorder(name, -mean_speed), y = mean_speed, fill = name)
) +
  geom_col() +
  scale_fill_brewer(palette = "Spectral")

# Interactive maps

Install some python packages to make interactive maps

In [None]:
# Install ipyleaflet with pip in quiet mode (-q).
!pip install -q ipyleaflet

from google.colab import output

# Turn on Colab's custom widget manager.
# NOTE(review): presumably required for the ipyleaflet maps below to render - confirm.
output.enable_custom_widget_manager()

In [None]:
# Install geopandas, which the map cells import to build GeoDataFrames.
!pip install geopandas

Interactive map showing tracks

In [None]:
import geopandas
import numpy as np
import pandas as pd
from ipyleaflet import GeoData, GeoJSON, Map, basemaps

# Base map centred on a fixed (lat, lon) with the Esri topographic basemap
m = Map(center=(51.5354, -0.15), zoom=15, basemap=basemaps.Esri.WorldTopoMap)

# Load tracking data. A relative path is used, matching the wget download
# and the R cells, so the cell does not depend on the /content directory.
df = pd.read_csv("Mres_TrackingData_2022.csv")

# Convert to geopandas: one point geometry per row, built from long/lat
gdf = geopandas.GeoDataFrame(df, geometry=geopandas.points_from_xy(df.long, df.lat))

# Wrap the GeoDataFrame in an ipyleaflet GeoData layer
geo_data = GeoData(
    geo_dataframe=gdf,
    hover_style={"fillColor": "red", "fillOpacity": 0.2},
    point_style={"radius": 2, "fillOpacity": 0.8, "fillColor": "blue", "weight": 3},
    name="Tracks",
)

# Add to map. Map.add() supersedes add_layer(), deprecated since ipyleaflet 0.17.
m.add(geo_data)

# Last expression of the cell: displays the map widget
m

More complex interactive map showing tracks colored by individual

In [None]:
import geopandas
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from ipyleaflet import GeoData, GeoJSON, LayersControl, Map
from matplotlib.colors import rgb2hex

# Load tracking data. A relative path is used, matching the wget download
# and the R cells, so the cell does not depend on the /content directory.
df = pd.read_csv("Mres_TrackingData_2022.csv")

m = Map(center=(51.5354, -0.15), zoom=15)

# One group of rows per individual
df_grouped = df.groupby("name")

# One viridis colour per individual, evenly spaced across the colormap
cmap = plt.get_cmap("viridis")
colors = cmap(np.linspace(0, 1, df_grouped.ngroups))

# Pair each individual's rows with its colour and add one map layer per individual
for (group_name, df_group), rgba in zip(df_grouped, colors):
    hex_color = rgb2hex(rgba)
    print("\nAdding data for {}, color {}".format(group_name, hex_color))

    # Convert to geopandas: one point geometry per row, built from long/lat
    gdf = geopandas.GeoDataFrame(
        df_group, geometry=geopandas.points_from_xy(df_group.long, df_group.lat)
    )

    # Wrap in a GeoData layer named after the individual (shown in the layer control)
    geo_data = GeoData(
        geo_dataframe=gdf,
        point_style={
            "radius": 2,
            "fillOpacity": 0.4,
            "color": hex_color,
            "weight": 3,
        },
        name=group_name,
    )

    # Map.add() supersedes add_layer(), deprecated since ipyleaflet 0.17
    m.add(geo_data)


# Layer toggle control; Map.add() also supersedes add_control()
m.add(LayersControl())
m.layout.height = "700px"

# Last expression of the cell: displays the map widget
m

# Other problems

Some problems:
* Who is the fastest 'group'?
* Are altitude measurements reliable?
* How would you calculate 'residency time' (clue.. think about the space as a grid)
* Data for multiple sessions are available here: https://www.dropbox.com/sh/kokqn4a60tpucaa/AACa6zhKAC_Zu_P7qUhZkKCMa?dl=0 (which individual was fastest across all sessions?)
