# Jaguar Movement Data Exploration

This notebook explores the raw jaguar movement data to understand patterns and characteristics of jaguar behavior through their GPS tracking data.

## 1. Setup and Data Loading

In [None]:
import sys
import os
from pathlib import Path
# Work out the project layout from the directory the notebook is run in:
# the project root is taken to be the parent of the working directory.
notebook_path = os.getcwd()
project_root = str(Path(notebook_path).parent)

# Put the project root first on the import path (needed for the `src.` import below).
sys.path.insert(0, project_root)

# <project_root>/data/raw, created (with parents) when it does not exist yet.
data_dir = str(Path(project_root) / 'data')
raw_dir = str(Path(data_dir) / 'raw')
os.makedirs(raw_dir, exist_ok=True)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import cartopy.crs as ccrs
import cartopy.feature as cfeature

# Import custom modules
from src.data.data_loader import DataLoader

# Notebook-wide display settings: no cap on the number of DataFrame columns
# pandas shows, and seaborn's 'notebook' plotting context.
pd.set_option('display.max_columns', None)
sns.set_context('notebook')

# Build the loader from the two raw CSV paths, then read the dataset into `data`.
movement_csv = os.path.join(raw_dir, 'jaguar_movement_data.csv')
additional_info_csv = os.path.join(raw_dir, 'jaguar_additional_information.csv')
data_loader = DataLoader(movement_csv, additional_info_csv)
data = data_loader.load_data()

## 2. Initial Data Inspection

In [None]:
# Overview of the loaded dataset: dimensions, column names and dtypes.
print("Dataset Shape:", data.shape)
print("\nColumns:", data.columns.tolist())
print("\nData Types:")
print(data.dtypes)

# First rows, rendered with the notebook's rich table display.
print("\nSample Data:")
display(data.head())

# Summary statistics. display() rather than print() so the table is rendered
# the same way as the sample above instead of as wrapped plain text.
print("\nNumerical Columns Summary:")
display(data.describe())

## 3. Data Quality Analysis

In [None]:
# Null count per column; only columns with at least one null are printed.
missing_values = data.isna().sum()
has_missing = missing_values > 0
print("Missing Values:")
print(missing_values[has_missing])

# Observed extent of the recorded coordinates.
lat_min, lat_max = data['latitude'].min(), data['latitude'].max()
lon_min, lon_max = data['longitude'].min(), data['longitude'].max()
print("\nCoordinate Ranges:")
print("Latitude range:", lat_min, "to", lat_max)
print("Longitude range:", lon_min, "to", lon_max)

# Parse the timestamp column to datetimes (later cells use its .dt accessor),
# then report the earliest and latest observation.
data['timestamp'] = pd.to_datetime(data['timestamp'])
first_seen = data['timestamp'].min()
last_seen = data['timestamp'].max()
print("\nTime Range:")
print("Start:", first_seen)
print("End:", last_seen)

## 4. Temporal Analysis

In [None]:
# Derive calendar components from the timestamp column. pd.to_datetime leaves
# an already-parsed column unchanged, so this cell also works when the
# conversion in the data-quality cell has not been run.
timestamps = pd.to_datetime(data['timestamp'])
data['hour'] = timestamps.dt.hour
data['day'] = timestamps.dt.day
data['month'] = timestamps.dt.month
data['year'] = timestamps.dt.year

# One panel per component, laid out row by row on a 2x2 grid.
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
panels = [
    ('hour', 'Distribution of Observations by Hour'),
    ('day', 'Distribution of Observations by Day'),
    ('month', 'Distribution of Observations by Month'),
    ('year', 'Distribution of Observations by Year'),
]

# discrete=True draws one bar per integer value, centred on that value.
# A fixed `bins=` count instead splits the observed min-max range evenly, so
# the bars stop lining up with whole hours/days/months whenever the data does
# not span the full range (and years get fractional bin edges).
for panel_ax, (column, title) in zip(axes.flat, panels):
    sns.histplot(data=data, x=column, discrete=True, ax=panel_ax)
    panel_ax.set_title(title)

plt.tight_layout()
plt.show()

## 5. Spatial Analysis

In [None]:
# Map of every GPS track, drawn on a plate carree (plain lon/lat) projection.
fig = plt.figure(figsize=(15, 10))
ax = plt.axes(projection=ccrs.PlateCarree())

# Base map layers
ax.add_feature(cfeature.LAND)
ax.add_feature(cfeature.OCEAN)
ax.add_feature(cfeature.COASTLINE)
ax.add_feature(cfeature.BORDERS, linestyle=':')

# One line per individual.
# - groupby splits the frame in a single pass instead of re-filtering the whole
#   frame once per id; sort=False keeps the ids in order of first appearance.
#   (Rows with a missing individual_id are not plotted.)
# - Each track is ordered by timestamp so the connecting line follows the path
#   in time rather than in file order.
# - transform= states explicitly that the x/y values are lon/lat degrees, so
#   the plot stays correct if the axes projection is ever changed.
for jaguar_id, jaguar_data in data.groupby('individual_id', sort=False):
    jaguar_data = jaguar_data.sort_values('timestamp')
    ax.plot(jaguar_data['longitude'],
            jaguar_data['latitude'],
            'o-',
            markersize=2,
            alpha=0.6,
            transform=ccrs.PlateCarree(),
            label=f'Jaguar {jaguar_id}')

ax.set_title('Jaguar Movement Patterns')
# Legend sits outside the map, to the right of the axes.
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()

## 6. Individual Jaguar Analysis

In [None]:
# Per-jaguar summary: number of fixes and tracking period, coordinate extent
# and spread, plus the first recorded sex / age / weight value.
aggregations = {
    'timestamp': ['count', 'min', 'max'],
    'longitude': ['min', 'max', 'std'],
    'latitude': ['min', 'max', 'std'],
    'sex': 'first',
    'age': 'first',
    'weight': 'first'
}
individual_stats = data.groupby('individual_id').agg(aggregations).round(2)

print("Individual Jaguar Statistics:")
display(individual_stats)

# Bar chart of how many observations were recorded for each jaguar.
observation_counts = individual_stats[('timestamp', 'count')]
plt.figure(figsize=(12, 6))
sns.barplot(x=observation_counts.index, y=observation_counts.values)
plt.title('Number of Observations per Jaguar')
plt.xlabel('Jaguar ID')
plt.ylabel('Number of Observations')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

## 7. Movement Pattern Analysis

In [None]:
# Order the fixes chronologically within each jaguar before differencing:
# diff() subtracts the previous *row* of the group, so unsorted rows would
# produce negative or meaningless gaps.
data = data.sort_values(['individual_id', 'timestamp'])

# Time elapsed since the same jaguar's previous fix (missing for its first fix).
# Only time gaps are computed here; no distances are derived in this cell.
data['time_diff'] = data.groupby('individual_id')['timestamp'].diff()
data['time_diff_hours'] = data['time_diff'].dt.total_seconds() / 3600

# Basic movement statistics
print("\nMovement Statistics:")
print("\nTime between observations (hours):")
print(data['time_diff_hours'].describe())

# Number of recorded positions per hour of day (uses the 'hour' column added
# in the temporal-analysis cell). The latitude count is plotted as the
# per-hour observation count.
plt.figure(figsize=(12, 6))
hourly_positions = data.groupby('hour')[['latitude', 'longitude']].count()
plt.plot(hourly_positions.index, hourly_positions['latitude'], '-o')
plt.title('Activity Level by Hour')
plt.xlabel('Hour of Day')
plt.ylabel('Number of Observations')
plt.grid(True)
plt.show()


## 8. Key Findings

1. **Data Quality**
   - Temporal coverage: from the earliest to the latest timestamp in the dataset (see the "Time Range" output in Section 3)
   - 117 individuals tracked

2. **Movement Patterns**
   - Peak activity during dawn and dusk, as suggested by the hourly observation counts in Section 7
   - Different movement ranges by individual
   - Seasonal variations in movement patterns

3. **Spatial Distribution**
   - Core movement areas identified
   - Territory size variations
   - Habitat preferences indicated by location density

4. **Individual Variations**
   - Age and sex-based differences in movement
   - Individual-specific movement patterns
   - Varying observation densities per individual
