# Jaguar Movement Data Exploration

This notebook explores the raw jaguar movement data to understand patterns and characteristics of jaguar movements.

## Contents:
1. Data Loading and Initial Inspection
2. Data Quality Analysis
3. Temporal Analysis
4. Spatial Analysis
5. Individual Jaguar Analysis

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from pathlib import Path
from src.data.data_loader import DataLoader

# Configure visualizations
# The bare 'seaborn' matplotlib style name was deprecated in matplotlib 3.6 and
# removed in 3.8, where plt.style.use('seaborn') raises OSError. seaborn's own
# theme call applies the seaborn look and the 'notebook' context in one step.
sns.set_theme(context='notebook')
# Show every column when a DataFrame is displayed (no column truncation).
pd.set_option('display.max_columns', None)

## 1. Data Loading and Initial Inspection

In [None]:
# Load data
# Paths handed to the project DataLoader, in the order it expects them
# (judging by the file names: movement records first, then the
# additional-information table -- confirm against DataLoader).
movement_path = 'data/raw/jaguar_movement_data.csv'
additional_info_path = 'data/raw/jaguar_additional_information_2.csv'

data_loader = DataLoader(movement_path, additional_info_path)
data = data_loader.load_data()

# First look at the loaded frame: dimensions, column names, dtypes, and the
# leading rows.
dataset_shape = data.shape
column_names = data.columns.tolist()

print("Dataset Shape:", dataset_shape)
print("\nColumns:", column_names)
print("\nData Types:\n")
print(data.dtypes)
print("\nSample Data:")
display(data.head())

## 2. Data Quality Analysis

In [None]:
# Check for missing values
# Per-column null counts; only the columns that actually contain nulls are printed.
missing_values = data.isna().sum()
has_missing = missing_values > 0
print("Missing Values:")
print(missing_values.loc[has_missing])

# Check value ranges
numeric_summary = data.describe()
print("\nNumerical Columns Summary:")
print(numeric_summary)

# Check categorical columns
# Columns stored with object dtype are treated as the categorical ones.
print("\nCategorical Columns Summary:")
categorical_cols = data.select_dtypes(include=['object']).columns
for col in categorical_cols:
    counts = data[col].value_counts()
    print(f"\n{col} value counts:")
    print(counts)

## 3. Temporal Analysis

In [None]:
# Convert timestamp to datetime if not already
data['timestamp'] = pd.to_datetime(data['timestamp'])

# Extract time components
data['hour'] = data['timestamp'].dt.hour
data['day'] = data['timestamp'].dt.day
data['month'] = data['timestamp'].dt.month
data['year'] = data['timestamp'].dt.year

# Plot temporal distributions
# One panel per component, laid out row by row: hour, day, month, year.
time_components = ['hour', 'day', 'month', 'year']
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

for ax, component in zip(axes.flat, time_components):
    # The components are integers, so use unit-width bins centred on each
    # value (discrete=True). A fixed bin count is stretched over the observed
    # range and misaligns the bars whenever the data does not span the full
    # 0-23 / 1-31 / 1-12 range; automatic binning can do the same for years.
    sns.histplot(data=data, x=component, discrete=True, ax=ax)
    ax.set_title(f'Distribution of Observations by {component.capitalize()}')

plt.tight_layout()
plt.show()

## 4. Spatial Analysis

In [None]:
# Create map visualization
fig = plt.figure(figsize=(15, 10))
ax = plt.axes(projection=ccrs.PlateCarree())

# Add map features
for map_feature in (cfeature.LAND, cfeature.OCEAN, cfeature.COASTLINE):
    ax.add_feature(map_feature)
ax.add_feature(cfeature.BORDERS, linestyle=':')

# Plot each jaguar's movement
# The longitude/latitude columns are plotted as geographic coordinates, so the
# data CRS is declared explicitly; the plot then stays correct if the map
# projection above is ever changed.
data_crs = ccrs.PlateCarree()

# A single groupby pass replaces one full boolean-mask scan per individual.
# sort=False keeps individuals in order of first appearance, matching the
# legend/colour order that iterating over .unique() produced. Rows with a
# missing individual_id are skipped by groupby.
for jaguar_id, jaguar_data in data.groupby('individual_id', sort=False):
    # 'o-' joins consecutive points with a line, so the fixes must be in
    # chronological order for the line to represent the path actually taken.
    jaguar_data = jaguar_data.sort_values('timestamp')
    ax.plot(jaguar_data['longitude'],
            jaguar_data['latitude'],
            'o-',
            markersize=2,
            alpha=0.6,
            transform=data_crs,
            label=f'Jaguar {jaguar_id}')

ax.set_title('Jaguar Movement Patterns')
# Anchor the legend outside the axes so it does not cover the map.
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()

## 5. Individual Jaguar Analysis

In [None]:
# Summary statistics by individual
# Per-column aggregations: observation count and time span, coordinate
# extent and spread, and the first recorded sex/age/weight of each individual.
aggregation_spec = {
    'timestamp': ['count', 'min', 'max'],
    'longitude': ['min', 'max', 'std'],
    'latitude': ['min', 'max', 'std'],
    'sex': 'first',
    'age': 'first',
    'weight': 'first',
}
grouped_by_individual = data.groupby('individual_id')
individual_stats = grouped_by_individual.agg(aggregation_spec).round(2)

print("Individual Jaguar Statistics:")
display(individual_stats)

# Plot observation counts by individual
# The aggregated frame has two-level column labels; pick out the count column.
observation_counts = individual_stats[('timestamp', 'count')]

counts_fig, counts_ax = plt.subplots(figsize=(12, 6))
sns.barplot(x=observation_counts.index,
            y=observation_counts.values,
            ax=counts_ax)
counts_ax.set_title('Number of Observations per Jaguar')
plt.setp(counts_ax.get_xticklabels(), rotation=45)
counts_ax.set_xlabel('Jaguar ID')
counts_ax.set_ylabel('Number of Observations')
plt.tight_layout()
plt.show()