In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# List every file under the read-only input directory so the dataset path used
# by the loading cell can be looked up. (The walk previously targeted
# /kaggle/working/, which is the output directory, not the input one.)
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Read the accelerometer CSV from the Kaggle input directory into a DataFrame.
df = pd.read_csv(
    filepath_or_buffer="/kaggle/input/animal-behaviour-accelometere-data/abp_accel.csv",
)

In [None]:
# Work on a random subset of at most 10,000 rows (drawn without replacement).
# random_state pins the draw so Restart & Run All yields the same rows, and the
# min() guard avoids the ValueError pandas raises when asked for more rows than
# the frame contains.
df = df.sample(n=min(10000, len(df)), replace=False, random_state=42)

In [None]:
# Display the sampled DataFrame as the cell output.
df

# Data exploration before processing 

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Pairwise relationships between the columns of the raw sample.
sns.pairplot(data=df)

In [None]:
# Euclidean norm of the (x, y, z) acceleration vector, computed row by row.
axis_squares = [df[axis] ** 2 for axis in ('x', 'y', 'z')]
df['magnitude'] = np.sqrt(axis_squares[0] + axis_squares[1] + axis_squares[2])


In [None]:
# Colour the pair plot by magnitude quartile instead of the raw magnitude.
# A continuous hue has (nearly) one distinct level per row, so the diagonal
# distribution plots would be drawn once per level, which is extremely slow and
# unreadable on thousands of rows. Binning keeps the colouring meaningful.
magnitude_quartile = pd.qcut(df['magnitude'], q=4, labels=False, duplicates='drop')
sns.pairplot(
    # Drop the raw magnitude so the grid shows the same variables as before
    # (pairplot excludes a numeric hue column from the plotted variables).
    df.drop(columns='magnitude').assign(magnitude_quartile=magnitude_quartile),
    hue='magnitude_quartile',
)

In [None]:
# Magnitude against the x-axis reading. A scatter is used because both values
# are continuous; plt.bar would draw one default-width bar per sampled row,
# producing a heavily overlapping and slow-to-render figure.
plt.scatter(df['x'], df['magnitude'], s=4, alpha=0.4)
plt.xlabel('x')
plt.ylabel('magnitude')
plt.title('Magnitude vs. x')
plt.show()

In [None]:
# Magnitude against the y-axis reading. A scatter is used because both values
# are continuous; plt.bar would draw one default-width bar per sampled row,
# producing a heavily overlapping and slow-to-render figure.
plt.scatter(df['y'], df['magnitude'], s=4, alpha=0.4)
plt.xlabel('y')
plt.ylabel('magnitude')
plt.title('Magnitude vs. y')
plt.show()

In [None]:
# Magnitude against the z-axis reading. A scatter is used because both values
# are continuous; plt.bar would draw one default-width bar per sampled row,
# producing a heavily overlapping and slow-to-render figure.
plt.scatter(df['z'], df['magnitude'], s=4, alpha=0.4)
plt.xlabel('z')
plt.ylabel('magnitude')
plt.title('Magnitude vs. z')
plt.show()

In [None]:
# Print a summary of the frame: column names, dtypes and non-null counts.
df.info()

In [None]:
# Count missing values per column.
df.isnull().sum()

In [None]:
from datetime import datetime
# Convert the timestamp column to datetimes using an explicit
# "YYYY-MM-DD HH:MM:SS.fraction" format string.
timestamp_format = '%Y-%m-%d %H:%M:%S.%f'
df['timestamp'] = pd.to_datetime(df['timestamp'], format=timestamp_format)

In [None]:
# Re-print the frame summary to check dtypes after the timestamp conversion.
df.info()

In [None]:
# Preview the first ten rows.
df.head(n=10)

# Converting units from "mg" (milli-g) to "m/s^2"

In [None]:
# Standard acceleration due to gravity in m/s^2
standard_gravity = 9.81

# Convert each axis from milli-g to m/s^2:
#   mg -> g      : divide by 1000
#   g  -> m/s^2  : multiply by standard gravity
# (Dividing by gravity instead would leave the values about 96x too small and
# not in m/s^2 at all.)
axis_cols = ['x', 'y', 'z']
for axis in axis_cols:
    df[axis + ' (m/s^2)'] = df[axis] / 1000 * standard_gravity

# NOTE: 'magnitude' was derived from the raw readings and is not converted here.

# Drop the original acceleration columns (reassigning rather than mutating in
# place keeps the operation explicit).
df = df.drop(columns=axis_cols)

In [None]:
# Preview the first ten rows after the unit conversion.
df.head(n=10)

# Standard scaling: rescale each feature to mean 0 and standard deviation 1

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise the three axis columns and the magnitude, overwriting the
# original values with their scaled counterparts.
scaled_cols = ['x (m/s^2)', 'y (m/s^2)', 'z (m/s^2)', 'magnitude']
scaler = StandardScaler()
df[scaled_cols] = scaler.fit_transform(df[scaled_cols])

In [None]:
# Preview the first ten rows after scaling.
df.head(n=10)

In [None]:
# Pair plot of the scaled data, coloured by magnitude quartile.
# The raw magnitude is continuous, so using it directly as hue creates
# (nearly) one hue level per row and the diagonal distribution plots are drawn
# once per level, which is extremely slow and unreadable. Binning into
# quartiles keeps the colouring meaningful.
magnitude_quartile = pd.qcut(df['magnitude'], q=4, labels=False, duplicates='drop')
sns.pairplot(
    # Drop the raw magnitude so the grid shows the same variables as before
    # (pairplot excludes a numeric hue column from the plotted variables).
    df.drop(columns='magnitude').assign(magnitude_quartile=magnitude_quartile),
    hue='magnitude_quartile',
)

# Applying Clustering

In [None]:
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

# Columns used as clustering features (the per-axis acceleration columns)
feature_cols = ['x (m/s^2)','y (m/s^2)','z (m/s^2)']

# Select the features for clustering
X = df[feature_cols]

# Elbow method: collect the within-cluster sum of squares (WCSS) for each
# candidate number of clusters.
wcss = []

# Try k = 1 .. 10
for i in range(1, 11):
    # n_init is passed explicitly because its default changed between
    # scikit-learn releases (10 -> 'auto'); leaving it implicit emits a
    # FutureWarning on some versions and makes the curve version-dependent.
    kmeans = KMeans(n_clusters=i, init='k-means++', n_init=10, random_state=42)
    kmeans.fit(X)
    wcss.append(kmeans.inertia_)

In [None]:
# Elbow curve: WCSS against the number of clusters tried (starting at k = 1).
# The x values are derived from len(wcss) so the plot stays in sync with the
# loop that filled the list.
cluster_counts = range(1, len(wcss) + 1)
plt.plot(cluster_counts, wcss, marker='o')
plt.xlabel('Number of clusters (k)')
plt.ylabel('WCSS (inertia)')
plt.title('Elbow Method')
plt.show()

In [None]:
# Fit the final model with three clusters and store each row's cluster id.
# n_init is passed explicitly because its default changed between scikit-learn
# releases (10 -> 'auto'); pinning it avoids the FutureWarning and keeps the
# cluster assignment reproducible across versions.
kmeans = KMeans(n_clusters=3, init='k-means++', n_init=10, random_state=42)
df['cluster'] = kmeans.fit_predict(X)

In [None]:
# Preview the first ten rows with the new 'cluster' column.
df.head(n=10)

In [None]:
# Attach a human-readable behaviour name to every row, keyed by the row's
# cluster id from the clustering step.
# NOTE(review): KMeans cluster ids carry no inherent meaning, so this
# id -> behaviour assignment is an assumption. Confirm it against the centroids.
behavior_labels = dict(enumerate([
    'Lying down/resting',
    'Eating',
    'Active/movement',
]))

# Translate cluster ids to labels; ids missing from the mapping become NaN
df['behavior'] = df['cluster'].map(behavior_labels)

# Preview the frame with the new 'behavior' column
df.head(n=5)

# NOW we can easily see the clusters 

In [None]:
# Pair plot coloured by the assigned cluster id.
sns.pairplot(data=df, hue="cluster")

In [None]:
# Plot the clustered samples in the x/y plane together with the cluster centres.
# Both are in standardised units because the clustering ran on the scaled
# columns, so the axes are labelled accordingly (they are not per-axis means).
plt.scatter(df['x (m/s^2)'], df['y (m/s^2)'], c=df['cluster'], cmap='viridis')
# Columns 0 and 1 of cluster_centers_ follow the feature order used for
# fitting (x first, then y).
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s=300, c='red', label='Centroids')
plt.xlabel('x (standardized)')
plt.ylabel('y (standardized)')
plt.title('Clustering of Accelerometer Data')
plt.legend()
plt.show()