In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import dendrogram, linkage
from scipy.spatial.distance import pdist, squareform
from sklearn.manifold import MDS

FILE_PATH = 'Processed Data/Aruba_17/pre_processed_data.csv'

# Read data
data_df = pd.read_csv(FILE_PATH, names=['Date', 'Time', 'Device ID', 'Status', 'Activity', 'Activity Status'])

# Forward-fill the 'Activity' column so that each sensor event has an associated activity
data_df['Activity'] = data_df['Activity'].fillna(method='ffill')

# Filter the data by sensor type
motion_sensors = data_df[data_df['Device ID'].str.startswith('M')]

# Create a combined pivot table with all sensor types
combined_pivot_table = motion_sensors.pivot_table(index='Activity', columns='Device ID', values='Status', aggfunc='count', fill_value=0)

# Normalize the pivot table by row
normalized_combined_pivot_table = combined_pivot_table.div(combined_pivot_table.sum(axis=1), axis=0)

# Calculate the correlation between activities and create a distance matrix (1 - correlation)
activity_distance_matrix = 1 - normalized_combined_pivot_table.T.corr()

In [None]:
# Apply hierarchical clustering
Z = linkage(squareform(activity_distance_matrix), method='ward')
dendrogram(Z, labels=activity_distance_matrix.index, leaf_rotation=90)

plt.title("Hierarchical Clustering of Activities")
plt.xlabel("Activity")
plt.ylabel("Distance")
plt.show()

In [None]:
# Apply MDS to cluster activities
embedding = MDS(n_components=2, dissimilarity='precomputed', random_state=42)
activity_coordinates = embedding.fit_transform(activity_distance_matrix)

# Create a dataframe with MDS coordinates for activities
activity_df = pd.DataFrame(activity_coordinates, columns=['x', 'y'], index=activity_distance_matrix.index)

# Calculate the average coordinates for each sensor
sensor_coordinates = pd.DataFrame(index=normalized_combined_pivot_table.columns, columns=['x', 'y'], dtype=float)

for sensor in sensor_coordinates.index:
    sensor_coordinates.loc[sensor] = (normalized_combined_pivot_table[sensor] * activity_df).sum() / normalized_combined_pivot_table[sensor].sum()

# Scale the sensor coordinates to be closer to their respective activities
scaling_factor = 0.5
scaled_sensor_coordinates = sensor_coordinates * scaling_factor

# Create a scatter plot
plt.figure(figsize=(12, 8))

# Plot motion sensors
sns.scatterplot(data=scaled_sensor_coordinates, x='x', y='y', color='blue', label='Motion Sensors')

# Plot activities
sns.scatterplot(data=activity_df, x='x', y='y', hue=activity_df.index, palette='tab10', marker='*', s=200, legend=False)

# Add labels
for i, sensor in enumerate(scaled_sensor_coordinates.index):
    plt.text(scaled_sensor_coordinates.loc[sensor, 'x'], scaled_sensor_coordinates.loc[sensor, 'y'], sensor, fontsize=9, ha='center', va='center')

for i, activity in enumerate(activity_df.index):
    plt.text(activity_df.loc[activity, 'x'], activity_df.loc[activity, 'y'], activity, fontsize=9, ha='center', va='center', fontweight='bold')

plt.xlabel('MDS Dimension 1')
plt.ylabel('MDS Dimension 2')
plt.title('MDS Plot of Activities and Sensors')
plt.legend(title='Sensor Type')

# Show the plot
plt.show()

In [4]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.manifold import MDS

FILE_PATH = '../Processed Data/Aruba_17/pre_processed_data.csv'

# Read data
data_df = pd.read_csv(FILE_PATH, names=['Date', 'Time', 'Device ID', 'Status', 'Activity', 'Activity Status'])

# Forward-fill the 'Activity' column so that each sensor event has an associated activity
data_df['Activity'] = data_df['Activity'].fillna(method='ffill')

# Filter the data by sensor type
motion_sensors = data_df[data_df['Device ID'].str.startswith('M')]

# Create a combined pivot table with all sensor types
combined_pivot_table = motion_sensors.pivot_table(index='Activity', columns='Device ID', values='Status', aggfunc='count', fill_value=0)

# Normalize the pivot table by row
normalized_combined_pivot_table = combined_pivot_table.div(combined_pivot_table.sum(axis=1), axis=0)

# Compute MDS for activities
embedding_activity = MDS(n_components=2, random_state=42)
activity_coordinates = embedding_activity.fit_transform(normalized_combined_pivot_table)

# Compute MDS for sensors
embedding_sensor = MDS(n_components=2, random_state=42)
sensor_coordinates = embedding_sensor.fit_transform(normalized_combined_pivot_table.T)

# Create DataFrames for activities and sensors
activity_df = pd.DataFrame(activity_coordinates, columns=['x', 'y'], index=normalized_combined_pivot_table.index)
sensor_df = pd.DataFrame(sensor_coordinates, columns=['x', 'y'], index=normalized_combined_pivot_table.columns)

# Combine activity and sensor DataFrames
combined_df = pd.concat([activity_df.reset_index().rename(columns={'index': 'label'}),
                         sensor_df.reset_index().rename(columns={'index': 'label'})], ignore_index=True)

# Plot combined DataFrame
plt.figure(figsize=(12, 8))
sns.scatterplot(data=combined_df, x='x', y='y', hue='label', palette='tab10', marker='o', s=200)

# Add labels
for i, row in combined_df.iterrows():
    plt.text(row['x'], row['y'], row['label'], fontsize=9, ha='center', va='center', fontweight='bold')

plt.xlabel('MDS Dimension 1')
plt.ylabel('MDS Dimension 2')
plt.title('MDS Plot of Activities and Sensors')
plt.legend(title='Label')

# Show the plot
plt.show()

  data_df = pd.read_csv(FILE_PATH, names=['Date', 'Time', 'Device ID', 'Status', 'Activity', 'Activity Status'])


ValueError: Could not interpret value `label` for parameter `hue`

<Figure size 1200x800 with 0 Axes>