# Employee Misconduct Data Visualization (20 Rows)

This notebook loads the 20-row `misconduct_data_20.csv` file and creates the following plots:
- Bar: Frequency of each behavior
- Pie: Proportion of each behavior
- Histogram: Distribution of EAR
- Scatter: EAR vs MAR by behavior
- Pairplot: Relationships between EAR, MAR, and head bend ratio
- Confusion Matrix: Predicted vs ground truth behaviors
- ROC Curve: Misconduct vs no misconduct

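Ensure `misconduct_data_20.csv` is in the same directory as this notebook. Each plot is also saved as a PNG next to the notebook; the Plotly `write_image` calls additionally need a static-image export engine (e.g. the `kaleido` package) to be installed.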

In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, roc_curve, auc
%matplotlib inline

# Read the dataset from the notebook's working directory into a DataFrame.
df = pd.read_csv(filepath_or_buffer='misconduct_data_20.csv')

# A bare trailing expression makes Jupyter render the frame as the cell output.
df

## Bar Plot: Behavior Frequency

In [None]:
# Tally how often each behavior label occurs. The resulting two-column frame
# (Behavior, Count) is also the input of the pie-chart cell.
behavior_counts = (
    df['behavior']
    .value_counts()
    .rename_axis('Behavior')
    .reset_index(name='Count')
)

# One bar per behavior, coloured by behavior.
fig = px.bar(
    behavior_counts,
    x='Behavior',
    y='Count',
    title='Behavior Frequency',
    color='Behavior',
    color_discrete_sequence=px.colors.qualitative.Plotly,
)
fig.update_xaxes(title_text='Behavior')
fig.update_yaxes(title_text='Count')
fig.show()

# Export a static PNG copy of the chart.
fig.write_image('bar_plot.png')

## Pie Chart: Behavior Distribution

In [None]:
# Show each behavior's share of the total, using the counts built in the bar-plot cell.
fig = px.pie(
    data_frame=behavior_counts,
    names='Behavior',
    values='Count',
    title='Behavior Distribution',
    color_discrete_sequence=px.colors.qualitative.Plotly,
)
fig.show()

# Export a static PNG copy of the chart.
fig.write_image('pie_chart.png')

## Histogram: EAR Distribution

In [None]:
# Histogram of the `ear` column, drawn in a single fixed colour.
fig = px.histogram(
    df,
    x='ear',
    nbins=10,
    title='EAR Distribution',
    color_discrete_sequence=['#636EFA'],
)
fig.update_xaxes(title_text='EAR')
fig.update_yaxes(title_text='Frequency')
fig.show()

# Export a static PNG copy of the chart.
fig.write_image('histogram.png')

## Scatter Plot: EAR vs MAR by Behavior

In [None]:
# Plot `ear` against `mar`, one colour per behavior; the timestamp is shown on hover.
# NOTE(review): no `size` column is passed, so `size_max` presumably has no
# visible effect here -- confirm before relying on it.
fig = px.scatter(
    df,
    x='ear',
    y='mar',
    color='behavior',
    size_max=10,
    title='EAR vs MAR by Behavior',
    hover_data=['timestamp'],
    color_discrete_sequence=px.colors.qualitative.Plotly,
)
fig.update_xaxes(title_text='EAR')
fig.update_yaxes(title_text='MAR')
fig.show()

# Export a static PNG copy of the chart.
fig.write_image('scatter_plot.png')

## Pairplot: Relationships between EAR, MAR, and Head Bend Ratio

In [None]:
# Pairwise relationships between the three numeric features, coloured by behavior.
plot_df = df[['ear', 'mar', 'head_bend_ratio', 'behavior']]
sns.pairplot(plot_df, hue='behavior', diag_kind='hist', palette='Set2')
# y > 1 lifts the title above the top row of subplots so it does not overlap them.
plt.suptitle('Pairplot of EAR, MAR, and Head Bend Ratio', y=1.02)

# Save BEFORE plt.show(): with the inline backend, show() renders and then
# discards the current figure, so a later savefig() would write a blank image.
plt.savefig('pairplot.png', bbox_inches='tight')
plt.show()
plt.close()

## Confusion Matrix: Predicted vs Ground Truth Behaviors

In [None]:
# Build one shared, sorted label list and use it for both the matrix and the
# tick labels. Deriving the tick labels separately from each column mislabels
# the axes whenever a class appears in only one of the two columns, because
# confusion_matrix always sizes the matrix by the union of both.
class_labels = sorted(set(df['ground_truth_behavior']) | set(df['behavior']))
cm = confusion_matrix(df['ground_truth_behavior'], df['behavior'], labels=class_labels)

# Rows are the ground-truth classes, columns are the predicted classes.
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_labels,
            yticklabels=class_labels)
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('True')

# Save BEFORE plt.show(): with the inline backend, show() renders and then
# discards the current figure, so a later savefig() would write a blank image.
plt.savefig('confusion_matrix.png', bbox_inches='tight')
plt.show()
plt.close()

## ROC Curve: Misconduct vs No Misconduct

In [None]:
# Collapse the multi-class labels to binary: 'Awake' -> 0, any other label -> 1.
binary_true = (df['ground_truth_behavior'] != 'Awake').astype(int).tolist()
binary_pred = (df['behavior'] != 'Awake').astype(int).tolist()

# NOTE(review): `binary_pred` holds hard 0/1 labels rather than scores or
# probabilities, so the "curve" presumably has only a handful of points --
# confirm whether a confidence column is available to use instead.
fpr, tpr, _ = roc_curve(binary_true, binary_pred)
roc_auc = auc(fpr, tpr)

# First trace is the classifier's ROC curve, second is the chance diagonal.
roc_trace = go.Scatter(
    x=fpr,
    y=tpr,
    mode='lines',
    name=f'ROC curve (AUC = {roc_auc:.2f})',
)
chance_trace = go.Scatter(
    x=[0, 1],
    y=[0, 1],
    mode='lines',
    line=dict(dash='dash'),
    name='Random',
)
fig = go.Figure(data=[roc_trace, chance_trace])
fig.update_layout(
    title='ROC Curve (Misconduct vs No Misconduct)',
    xaxis_title='False Positive Rate',
    yaxis_title='True Positive Rate',
)
fig.show()

# Export a static PNG copy of the chart.
fig.write_image('roc_curve.png')