# What Does an Outlier Sound Like?

In [12]:
# Sonifying Outliers Data
# Reading outliers.csv and sonifying last_update_time vs. is_old_outlier

import pandas as pd
import plotly.graph_objects as go
from astronify.series import SoniSeries
from astropy.table import Table
from plotly.colors import qualitative

# Read the outliers CSV file (path is relative to the working directory).
# Later cells read these columns from it: provider_id, provider_name,
# last_update_time, raw_odd and is_old_outlier.
df = pd.read_csv('outliers.csv')

In [13]:
# Build the plotting frame in one chained step; `assign` returns a new
# DataFrame, so the raw `df` loaded from the CSV is left untouched.
df_plot = df.assign(
    # Parse the timestamp column into datetimes
    last_update_time=lambda frame: pd.to_datetime(frame['last_update_time']),
    # Marker size keyed on outlier status (outliers are drawn larger)
    marker_size=lambda frame: frame['is_old_outlier'].map({True: 8, False: 4}),
    # Human-readable outlier status for hover text
    outlier_text=lambda frame: frame['is_old_outlier'].map(
        {True: 'OUTLIER', False: 'Normal'}),
)

In [14]:
# Create scatter plot with provider-grouped legend
fig_grouped = go.Figure()

# Get unique providers
unique_providers = sorted(int(pid) for pid in df_plot['provider_id'].unique())

# Explicit colour palette, cycled when there are more providers than colours.
# A bare integer (the provider id) is not a colour specification for a
# single-colour trace, so each provider gets a palette entry instead; both of
# its traces (normal + outlier) then share the same colour.
palette = qualitative.Plotly

# One entry per point status:
# (is_old_outlier value, marker styling, hover status line, hover-text suffix)
status_styles = [
    (
        False,
        dict(size=4, opacity=0.7, symbol='circle'),
        'Status: Normal<br>',
        '',
    ),
    (
        True,
        dict(size=8, opacity=1.0, symbol='x', line=dict(width=2, color='red')),
        'Status: <b>OUTLIER</b><br>',
        ' - OUTLIER',
    ),
]

# Add traces for each provider (normal and outlier grouped together)
for position, provider_id in enumerate(unique_providers):
    provider_data = df_plot[df_plot['provider_id'] == provider_id]
    provider_name = provider_data['provider_name'].iloc[0]
    color = palette[position % len(palette)]

    # Exactly one legend entry per provider: the first non-empty trace carries
    # it. This keeps providers that only have outlier rows toggleable.
    legend_shown = False
    for is_outlier, marker_style, status_line, text_suffix in status_styles:
        subset = provider_data[provider_data['is_old_outlier'] == is_outlier]
        if subset.empty:
            continue

        fig_grouped.add_trace(go.Scatter(
            x=subset['last_update_time'],
            y=subset['raw_odd'],
            mode='markers',
            name=f'{provider_name}',
            marker=dict(color=color, **marker_style),
            hovertemplate=(
                '<b>%{text}</b><br>' +
                'Time: %{x}<br>' +
                'Raw Odd: %{y:.3f}<br>' +
                status_line +
                '<extra></extra>'
            ),
            text=[f'{provider_name} (ID: {provider_id}){text_suffix}'] * len(subset),
            # Same group for both statuses so one legend click toggles the provider
            legendgroup=f'provider_{provider_id}',
            showlegend=not legend_shown
        ))
        legend_shown = True

# Update layout
fig_grouped.update_layout(
    title='Raw Odds Over Time by Provider<br><sub>Click provider names to toggle entire provider on/off</sub>',
    xaxis_title='Time',
    yaxis_title='Raw Odd Value',
    height=800,
    legend_title="Providers (click to toggle)",
    hovermode='closest'
)

fig_grouped.show()

In [None]:
PROVIDER_ID = 1

# Rows of the selected provider, in chronological order
df_temp = (
    df_plot[df_plot["provider_id"] == PROVIDER_ID]
    .sort_values("last_update_time")
    .copy()
    .reset_index(drop=True)
)

# Convert boolean to numeric for pitch mapping (0 = normal, 1 = outlier)
df_temp["outlier_numeric"] = df_temp["is_old_outlier"].astype(int)

# Normalize time to 0–1.
# Elapsed seconds are taken from timedeltas rather than from the raw integer
# representation, so the result does not depend on the datetime resolution and
# sub-second spacing is kept.
elapsed_s = (
    df_temp["last_update_time"] - df_temp["last_update_time"].min()
).dt.total_seconds()
time_span_s = elapsed_s.max()

# `not (x > 0)` is also True for NaN, which is what an empty selection yields.
# Without this guard the normalization below would divide by zero.
if not time_span_s > 0:
    raise ValueError(
        f"provider_id={PROVIDER_ID} has {len(df_temp)} row(s) and fewer than "
        "two distinct timestamps; cannot build a time axis to sonify"
    )
time_norm = elapsed_s / time_span_s

# Define a boost amount, e.g., 0.3 to add on top of time_norm for outliers
boost = 0.3

df_temp["pitch_value"] = time_norm + df_temp["outlier_numeric"] * boost

# Clip pitch_value to max 1.0 (to avoid exceeding max pitch)
df_temp["pitch_value"] = df_temp["pitch_value"].clip(upper=1.0)

# Create an Astropy Table for Astronify
table = Table()
table['time'] = time_norm

# Try both:
table['flux'] = df_temp["outlier_numeric"]
# table['flux'] = df_temp["pitch_value"]

# Create sonification object
sono = SoniSeries(table)

# Optional: pitch mapping
# The pitch range is configured in Hz through `pitch_map_args`; setting
# `min_pitch` / `max_pitch` attributes on the mapper has no effect.
# C3 ≈ 130.81 Hz, C6 ≈ 1046.50 Hz (A4 = 440 Hz tuning).
sono.pitch_mapper.pitch_map_args["pitch_range"] = [130.81, 1046.50]

# Sonify
sono.sonify()

### ⚠️ Trigger Alert: This is NOT an alarm! <img src="img/pikud.png">

In [27]:
# Play back the sonification prepared by `sono.sonify()` in the previous code cell
sono.play()
# Optional follow-ups (uncomment as needed):
# sono.stop()
# sono.write(f"provider_{PROVIDER_ID}.wav")

Portmidi closed.
