# Event Detection Model


## Visualize Data

In [1]:
# Import core libraries
import numpy as np
import scipy as sp
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

# Seaborn theme: dark grid, pastel palette, top and right axis spines hidden
custom_params = dict.fromkeys(("axes.spines.right", "axes.spines.top"), False)
sns.set_theme(palette="pastel", rc=custom_params, style="darkgrid")

# Load the tweet-count series.
# The path is relative to the notebook's working directory; edit DATA_PATH if
# the CSV lives elsewhere (a missing file raises FileNotFoundError here).
DATA_PATH = 'num_tweets.csv'
REQUIRED_COLUMNS = ('Month', 'Tweet Count')  # columns the later cells read

df_chatter = pd.read_csv(DATA_PATH)

# Fail early with a readable message rather than a bare KeyError in a later cell
missing_columns = [col for col in REQUIRED_COLUMNS if col not in df_chatter.columns]
if missing_columns:
    raise KeyError(f"{DATA_PATH} is missing expected column(s): {missing_columns}")

# Parse the month column into timestamps so the .dt accessor works downstream
df_chatter['Month'] = pd.to_datetime(df_chatter['Month'])

df_chatter.describe()

FileNotFoundError: [Errno 2] No such file or directory: 'num_tweets.csv'

## Peak Detection

In [None]:
import plotly.graph_objects as go
from scipy.signal import find_peaks

# Month timestamps and tweet counts, used for both detection and plotting
x = df_chatter['Month']
y = df_chatter['Tweet Count']

# Detect local maxima in the tweet-count series.
# Only `width` (required peak width, in samples) is constrained here;
# find_peaks also accepts height, threshold, distance and prominence if the
# detection needs to be tightened.
peaks, _ = find_peaks(y, width=2)

# Rows of df_chatter at the detected peak positions (find_peaks returns
# positional indices, hence .iloc)
events = df_chatter.iloc[peaks]

# Plot peaks
fig = go.Figure()

# Original data.
# Plotted against the actual timestamps (not the integer row index) so that
# the date-based tick settings below -- dtick='M1' and tickformat -- apply.
fig.add_trace(go.Scatter(
  x=x,
  y=y,
  hovertext=x.dt.strftime('%Y-%m'),
  mode='lines',
  name='Original Data'))

# Peaks
fig.add_trace(go.Scatter(
  x=events['Month'],
  y=events['Tweet Count'],
  hovertext=events['Month'].dt.strftime('%Y-%m'),
  mode='markers',
  name='Peaks',
  marker=dict(
    color='red',
    size=8,
    symbol='x')))

fig.update_layout(height=600,
                  title='Tweet Count Peaks',
                  xaxis_title='Month',
                  yaxis_title='Tweet Count')

# One tick per month, labelled as YYYY-MM (requires a date x-axis)
fig.update_xaxes(tickmode='linear',
                 dtick='M1',
                 tickformat='%Y-%m')

fig.show()


## Change Point Detection

In [None]:
%%capture
!pip install ruptures

In [None]:
import ruptures as rpt
import math
import numpy as np
import plotly.graph_objects as go

# Univariate signal handed to ruptures
data = df_chatter['Tweet Count'].values

# 'YYYY-MM' label per row, computed once and reused for legend names,
# hover text and tick labels
month_labels = df_chatter['Month'].dt.strftime('%Y-%m')

# Change-point detection via Pelt algorithm
model = "rbf"
algo = rpt.Pelt(model=model).fit(data)
result = algo.predict(pen=0.7)  # Adjust the penalty value based on your data

# y-coordinates of the points making up each vertical change-point line; the
# ends match yaxis_range below.
# NOTE(review): the intermediate points presumably give the hover label more
# anchor points along the line -- confirm before reducing to two endpoints.
change_line_y = [-1, 0, 2, 4, 5, 6, 8, 10, 13, 15]

fig = go.Figure()

# Original data
fig.add_trace(go.Scatter(
    x=df_chatter.index,
    y=df_chatter['Tweet Count'],
    hovertext=month_labels,
    mode='lines',
    name='Original Data'))

# Change points.
# The last breakpoint returned by ruptures is the end of the signal (not a
# valid row position), so it is skipped.
for cp_index in result[:-1]:
    # cp_index is a row position, so index and label are both looked up
    # positionally
    cp_x = df_chatter.index[cp_index]
    cp_month = month_labels.iloc[cp_index]

    fig.add_trace(go.Scatter(
        x=[cp_x] * len(change_line_y),
        y=change_line_y,
        mode='lines',
        name=cp_month,
        hoverinfo='text',
        hovertext='Change at<br>%s' % cp_month,
        line=dict(color='red', width=1, dash='dash'),
        showlegend=True  # Show trace label in the legend
    ))

# Highlight July 2020
july_2020_index = df_chatter[month_labels == '2020-07'].index[0]
fig.add_trace(go.Scatter(
    x=[july_2020_index, july_2020_index],
    y=[-1, 15],
    mode='lines',
    name='July 2020',  # non-empty so the legend entry is not blank
    hoverinfo='text',
    hovertext='Highlight: July 2020',
    line=dict(color='green', width=2, dash='dot'),
    showlegend=True  # Show trace label in the legend
))

fig.update_layout(height=600,
                  title='Tweet Count Change Points',
                  xaxis_title='Month',
                  yaxis_title='Tweet Count',
                  yaxis_range=[-1, 15])

# Update x-axis tick values and text: one tick per row, labelled YYYY-MM
xtv = np.arange(len(df_chatter))
xtt = month_labels.values
fig.update_xaxes(tickmode='array',
                 tickvals=xtv,
                 ticktext=xtt)

fig.show()


In [None]:
import plotly.graph_objects as go
from scipy.signal import find_peaks
import ruptures as rpt
import numpy as np

# Combined figure: tweet counts, detected peaks, detected change points and
# hand-picked notable events.

data = df_chatter['Tweet Count'].values

# 'YYYY-MM' label per row, computed once and reused below
month_labels = df_chatter['Month'].dt.strftime('%Y-%m')

# Vertical extent of every annotation; matches yaxis_range in the layout
Y_MIN, Y_MAX = -1, 15

# y-coordinates of the points making up each change-point line.
# NOTE(review): the intermediate points presumably give the hover label more
# anchor points along the line -- confirm before reducing to two endpoints.
CHANGE_LINE_Y = [-1, 0, 2, 4, 5, 6, 8, 10, 13, 15]


def _month_position(month):
    """Return the index label of the first row whose month equals `month`.

    `month` is a 'YYYY-MM' string. Raises ValueError when the month is not
    present in df_chatter.
    """
    matches = df_chatter[month_labels == month].index
    if len(matches) == 0:
        raise ValueError(f"Month {month!r} not found in df_chatter['Month']")
    return matches[0]


def _add_event_line(fig, month, name, color):
    """Add a thick vertical line at `month`, labelled `name` in the legend.

    The hover text is built from the event's own name and month.
    """
    x_pos = _month_position(month)
    fig.add_trace(go.Scatter(
        x=[x_pos, x_pos],
        y=[Y_MIN, Y_MAX],
        mode='lines',
        name=name,
        hoverinfo='text',
        hovertext=f'{name}<br>{month}',
        line=dict(color=color, width=5),
        showlegend=True  # Show trace label in the legend
    ))


def _add_event_range(fig, start_month, end_month, name, rgba):
    """Add a translucent filled rectangle spanning `start_month`..`end_month`."""
    start = _month_position(start_month)
    end = _month_position(end_month)
    fig.add_trace(go.Scatter(
        x=[start, start, end, end],
        y=[Y_MIN, Y_MAX, Y_MAX, Y_MIN],
        mode='lines',
        fill='toself',
        fillcolor=rgba,
        line=dict(color=rgba),
        name=name,
        showlegend=True
    ))


# Change-point detection via Pelt algorithm
model = "rbf"
algo = rpt.Pelt(model=model).fit(data)
result = algo.predict(pen=0.5)  # Adjust the penalty value based on your data

# Find peaks (positional indices into df_chatter)
peaks, _ = find_peaks(df_chatter['Tweet Count'], width=1.25)
events = df_chatter.iloc[peaks]

fig = go.Figure(layout=dict(width=2000))

# Original data
fig.add_trace(go.Scatter(
    x=df_chatter.index,
    y=df_chatter['Tweet Count'],
    hovertext=month_labels,
    mode='lines',
    name='Original Data'))

# Notable events, added in the order they should appear in the legend
_add_event_line(fig, '2020-07', 'Anti-Terror Law<br>Effective', 'blue')
_add_event_line(fig, '2020-02', 'Anti-Terror Law<br>Passed in the Senate', 'orange')
_add_event_range(fig, '2020-11', '2020-12',
                 'Redtagging<br>Senate Hearing', 'rgba(0, 255, 153, 0.2)')
_add_event_range(fig, '2021-10', '2022-06',
                 'Election<br>Period', 'rgba(115, 0, 255, 0.2)')
_add_event_line(fig, '2022-05', 'Election Day', 'green')
_add_event_line(fig, '2021-04',
                "Health workers' and teachers' groups red-tagged by NTF-ELCAC",
                'lightgreen')

# NOTE(review): this month is identical to the red-tagging event above and
# looks copy-pasted, so the two lines are drawn on top of each other in the
# same colour. Confirm the intended month (and colour) for the SONA protests.
SONA_MONTH = '2021-04'
_add_event_line(fig, SONA_MONTH, 'Makabayan bloc protests against SONA', 'lightgreen')

# Peaks
fig.add_trace(go.Scatter(
    x=events.index,
    y=events['Tweet Count'],
    hovertext=events['Month'].dt.strftime('%Y-%m'),
    mode='markers',
    name='Peaks',
    marker=dict(
        color='red',
        size=12,
        symbol='x')))

# Change points.
# The last breakpoint returned by ruptures is the end of the signal (not a
# valid row position), so it is skipped. Only the first line gets a legend
# entry so 'Change points' is listed once.
for line_number, cp_index in enumerate(result[:-1]):
    cp_x = df_chatter.index[cp_index]
    fig.add_trace(go.Scatter(
        x=[cp_x] * len(CHANGE_LINE_Y),
        y=CHANGE_LINE_Y,
        mode='lines',
        name='Change points',
        hoverinfo='text',
        hovertext='Change at<br>%s' % month_labels.iloc[cp_index],
        line=dict(color='black', width=1, dash='dash'),
        showlegend=(line_number == 0)
    ))

fig.update_layout(height=600,
                  title='Red-Tagging of Makabayan Tweet Activity and Notable Events',
                  xaxis_title='Month',
                  yaxis_title='Tweet Count',
                  yaxis_range=[Y_MIN, Y_MAX])

# Update x-axis tick values and text: one tick per row, labelled YYYY-MM
xtv = np.arange(len(df_chatter))
xtt = [f"<br>{value}</br>" for value in month_labels.values]
fig.update_xaxes(tickmode='array',
                 tickvals=xtv,
                 ticktext=xtt)

# Presentation fonts
fig.update_layout(
    font=dict(family='Arial Black', size=16),
    title=dict(font=dict(family='Arial Black', size=30)),
    legend=dict(font=dict(family='Arial Black', size=20)),
    xaxis=dict(title=dict(font=dict(family='Arial Black', size=20))),
    yaxis=dict(title=dict(font=dict(family='Arial Black', size=20))),
)

fig.show()
