# Interaction Differences Analysis

A storage-access request is automatically denied if no user interaction preceded the first time access is requested. Websites might therefore not call Storage Access API (SAA) functions until a specific user interaction has happened. This notebook analyzes whether performing user interactions on the visited site makes a significant difference for the detection of SAA usage via function hooking.

## Imports and Helper Functions

In [None]:
from database import Task, URL
from IPython.display import display, HTML
from modules.storageaccessapi import Document, DocumentInclusion, Script, ScriptInclusion, SaaCall
from peewee import fn
import matplotlib.pyplot as plt
import pandas as pd
import plotly.graph_objects as go
import plotly.io as pio
from plotly.subplots import make_subplots

In [None]:
def get_all_sites_calls(job: str) -> pd.DataFrame:
    """Return the distinct sites with recorded SAA usage for one crawl job.

    A site is included when at least one ``SaaCall`` row belonging to ``job``
    has ``has_saa`` or ``request_saa`` equal to ``True``.

    Args:
        job: Value to match against ``SaaCall.job``.

    Returns:
        A DataFrame with a single ``site`` column, one row per distinct site.
        The column exists even when the query matches nothing, so callers can
        always index ``["site"]`` and ``len()`` is the number of sites.
    """
    query = (
        SaaCall
        .select(SaaCall.site)
        .where(
            (SaaCall.job == job)
            # The explicit `== True` is kept on purpose: the comparison is what
            # builds the query expression, so it must not be "simplified" away.
            & ((SaaCall.has_saa == True) | (SaaCall.request_saa == True))  # noqa: E712
        )
        .distinct(SaaCall.site)
    )
    # Pin the schema: without `columns`, an empty result produces a frame with
    # no columns at all and a later `["site"]` lookup raises KeyError.
    return pd.DataFrame(list(query.dicts()), columns=["site"])

---

## Collecting Data

In [None]:
# Crawl indices; they are also the numeric suffix of the job names queried below.
timestamps = list(range(1, 6))

# Collect data
with_interaction = []       # number of distinct sites per crawl, with interaction
without_interaction = []    # number of distinct sites per crawl, without interaction
sites_with, sites_without = set(), set()   # union of sites over all crawls

for i in timestamps:
    # Query every job exactly once and reuse the frame for both the per-crawl
    # count and the running union (previously each job was queried twice).
    calls_with = get_all_sites_calls(f"storageaccessapiwi{i}")
    calls_without = get_all_sites_calls(f"storageaccessapiwoi{i}")

    with_interaction.append(len(calls_with))
    without_interaction.append(len(calls_without))

    # `.get` tolerates a frame that has no "site" column (a crawl with no hits).
    sites_with |= set(calls_with.get("site", ()))
    sites_without |= set(calls_without.get("site", ()))

# Partition the sites by the condition(s) under which they were detected.
sites_both = sites_with & sites_without
sites_only_with = sites_with - sites_both
sites_only_without = sites_without - sites_both

---

## Plotting Results

In [None]:
# X positions of the horizontal reference lines: one step beyond the first and
# last crawl so the lines span the full width of the outermost bars.
line_x = [timestamps[0] - 1, *timestamps, timestamps[-1] + 1]
mean_with = sum(with_interaction) / len(with_interaction)

fig1 = go.Figure()

# Per-crawl number of sites detected with interaction
fig1.add_trace(go.Bar(
    x=timestamps,
    y=with_interaction,
    name='With Interaction',
    # CSS colour names contain no spaces. 'light blue' is not a recognised
    # colour for the renderer, so the bars would not be drawn in light blue.
    marker_color='lightblue',
    width=0.6,
    showlegend=False
))

# Total unique sites
fig1.add_trace(go.Scatter(
    x=line_x,
    y=[len(sites_with)] * len(line_x),
    mode='lines',
    line=dict(color='red'),
    name='Total unique sites',
    showlegend=True
))

# Average
fig1.add_trace(go.Scatter(
    x=line_x,
    y=[mean_with] * len(line_x),
    mode='lines',
    line=dict(dash='dash', color='black'),
    name='Average',
    showlegend=True
))

fig1.update_layout(
    title_text='With Interaction',
    # NOTE(review): the y-range is fixed; values outside 225-325 are cut off.
    yaxis=dict(title="№ of Sites", range=[225, 325], ticklabelposition="outside", ticksuffix=' '),
    xaxis=dict(title='Crawl', tickvals=timestamps),
    width=520
)

pio.show(fig1)

In [None]:
# Both horizontal reference lines share the same x positions: the crawl
# indices padded by one position on either side.
reference_x = [0] + timestamps + [6]
n_points = len(reference_x)
mean_without = sum(without_interaction) / len(without_interaction)

fig2_traces = [
    # Per-crawl number of sites detected without interaction
    go.Bar(
        x=timestamps,
        y=without_interaction,
        name='Without Interaction',
        marker_color='orange',
        width=0.6,
        showlegend=False,
    ),
    # Total unique sites
    go.Scatter(
        x=reference_x,
        y=[len(sites_without)] * n_points,
        mode='lines',
        line={'color': 'red'},
        name='Total unique sites',
        showlegend=True,
    ),
    # Average
    go.Scatter(
        x=reference_x,
        y=[mean_without] * n_points,
        mode='lines',
        line={'dash': 'dash', 'color': 'black'},
        name='Average',
        showlegend=True,
    ),
]

fig2 = go.Figure(data=fig2_traces)
fig2.update_layout(
    title_text='Without Interaction',
    showlegend=True,
    yaxis={
        'title': "№ of Sites",
        'range': [225, 325],
        'ticklabelposition': "outside",
        'ticksuffix': ' ',
    },
    xaxis={'title': 'Crawl', 'tickvals': [1, 2, 3, 4, 5]},
    width=520,
)

pio.show(fig2)

In [None]:
# Site counts per category, in reading order.
values = [len(sites_both), len(sites_only_with), len(sites_only_without)]
category_labels = [
    "Found w/ and   <br>w/o Interaction",
    "Found only   <br>w/ Interaction",
    "Found only   <br>w/o Interaction",
]
category_colors = ['green', 'blue', 'orange']

# Everything is handed to the bar trace in reversed order
# (presumably so the first category ends up at the top of the chart).
counts = list(reversed(values))
# Counts above 50 get a white label inside the bar; smaller ones get a black
# label placed next to the bar.
label_inside = [count > 50 for count in counts]

bar = go.Bar(
    x=counts,
    y=list(reversed(category_labels)),
    orientation='h',
    marker={'color': list(reversed(category_colors))},
    showlegend=False,
    text=counts,
    textposition=['inside' if inside else 'outside' for inside in label_inside],
    textfont={'color': ['white' if inside else 'black' for inside in label_inside]},
)

fig = go.Figure(bar)
fig.update_layout(
    title_text='Distribution of Sites Found',
    showlegend=True,
    yaxis={'title': "", 'ticklabelposition': "outside", 'ticksuffix': '   '},
    xaxis={'title': "№ of Sites", 'range': [0, 300]},
    width=1000,
    bargap=0.3,
    height=325,
)

fig.show()

---

## Statistical Test

In [None]:
import math

from scipy import stats

# Significance level; defined once so the printed comparison and the decision
# below can never disagree.
alpha = 0.05

print(with_interaction, without_interaction)

# Establish the Null Hypothesis
print("Null Hypothesis: There is no significant difference between the two techniques.\n")

# Independent two-sample t-test on the per-crawl site counts
t_stat, p_value = stats.ttest_ind(with_interaction, without_interaction, alternative='two-sided')

if math.isnan(p_value):
    # A NaN p-value compares False against everything; without this branch the
    # output would claim "< 0.05" and then "Fail to reject" at the same time.
    print("P-value: nan (undefined)")
    print("Test inconclusive: the t-statistic is undefined for these samples "
          "(e.g. both samples have zero variance).")
else:
    comparison = '>=' if p_value >= alpha else '<'
    print(f"P-value: {p_value} {comparison} {alpha}")

    # Conclusion based on p-value
    if p_value < alpha:
        print("Reject the null hypothesis: There is a significant difference between the two techniques.")
    else:
        print("Fail to reject the null hypothesis: There is no significant difference between the two techniques.")