# Case Study NTP Amplification

Notebook for the NTP Amplification case study.

## Setup ambiente

## Caricamento dati

## Calcolo entropia e rapporti

In [5]:
# Generate the entropy-pipeline script for the Jupyter Docker layout
# (/workspace/datasets/NTP_amplification/).
#
# This cell does not run the pipeline: it only stores the script source in
# `notebook_code` and writes it to `script_path`. The generated script reads
# the raw CSV, assigns each row to a 10-row window, computes the base-2
# entropy of the `label` distribution per window, merges that value back onto
# every row, takes the row-to-row difference as `delta_H`, and saves the
# result as b_with_entropy.csv.
#
# NOTE: `\\` inside this (non-raw) string is written to the file as a single
# backslash, i.e. a Python line continuation in the generated script.
notebook_code = """
import pandas as pd
import numpy as np
from scipy.stats import entropy

# Caricamento del dataset
dataset_path = "/workspace/datasets/NTP_amplification/a_ntp_amplification_dataset.csv"
df = pd.read_csv(dataset_path)

# Calcolo entropia su finestre di 10 pacchetti
window_size = 10
df['window_id'] = df.index // window_size

# Entropia sulla distribuzione delle etichette
window_entropy = df.groupby('window_id')['label']\\
    .apply(lambda x: entropy(x.value_counts(normalize=True), base=2))\\
    .reset_index(name='entropy')

# Merge e calcolo ∆H
df = df.merge(window_entropy, on='window_id', how='left')
df['delta_H'] = df['entropy'].diff()

# Salvataggio dataset aggiornato
output_path = "/workspace/datasets/NTP_amplification/b_with_entropy.csv"
df.to_csv(output_path, index=False)

print(f"✅ File salvato in: {output_path}")
df.head()
"""

script_path = "/workspace/datasets/NTP_amplification/step_O2_entropy_pipeline.py"
# The script text contains non-ASCII characters (∆, ✅). Python 3 reads source
# files as UTF-8 by default, so the file must be written as UTF-8 explicitly
# rather than with whatever the locale's default encoding happens to be.
with open(script_path, "w", encoding="utf-8") as f:
    f.write(notebook_code)

In [6]:
# Script for O1 - RDF serialization, compatible with the /workspace environment.
#
# This cell only stores the script source in `rdf_script` and writes it to
# `rdf_script_path`; it does not execute it. The generated script loads
# b_with_entropy.csv, emits one ntp:Packet resource per row (bandwidth, label,
# and - when not NaN - entropy and delta_H), and serializes the graph as Turtle.
rdf_script = """
from rdflib import Graph, Namespace, URIRef, Literal
from rdflib.namespace import RDF, XSD
import pandas as pd

# Load input with entropy
df = pd.read_csv("/workspace/datasets/NTP_amplification/b_with_entropy.csv")

# Prepare RDF graph
g = Graph()
NTP = Namespace("http://example.org/ntp#")
g.bind("ntp", NTP)

# Serialize each packet
for _, row in df.iterrows():
    packet_uri = URIRef(f"http://example.org/ntp/packet/{row['packet_id']}")
    g.add((packet_uri, RDF.type, NTP.Packet))
    g.add((packet_uri, NTP.hasBandwidth, Literal(int(row['bandwidth_MB']), datatype=XSD.integer)))
    g.add((packet_uri, NTP.hasLabel, Literal(str(row['label']), datatype=XSD.string)))
    
    if not pd.isna(row['entropy']):
        g.add((packet_uri, NTP.hasEntropy, Literal(float(row['entropy']), datatype=XSD.float)))
    if not pd.isna(row['delta_H']):
        g.add((packet_uri, NTP.hasDeltaH, Literal(float(row['delta_H']), datatype=XSD.float)))

# Save RDF triples
rdf_path = "/workspace/datasets/NTP_amplification/O1_rdf_serialisation.ttl"
g.serialize(destination=rdf_path, format='turtle')
print(f"✅ RDF serialization completed: {rdf_path}")
"""

rdf_script_path = "/workspace/datasets/NTP_amplification/step_O1_rdf_serialisation.py"
# The script text contains a non-ASCII character (✅); write it as UTF-8
# explicitly so the file is valid Python source regardless of the locale.
with open(rdf_script_path, "w", encoding="utf-8") as f:
    f.write(rdf_script)

## Defence Stack

In [8]:
# Python script for Step O2: anomaly marking based on a ∆H threshold.
#
# This cell only stores the script source in `step_O2_anomaly_script` and
# writes it to `anomaly_script_path`; it does not execute it. The generated
# script loads b_with_entropy.csv, sets `anomaly_flag` to True on rows whose
# `delta_H` is >= 1.5, and saves the result as c_with_anomaly_flag.csv.
# Rows with a NaN `delta_H` compare as False and are therefore not flagged.
step_O2_anomaly_script = """
import pandas as pd

# Load entropy-enriched dataset
df = pd.read_csv("/workspace/datasets/NTP_amplification/b_with_entropy.csv")

# Define anomaly threshold (as per paper, ∆H ≥ 1.5 bits)
THRESHOLD_DH = 1.5

# Flag anomalies
df['anomaly_flag'] = df['delta_H'] >= THRESHOLD_DH

# Save to new CSV
output_path = "/workspace/datasets/NTP_amplification/c_with_anomaly_flag.csv"
df.to_csv(output_path, index=False)

print(f"✅ Anomaly marking completed. Output saved to: {output_path}")
print(f"🔍 Total anomalies detected: {df['anomaly_flag'].sum()}")
"""

anomaly_script_path = "/workspace/datasets/NTP_amplification/step_O2_anomaly_flag.py"
# The script text contains non-ASCII characters (∆, ≥, ✅, 🔍); write it as
# UTF-8 explicitly so the file is valid Python source regardless of the locale.
with open(anomaly_script_path, "w", encoding="utf-8") as f:
    f.write(step_O2_anomaly_script)

In [11]:
# Python script for Step O3 - one-hot vectorization of ARNN input features.
#
# This cell only stores the script source in `step_O3_vectorization_script`
# and writes it to `vectorization_script_path`; it does not execute it. The
# generated script loads c_with_anomaly_flag.csv, adds four randomly drawn
# (seeded) categorical columns - srcIP, dstIP, udp_port, NTP_cmd - one-hot
# encodes them, appends the encoded columns to the frame, and saves the
# result as d_vectorized.csv.
step_O3_vectorization_script = """
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder

# Load dataset with anomaly flags
df = pd.read_csv("/workspace/datasets/NTP_amplification/c_with_anomaly_flag.csv")

# Simulate network-level categorical features
np.random.seed(42)
df['srcIP'] = np.random.choice(['192.168.1.1', '10.0.0.5', '172.16.0.2'], size=len(df))
df['dstIP'] = np.random.choice(['8.8.8.8', '1.1.1.1'], size=len(df))
df['udp_port'] = np.random.choice([123, 53, 161], size=len(df))  # NTP, DNS, SNMP
df['NTP_cmd'] = np.random.choice(['monlist', 'version', 'readvar'], size=len(df))

# Apply one-hot encoding
features_to_encode = df[['srcIP', 'dstIP', 'udp_port', 'NTP_cmd']]
# Use the default (sparse) encoder and densify explicitly: the `sparse` keyword
# was renamed to `sparse_output` in scikit-learn 1.2 and removed in 1.4, so
# passing either name breaks on some installed versions.
encoder = OneHotEncoder()
encoded_array = encoder.fit_transform(features_to_encode).toarray()
encoded_df = pd.DataFrame(encoded_array, columns=encoder.get_feature_names_out(features_to_encode.columns))

# Merge encoded features back
df_encoded = pd.concat([df.reset_index(drop=True), encoded_df], axis=1)

# Save vectorized dataset
output_path = "/workspace/datasets/NTP_amplification/d_vectorized.csv"
df_encoded.to_csv(output_path, index=False)

print(f"✅ Vectorization complete. Output saved to: {output_path}")
"""

vectorization_script_path = "/workspace/datasets/NTP_amplification/step_O3_vectorization.py"
# The script text contains a non-ASCII character (✅); write it as UTF-8
# explicitly so the file is valid Python source regardless of the locale.
with open(vectorization_script_path, "w", encoding="utf-8") as f:
    f.write(step_O3_vectorization_script)

In [12]:
# Python script for Step O4 - LSTM-based ARNN simulation.
#
# This cell only stores the script source in `step_O4_arnn_script` and writes
# it to `arnn_script_path`; it does not execute it. The generated script loads
# d_vectorized.csv, selects the one-hot columns by prefix, and for every row
# from index 5 onward sets `R_i` to 0.2 + 0.75 * mean of the previous five
# one-hot rows, clipped to [0.2, 0.95]. The first five rows keep R_i = NaN.
# `under_mitigation` is True where R_i > 0.55 (NaN compares as False), and
# the result is saved as e_with_risk_scores.csv.
step_O4_arnn_script = """
import pandas as pd
import numpy as np

# Load vectorized dataset
df = pd.read_csv("/workspace/datasets/NTP_amplification/d_vectorized.csv")

# Select input features for ARNN (one-hot encoded)
input_features = [col for col in df.columns if col.startswith(('srcIP_', 'dstIP_', 'udp_port_', 'NTP_cmd_'))]
X = df[input_features].values.astype(np.float32)

# Simulate ARNN output risk scores for t+1 based on window
np.random.seed(42)
window_size = 5
df['R_i'] = np.nan

for i in range(window_size, len(df)):
    df.loc[i, 'R_i'] = np.clip(0.2 + 0.75 * np.mean(X[i - window_size:i]), 0.2, 0.95)

# Apply mitigation threshold as in paper: Ri > 0.55
df['under_mitigation'] = df['R_i'] > 0.55

# Save to file
output_path = "/workspace/datasets/NTP_amplification/e_with_risk_scores.csv"
df.to_csv(output_path, index=False)

print(f"✅ ARNN simulation complete. Output saved to: {output_path}")
print(f"🚨 Mitigation triggered on {df['under_mitigation'].sum()} packets.")
"""

arnn_script_path = "/workspace/datasets/NTP_amplification/step_O4_arnn_simulation.py"
# The script text contains non-ASCII characters (✅, 🚨); write it as UTF-8
# explicitly so the file is valid Python source regardless of the locale.
with open(arnn_script_path, "w", encoding="utf-8") as f:
    f.write(step_O4_arnn_script)

In [13]:
# Python script for Step O5 - RDF risk graph creation.
#
# This cell only stores the script source in `step_O5_rdf_script` and writes
# it to `rdf_graph_script_path`; it does not execute it. The generated script
# loads e_with_risk_scores.csv, emits one ntp:Packet resource per row, attaches
# ntp:hasRiskScore (rounded to 4 decimals) when R_i is not NaN and
# ntp:underMitigation true when the row is flagged, then serializes the graph
# as Turtle to f_risk_graph.ttl.
step_O5_rdf_script = """
from rdflib import Graph, Namespace, URIRef, Literal
from rdflib.namespace import RDF, XSD
import pandas as pd

# Load ARNN output
df = pd.read_csv("/workspace/datasets/NTP_amplification/e_with_risk_scores.csv")

# Initialize RDF graph
g = Graph()
NTP = Namespace("http://example.org/ntp#")
g.bind("ntp", NTP)

# Add triples for risk scores and mitigation status
for _, row in df.iterrows():
    packet_uri = URIRef(f"http://example.org/ntp/packet/{row['packet_id']}")
    g.add((packet_uri, RDF.type, NTP.Packet))
    
    if not pd.isna(row['R_i']):
        g.add((packet_uri, NTP.hasRiskScore, Literal(round(row['R_i'], 4), datatype=XSD.float)))
    
    if row.get('under_mitigation', False):
        g.add((packet_uri, NTP.underMitigation, Literal(True, datatype=XSD.boolean)))

# Save graph to TTL
rdf_path = "/workspace/datasets/NTP_amplification/f_risk_graph.ttl"
g.serialize(destination=rdf_path, format="turtle")

print(f"✅ RDF risk graph generated and saved to: {rdf_path}")
"""

rdf_graph_script_path = "/workspace/datasets/NTP_amplification/step_O5_rdf_risk_graph.py"
# The script text contains a non-ASCII character (✅); write it as UTF-8
# explicitly so the file is valid Python source regardless of the locale.
with open(rdf_graph_script_path, "w", encoding="utf-8") as f:
    f.write(step_O5_rdf_script)

In [14]:
# Regenerate the SPARQL query script (Step O6) after a kernel reset.
#
# This cell only stores the script source in `step_O6_sparql_script` and
# writes it to `sparql_script_path`; it does not execute it. The generated
# script parses f_risk_graph.ttl and runs a SELECT query for every ntp:Packet
# that has both a risk score and ntp:underMitigation true, ordered by risk
# score descending, printing one line per result.
step_O6_sparql_script = """
from rdflib import Graph

# Load the RDF risk graph
g = Graph()
rdf_path = "/workspace/datasets/NTP_amplification/f_risk_graph.ttl"
g.parse(rdf_path, format="ttl")

# Run SPARQL query to extract packets under mitigation
query = '''
PREFIX ntp: <http://example.org/ntp#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT ?packet ?riskScore
WHERE {
  ?packet a ntp:Packet ;
          ntp:hasRiskScore ?riskScore ;
          ntp:underMitigation "true"^^xsd:boolean .
}
ORDER BY DESC(?riskScore)
'''

results = g.query(query)

# Print results
print("🔍 Packets under mitigation (Ri > 0.55):")
for row in results:
    print(f"{row.packet} -> Risk Score: {row.riskScore}")
"""

sparql_script_path = "/workspace/datasets/NTP_amplification/step_O6_sparql_query.py"
# The script text contains a non-ASCII character (🔍); write it as UTF-8
# explicitly so the file is valid Python source regardless of the locale.
with open(sparql_script_path, "w", encoding="utf-8") as f:
    f.write(step_O6_sparql_script)