In [25]:
import ipaddress
from concurrent.futures import ThreadPoolExecutor

import geoip2.database
import geoip2.errors
import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
from tqdm import tqdm


In [26]:
# --- 1. Load the dataset ---
# The CSV is expected to provide the columns: attacker_ip, target_ip
df = pd.read_csv("../data/cybersecurity_large_synthesized_data.csv")
# Country lookups (reader.country) require a Country or City database.
# The ASN database only answers reader.asn(); calling reader.country() on it
# raises TypeError, which previously turned every lookup into "Unknown".
reader = geoip2.database.Reader("../data/GeoLite2-Country.mmdb")



In [None]:
def ip_to_country(ip):
    """Look up the country ISO code for an IP address.

    Args:
        ip: IP address as a string. Any value that is not a string holding a
            valid IPv4/IPv6 address (for example a missing value) is treated
            as unresolvable.

    Returns:
        The ``country.iso_code`` reported by the module-level ``reader``, or
        ``"Unknown"`` when the address is invalid, is not present in the
        database, or has no country code.

    Raises:
        Any other error from the reader (e.g. a database type that does not
        support country lookups, or a closed reader) is propagated instead of
        being silently converted to ``"Unknown"``.
    """
    # Reject non-strings and malformed addresses up front so that the only
    # lookup failure handled below is "address not in the database".
    if not isinstance(ip, str):
        return "Unknown"
    try:
        ipaddress.ip_address(ip)
    except ValueError:
        return "Unknown"

    try:
        iso_code = reader.country(ip).country.iso_code
    except geoip2.errors.AddressNotFoundError:
        return "Unknown"
    # Some records carry no country code; report them like unresolved addresses.
    return iso_code or "Unknown"

# --- Concatenated IP list: all attackers first, then all targets ---
ips = list(df['attacker_ip']) + list(df['target_ip'])

# The same address can appear on many rows, so resolve each distinct address
# only once. dict.fromkeys de-duplicates while keeping first-seen order.
unique_ips = list(dict.fromkeys(ips))

# --- Wrapper handed to the executor ---
def process_ip(ip):
    """Resolve one IP to its country code by delegating to ip_to_country."""
    return ip_to_country(ip)

# --- ThreadPoolExecutor with a progress bar ---
try:
    with ThreadPoolExecutor(max_workers=16) as executor:
        # tqdm wraps the result iterator to show progress over the distinct IPs
        unique_results = list(
            tqdm(executor.map(process_ip, unique_ips), total=len(unique_ips), desc="Processing IPs")
        )
finally:
    # Release the database handle even if the lookup is interrupted or fails.
    # NOTE: the reader must be reopened before this cell can be run again.
    reader.close()

# executor.map yields results in input order, so zip pairs each IP with its country.
country_by_ip = dict(zip(unique_ips, unique_results))

# --- Per-row results, in the same order as `ips` ---
results = [country_by_ip[ip] for ip in ips]

# Split the results back into attacker/target columns
df['attacker_country'] = results[:len(df)]
df['target_country'] = results[len(df):]

Processing IPs:   1%|          | 2016/200000 [00:36<1:00:32, 54.50it/s]



KeyboardInterrupt: 

In [None]:
# --- 6. Aggregate attacks per country→country pair ---
# Count the rows for every (attacker_country, target_country) combination.
pair_counts = df.groupby(['attacker_country', 'target_country']).size()
edge_df = pair_counts.reset_index(name='weight')

# Discard pairs where either endpoint could not be resolved to a country.
has_unknown = (edge_df['attacker_country'] == "Unknown") | (edge_df['target_country'] == "Unknown")
edge_df = edge_df[~has_unknown]

RuntimeError: ip2c package installed but does not expose 'IP2C' class nor 'lookup' function.
Please install a compatible ip2c (pip install -U ip2c) and restart the kernel.

In [None]:
# --- 7. Build the weighted directed graph ---
# One edge per (attacker_country, target_country) row, carrying its 'weight'.
G = nx.from_pandas_edgelist(
    edge_df,
    source='attacker_country',
    target='target_country',
    edge_attr='weight',
    create_using=nx.DiGraph,
)

# --- 8. Draw the basic graph ---
MAX_EDGE_WIDTH = 8.0  # line width (in points) used for the heaviest edge

fig, ax = plt.subplots(figsize=(12, 8))
pos = nx.spring_layout(G, k=1, seed=42)
node_sizes = [500 + 50*G.degree(n) for n in G.nodes()]
edge_weights = [d['weight'] for (_, _, d) in G.edges(data=True)]

# Scale widths relative to the heaviest edge so that large raw counts do not
# produce lines thick enough to cover the whole figure.
heaviest = max(edge_weights, default=1)
edge_widths = [MAX_EDGE_WIDTH * w / heaviest for w in edge_weights]

nx.draw_networkx_nodes(G, pos, node_size=node_sizes, node_color='skyblue', ax=ax)
nx.draw_networkx_edges(G, pos, width=edge_widths, arrowstyle='->', arrowsize=15, ax=ax)
nx.draw_networkx_labels(G, pos, font_size=10, font_weight='bold', ax=ax)
ax.set_title("Rete aggregata di attacchi per paese (paese_attaccante → paese_target)")
ax.set_axis_off()
plt.show()

# --- 9. Save the edge list, ready for Gephi ---
edge_df.to_csv("cyber_attacks_country_edges.csv", index=False)
print("Edge list salvata!")