In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import re
from collections import Counter
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt

In [3]:
def load_seed_addresses(file_path):
    """加载种子地址文件"""
    with open(file_path, 'r') as f:
        addresses = [line.strip() for line in f if line.strip()]
    return addresses
file_path = r'give_data/1_give.txt'
seed_addresses = load_seed_addresses(file_path)
seed_addresses

['2001:1:abde:16::1',
 '2001:1:adc5:8::1',
 '2001:1:a3a4:39::1',
 '2001:1:a920:33::1',
 '2001:1:ac66:18::1',
 '2001:1:a57f:11::1',
 '2001:1:a779:22::1',
 '2001:1:a27d:18::1',
 '2001:1:ad03:24::1',
 '2001:1:a049:16::1',
 '2001:1:affb:32::1',
 '2001:1:ac46:37::1',
 '2001:1:a49c:19::1',
 '2001:1:a4dd:1::1',
 '2001:1:af8c:39::1',
 '2001:1:a102:18::1',
 '2001:1:af2c:24::1',
 '2001:1:aa27:39::1',
 '2001:1:a0a7:37::1',
 '2001:1:ae84:25::1',
 '2001:1:a39d:20::1',
 '2001:1:a09a:40::1',
 '2001:1:aa93:40::1',
 '2001:1:a6ae:32::1',
 '2001:1:a5da:20::1',
 '2001:1:a7a9:32::1',
 '2001:1:a545:37::1',
 '2001:1:a3f8:36::1',
 '2001:1:a8c0:3::1',
 '2001:1:aa11:36::1',
 '2001:1:aef7:25::1',
 '2001:1:a2de:40::1',
 '2001:1:a7d2:1::1',
 '2001:1:a2ec:26::1',
 '2001:1:a8fd:16::1',
 '2001:1:a9a4:36::1',
 '2001:1:a58a:27::1',
 '2001:1:a4f4:1::1',
 '2001:1:ae0f:31::1',
 '2001:1:a302:10::1',
 '2001:1:af8c:7::1',
 '2001:1:a586:2::1',
 '2001:1:a8bb:33::1',
 '2001:1:afce:24::1',
 '2001:1:a972:5::1',
 '2001:1:a9cb:3::1

In [18]:
from sklearn.preprocessing import MinMaxScaler
def extract_segments(ipv6_address):
    """提取IPv6的第三段和第四段，转为十进制"""
    match = re.search(r'2001:1:([0-9a-f]{4}):([0-9a-f]{1,4})::1', ipv6_address)
    if match:
        segment3_hex, segment4_hex = match.groups()
        return int(segment3_hex, 16), int(segment4_hex, 16)  # 转为十进制
    return None, None
decimal_pairs = [extract_segments(addr) for addr in seed_addresses]
decimal_pairs = [pair for pair in decimal_pairs if pair[0] is not None]  # 过滤无效地址

scaled_pairs = scaler.fit_transform(decimal_pairs)

In [19]:
import torch
import torch.nn as nn

class Generator(nn.Module):
    def __init__(self, noise_dim, output_dim):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(noise_dim, 256),
            nn.LeakyReLU(0.2),
            nn.Linear(256, 512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU(0.2),
            nn.Linear(512, output_dim),  # 输出两列数据
            nn.Tanh()
        )
    
    def forward(self, z):
        return self.model(z)

class Discriminator(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 512),
            nn.LeakyReLU(0.2),
            nn.Linear(512, 256),
            nn.LeakyReLU(0.2),
            nn.Linear(256, 1),
            nn.Sigmoid()
        )
    
    def forward(self, x):
        return self.model(x)

In [20]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 参数设置
noise_dim = 100
output_dim = 2    # 输出两列（第3段和第4段）
batch_size = 512
epochs = 1000

G = Generator(noise_dim, output_dim).to(device)
D = Discriminator(output_dim).to(device)

# 使用Wasserstein Loss
G_optimizer = optim.Adam(G.parameters(), lr=0.0001, betas=(0.5, 0.9))
D_optimizer = optim.Adam(D.parameters(), lr=0.0001, betas=(0.5, 0.9))

# 转换数据为Tensor
tensor_data = torch.FloatTensor(scaled_pairs).to(device)

for epoch in range(3000):
    # 训练判别器
    D.zero_grad()
    real_data = tensor_data[torch.randint(0, len(tensor_data), (32,))]
    real_loss = -torch.mean(D(real_data))
    
    z = torch.randn(32, noise_dim).to(device)
    fake_data = G(z)
    fake_loss = torch.mean(D(fake_data.detach()))
    D_loss = real_loss + fake_loss
    D_loss.backward()
    D_optimizer.step()
    
    # 训练生成器
    G.zero_grad()
    z = torch.randn(32, noise_dim).to(device)
    G_loss = -torch.mean(D(G(z)))
    G_loss.backward()
    G_optimizer.step()

In [27]:
import pandas as pd
def generate_ipv6_pairs(G, num_samples):
    """生成符合规范的两段数据组合"""
    valid_pairs = []
    while len(valid_pairs) < num_samples:
        z = torch.randn(num_samples*2, noise_dim).to(device)
        raw_pairs = G(z).detach().cpu().numpy()
        
        # 反归一化并取整
        pairs = scaler.inverse_transform(raw_pairs).round().astype(int)
        
        # 约束范围：第3段[0,65535], 第4段[0,65535]
        mask = (pairs[:, 0] >= 40963) & (pairs[:, 0] <= 45055) & \
               (pairs[:, 1] >= 1) & (pairs[:, 1] <= 64)
        valid = pairs[mask]
        
        # 去重
        valid = np.unique(valid, axis=0)
        valid_pairs.extend(valid.tolist())
    
    # 转换为IPv6格式
    results = []
    for seg3, seg4 in valid_pairs[:num_samples]:
        ipv6 = f"2001:1:{format(seg3, '04x')}:{format(seg4, 'x')}::1"
        results.append({
            "seg3_hex": format(seg3, '04x'),
            "seg3_dec": seg3,
            "seg4_hex": format(seg4, 'x'),
            "seg4_dec": seg4,
            "ipv6": ipv6
        })
    return pd.DataFrame(results)

# 生成1000个新地址
new_pairs = generate_ipv6_pairs(G, 193658)
new_pairs['ipv6'].to_csv('1.csv', index=False,header=False)

In [25]:
new_pairs

Unnamed: 0,seg3_hex,seg3_dec,seg4_hex,seg4_dec,ipv6
0,a011,40977,13,19,2001:1:a011:13::1
1,a015,40981,26,38,2001:1:a015:26::1
2,a01d,40989,2,2,2001:1:a01d:2::1
3,a01e,40990,a,10,2001:1:a01e:a::1
4,a020,40992,10,16,2001:1:a020:10::1
...,...,...,...,...,...
995,a6a4,42660,34,52,2001:1:a6a4:34::1
996,a6a7,42663,14,20,2001:1:a6a7:14::1
997,a6ab,42667,2e,46,2001:1:a6ab:2e::1
998,a6ac,42668,2c,44,2001:1:a6ac:2c::1
