In [None]:
import pandas as pd
from torch.utils.data import Dataset
import torch
import matplotlib.pyplot as plt
import torch.nn as nn
import random
import numpy
import math
import os
import re

In [None]:
# Pick the compute device once for the whole notebook: the GPU when PyTorch
# can see one, otherwise the CPU. Every tensor and model below is moved here.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    print("Using GPU:", torch.cuda.get_device_name(0))
else:
    print("Using CPU")

In [None]:
# dataset class

class MyDataset(Dataset):
    """Rows of a tab-separated, header-less text file served as normalised tensors.

    Each row of the file is one sample. Samples are min-max scaled with the
    global minimum and maximum of the whole file, so values land in [0, 1].

    Args:
        csv_file: path of the tab-separated file to load.
    """

    def __init__(self, csv_file):
        self.filename = csv_file
        self.data_df = pd.read_csv(csv_file, header=None, sep='\t')

        # The global bounds never change after loading, so compute them once
        # here instead of rescanning the whole frame on every __getitem__ call.
        # (If data_df is modified afterwards these cached values must be refreshed.)
        self.data_min = float(self.data_df.min().min())
        self.data_max = float(self.data_df.max().max())
        value_range = self.data_max - self.data_min
        # A constant-valued file has a zero range; divide by 1 instead so the
        # samples come out as zeros rather than NaN (0/0).
        self.data_range = value_range if value_range > 0 else 1.0

    def __len__(self):
        """Number of rows (samples) in the file."""
        return len(self.data_df)

    def __getitem__(self, index):
        """Return row `index` min-max scaled, as a float32 tensor on `device`.

        Raises:
            IndexError: if `index` is out of range (this is also what ends a
                plain ``for sample in dataset`` loop).
        """
        row = self.data_df.iloc[index, 0:].values
        image_values = (torch.tensor(row, dtype=torch.float32) - self.data_min) / self.data_range
        return image_values.to(device)

    def plot_image(self, index, shape=(25, 233)):
        """Show row `index` (raw, un-normalised values) as a 2-D image.

        Args:
            index: row to display.
            shape: (rows, cols) to reshape the flat row into; the product must
                equal the number of columns in the file. Defaults to 25 x 233.
        """
        img = self.data_df.iloc[index, 0:].values.reshape(shape)
        plt.title("label = " + self.filename)
        plt.imshow(img, interpolation='none', cmap='Blues')

# functions to generate random data

def generate_random_image(size):
    """Return a tensor of shape `size` of uniform samples from [0, 1), moved to `device`."""
    return torch.rand(size).to(device)


def generate_random_seed(size):
    """Return a tensor of shape `size` of standard-normal samples, moved to `device`."""
    return torch.randn(size).to(device)


# discriminator class

class Discriminator(nn.Module):
    """Fully connected real/fake classifier with its own loss and Adam optimiser.

    Args:
        layer_sizes: widths of the network from input to output. Every
            consecutive pair becomes a Linear layer; all but the last are
            followed by LeakyReLU(0.02) and LayerNorm, the last by Sigmoid.
            The default reproduces the original 5825 -> ... -> 1 network.
        lr: learning rate handed to Adam.
    """

    def __init__(self, layer_sizes=(5825, 4800, 2400, 1200, 600, 200, 1), lr=0.0001):
        # initialise parent pytorch class
        super().__init__()

        if len(layer_sizes) < 2:
            raise ValueError("layer_sizes needs at least an input and an output width")

        # define neural network layers (same order, hence same Sequential
        # indices, as the hand-written default architecture)
        layers = []
        for n_in, n_out in zip(layer_sizes[:-2], layer_sizes[1:-1]):
            layers += [nn.Linear(n_in, n_out), nn.LeakyReLU(0.02), nn.LayerNorm(n_out)]
        layers += [nn.Linear(layer_sizes[-2], layer_sizes[-1]), nn.Sigmoid()]
        self.model = nn.Sequential(*layers)

        # binary cross-entropy on the sigmoid output
        self.loss_function = nn.BCELoss()

        # move the weights to the selected device before building the optimiser
        self.model = self.model.to(device)

        # create optimiser (Adam)
        self.optimiser = torch.optim.Adam(self.parameters(), lr=lr)

        # step counter and loss history (one entry every 10 steps)
        self.counter = 0
        self.progress = []

    def forward(self, inputs):
        """Run `inputs` through the network; returns a probability in [0, 1]."""
        return self.model(inputs)

    def train(self, inputs=True, targets=None):
        """Run one optimisation step, or switch training mode.

        This method shadows ``nn.Module.train(mode)``. To keep ``eval()`` and
        ``train()`` / ``train(False)`` working, a call without `targets` is
        treated as the standard mode switch and forwarded to ``nn.Module``
        (the mode must be passed positionally).

        Args:
            inputs: sample tensor to classify (or a bool when switching mode).
            targets: expected output tensor, same shape as the network output.

        Returns:
            None after an optimisation step; ``self`` after a mode switch.
        """
        if targets is None:
            return super().train(inputs)

        # calculate the output of the network
        outputs = self.forward(inputs)

        # calculate loss
        loss = self.loss_function(outputs, targets)

        # increase counter, record the loss every 10 steps, report every 10000
        self.counter += 1
        if self.counter % 10 == 0:
            self.progress.append(loss.item())
        if self.counter % 10000 == 0:
            print("counter = ", self.counter)

        # zero gradients, perform a backward pass, update weights
        self.optimiser.zero_grad()
        loss.backward()
        self.optimiser.step()

    def plot_progress(self):
        """Scatter-plot the recorded loss history."""
        df = pd.DataFrame(self.progress, columns=['loss'])
        df.plot(ylim=0, figsize=(16, 8), alpha=0.1, marker='.', grid=True, yticks=(0, 0.25, 0.5))


# generator class

class Generator(nn.Module):
    """Fully connected generator mapping a random seed vector to a sample.

    Args:
        layer_sizes: widths of the network from seed to output. Every
            consecutive pair becomes a Linear layer; all but the last are
            followed by LeakyReLU(0.02) and LayerNorm, the last by Sigmoid.
            The default reproduces the original 100 -> ... -> 5825 network.
        lr: learning rate handed to Adam.
    """

    def __init__(self, layer_sizes=(100, 200, 600, 1200, 2400, 4800, 5825), lr=0.0001):
        # initialise parent pytorch class
        super().__init__()

        if len(layer_sizes) < 2:
            raise ValueError("layer_sizes needs at least an input and an output width")

        # define neural network layers (same order, hence same Sequential
        # indices, as the hand-written default architecture)
        layers = []
        for n_in, n_out in zip(layer_sizes[:-2], layer_sizes[1:-1]):
            layers += [nn.Linear(n_in, n_out), nn.LeakyReLU(0.02), nn.LayerNorm(n_out)]
        layers += [nn.Linear(layer_sizes[-2], layer_sizes[-1]), nn.Sigmoid()]
        self.model = nn.Sequential(*layers)

        # move the weights to the selected device before building the optimiser
        self.model = self.model.to(device)

        # create optimiser (Adam)
        self.optimiser = torch.optim.Adam(self.parameters(), lr=lr)

        # step counter and loss history (one entry every 10 steps)
        self.counter = 0
        self.progress = []

    def forward(self, inputs):
        """Run the seed `inputs` through the network; outputs lie in [0, 1]."""
        return self.model(inputs)

    def train(self, D=True, inputs=None, targets=None):
        """Run one generator optimisation step, or switch training mode.

        This method shadows ``nn.Module.train(mode)``. To keep ``eval()`` and
        ``train()`` / ``train(False)`` working, a call without `inputs` and
        `targets` is treated as the standard mode switch and forwarded to
        ``nn.Module`` (the mode must be passed positionally).

        Args:
            D: discriminator exposing ``forward`` and ``loss_function`` (or a
                bool when switching mode).
            inputs: random seed tensor fed to the generator.
            targets: output the discriminator should produce for the
                generated sample.

        Returns:
            None after an optimisation step; ``self`` after a mode switch.
        """
        if inputs is None and targets is None:
            return super().train(D)

        # calculate the output of the network
        g_output = self.forward(inputs)

        # pass onto Discriminator
        d_output = D.forward(g_output)

        # calculate error using the discriminator's loss
        loss = D.loss_function(d_output, targets)

        # increase counter and record the loss every 10 steps
        self.counter += 1
        if self.counter % 10 == 0:
            self.progress.append(loss.item())

        # zero gradients, perform a backward pass, update weights
        # (only the generator's own optimiser steps here)
        self.optimiser.zero_grad()
        loss.backward()
        self.optimiser.step()

    def plot_progress(self):
        """Scatter-plot the recorded loss history."""
        df = pd.DataFrame(self.progress, columns=['loss'])
        df.plot(ylim=0, figsize=(16, 8), alpha=0.1, marker='.', grid=True, yticks=(0, 0.25, 0.5, 1.0))

# Helper that extracts the first number embedded in a filename (used as a sort key)
def extract_number(filename):
    """Return the first run of digits in `filename` as an int, or 0 if there is none."""
    found = re.search(r'\d+', filename)
    return int(found.group()) if found else 0

In [None]:
# Input folder and run-wide accumulators.
directory = './noisy_data'
total_output = []  # one generated sample per processed file
rmse = 0           # running sum of the per-file RMSE values

# Collect every .txt file, then order them by the number in their names.
files = list(filter(lambda name: name.endswith('.txt'), os.listdir(directory)))
sorted_files = list(files)
sorted_files.sort(key=extract_number)

In [None]:
# Train one GAN per input file, keep one generated sample per file, and
# accumulate the RMSE between that sample and a reference row of the file.

# The label tensors never change, so build them once on the active device.
# torch.tensor(..., device=device) works on CPU-only machines as well, unlike
# the legacy torch.cuda.FloatTensor constructor.
real_label = torch.tensor([1.0], dtype=torch.float, device=device)
fake_label = torch.tensor([0.0], dtype=torch.float, device=device)

for filename in sorted_files:
    filepath = os.path.join(directory, filename)
    print(f"已加载文件: {filepath}")
    dataset = MyDataset(filepath)

    # NOTE: an earlier version first trained a separate Discriminator against
    # pure noise here and then discarded it when D was re-created below. That
    # pass had no effect on the result and has been dropped.

    # create Discriminator and Generator
    D = Discriminator().to(device)
    G = Generator().to(device)

    epochs = 4
    print("start")

    for epoch in range(epochs):
        print("epoch = ", epoch + 1)

        # train Discriminator and Generator
        for image_data_tensor in dataset:
            image_data_tensor = image_data_tensor.to(device)

            # discriminator: one real sample ...
            D.train(image_data_tensor, real_label)

            # ... and one generated sample (detached so G gets no gradient here)
            fake_images = G.forward(generate_random_seed(100)).detach()
            D.train(fake_images, fake_label)

            # train generator: five steps per discriminator update
            for _ in range(5):
                G.train(D, generate_random_seed(100), real_label)

    print("finish")

    # one generated sample for this file; no gradients are needed any more
    with torch.no_grad():
        output = G.forward(generate_random_seed(100))
    output_cpu = output.detach().cpu().numpy()
    total_output.append(output_cpu)

    # Reference sample for the error estimate.
    # NOTE(review): the original comment said "item 0" but the code reads
    # row 1; the index is kept as-is -- confirm which row is intended.
    image_values = dataset[1]

    # RMSE over all features, computed in float64 in a single vectorised step
    # instead of a Python loop with one .item() call per element.
    reference = image_values.detach().cpu().numpy().astype(numpy.float64)
    squared_error = (output_cpu.astype(numpy.float64) - reference) ** 2
    user_rmse = math.sqrt(float(squared_error.mean()))
    rmse += user_rmse
    print("当前用户误差：" + str(user_rmse))

In [None]:
# total_output is a plain Python list of 1-D arrays (one per processed file),
# so it has no .reshape; stack it into an (n_files, n_features) matrix first.
# The file count comes from the data instead of a hard-coded 399.
if total_output:
    output_matrix = numpy.stack(total_output)
    numpy.savetxt('output.txt', output_matrix, delimiter='\t', fmt='%f')
    # mean RMSE over all processed files
    print(rmse / len(total_output))
else:
    print("No generator outputs were collected; nothing to save.")