In [39]:
import pandas as pd
import numpy as np
import glob
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from torch.utils.data import DataLoader, TensorDataset
from pygments.unistring import combine

In [40]:
file_path = './last_data/*.csv'
all_files = glob.glob(file_path)
df_list = []

for file in all_files:
    df = pd.read_csv(file) 
    df_list.append(df) 

combined_df = pd.concat(df_list)
combined_df.sort_values(by='timestamp', inplace=True)
output_file = 'combined.csv'

print(combined_df.head())

             timestamp  pm25_avg_60  windspeed  winddir
0  2023-10-01 00:00:00     3.132667       14.2    329.0
0  2023-10-01 00:00:00     2.669667       14.2    329.0
0  2023-10-01 00:00:00     5.386000       14.2    329.0
0  2023-10-01 00:00:00     3.351000       14.2    329.0
0  2023-10-01 00:00:00     6.283667       14.2    329.0


In [41]:
data = combined_df.dropna()
print(data)

                timestamp  pm25_avg_60  windspeed  winddir
0     2023-10-01 00:00:00     3.132667       14.2    329.0
0     2023-10-01 00:00:00     2.669667       14.2    329.0
0     2023-10-01 00:00:00     5.386000       14.2    329.0
0     2023-10-01 00:00:00     3.351000       14.2    329.0
0     2023-10-01 00:00:00     6.283667       14.2    329.0
...                   ...          ...        ...      ...
2929  2024-01-31 01:00:00    -0.430000        7.4    109.0
2929  2024-01-31 01:00:00     3.785000        7.4    109.0
2929  2024-01-31 01:00:00    15.618000        7.4    109.0
2929  2024-01-31 01:00:00    10.408667        7.4    109.0
2929  2024-01-31 01:00:00     3.558333        7.4    109.0

[120089 rows x 4 columns]


In [42]:
data.reset_index(drop=True, inplace=True)
print(data)

                  timestamp  pm25_avg_60  windspeed  winddir
0       2023-10-01 00:00:00     3.132667       14.2    329.0
1       2023-10-01 00:00:00     2.669667       14.2    329.0
2       2023-10-01 00:00:00     5.386000       14.2    329.0
3       2023-10-01 00:00:00     3.351000       14.2    329.0
4       2023-10-01 00:00:00     6.283667       14.2    329.0
...                     ...          ...        ...      ...
120084  2024-01-31 01:00:00    -0.430000        7.4    109.0
120085  2024-01-31 01:00:00     3.785000        7.4    109.0
120086  2024-01-31 01:00:00    15.618000        7.4    109.0
120087  2024-01-31 01:00:00    10.408667        7.4    109.0
120088  2024-01-31 01:00:00     3.558333        7.4    109.0

[120089 rows x 4 columns]


In [43]:
data.index = data.index.astype('int64')

In [44]:
data

Unnamed: 0,timestamp,pm25_avg_60,windspeed,winddir
0,2023-10-01 00:00:00,3.132667,14.2,329.0
1,2023-10-01 00:00:00,2.669667,14.2,329.0
2,2023-10-01 00:00:00,5.386000,14.2,329.0
3,2023-10-01 00:00:00,3.351000,14.2,329.0
4,2023-10-01 00:00:00,6.283667,14.2,329.0
...,...,...,...,...
120084,2024-01-31 01:00:00,-0.430000,7.4,109.0
120085,2024-01-31 01:00:00,3.785000,7.4,109.0
120086,2024-01-31 01:00:00,15.618000,7.4,109.0
120087,2024-01-31 01:00:00,10.408667,7.4,109.0


In [45]:
data.drop(columns=['timestamp'], inplace=True)
data

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.drop(columns=['timestamp'], inplace=True)


Unnamed: 0,pm25_avg_60,windspeed,winddir
0,3.132667,14.2,329.0
1,2.669667,14.2,329.0
2,5.386000,14.2,329.0
3,3.351000,14.2,329.0
4,6.283667,14.2,329.0
...,...,...,...
120084,-0.430000,7.4,109.0
120085,3.785000,7.4,109.0
120086,15.618000,7.4,109.0
120087,10.408667,7.4,109.0


In [46]:
new_columns = list(range(len(data.columns)))
data.columns = new_columns

data

Unnamed: 0,0,1,2
0,3.132667,14.2,329.0
1,2.669667,14.2,329.0
2,5.386000,14.2,329.0
3,3.351000,14.2,329.0
4,6.283667,14.2,329.0
...,...,...,...
120084,-0.430000,7.4,109.0
120085,3.785000,7.4,109.0
120086,15.618000,7.4,109.0
120087,10.408667,7.4,109.0


In [47]:
train_data, test_data = train_test_split(data, test_size=0.3, random_state=42)

In [48]:
class GraPhyModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers):
        super(GraPhyModel, self).__init__()
        self.layers = nn.ModuleList()
        self.layers.append(nn.Linear(input_dim, hidden_dim))  

        for _ in range(1, num_layers):
            self.layers.append(nn.Linear(hidden_dim, hidden_dim))  

        self.output_layer = nn.Linear(hidden_dim, input_dim)  

    def forward(self, x):
        for layer in self.layers:
            x = torch.relu(layer(x))  
        x = self.output_layer(x)  
        return x

input_dim = train_data.shape[1]  
hidden_dim = 512
num_layers = 5

model = GraPhyModel(input_dim=input_dim, hidden_dim=hidden_dim, num_layers=num_layers)

optimizer = optim.Adam(model.parameters(), lr=0.0001, betas=(0.9, 0.999))


criterion = nn.MSELoss()   

In [49]:
train_tensor = torch.tensor(train_data.values, dtype=torch.float32)

train_dataset = TensorDataset(train_tensor, train_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

num_epochs = 10
model.train()

GraPhyModel(
  (layers): ModuleList(
    (0): Linear(in_features=3, out_features=512, bias=True)
    (1-4): 4 x Linear(in_features=512, out_features=512, bias=True)
  )
  (output_layer): Linear(in_features=512, out_features=3, bias=True)
)

In [50]:
for epoch in range(num_epochs):
    running_loss = 0.0
    for inputs, _ in train_loader:
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, inputs)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    avg_loss = running_loss / len(train_loader)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.6f}")

Epoch 1/10, Loss: 245.484691
Epoch 2/10, Loss: 7.557329
Epoch 3/10, Loss: 9.025205
Epoch 4/10, Loss: 9.035647
Epoch 5/10, Loss: 7.234597
Epoch 6/10, Loss: 5.103715
Epoch 7/10, Loss: 5.050767
Epoch 8/10, Loss: 4.728178
Epoch 9/10, Loss: 3.779249
Epoch 10/10, Loss: 3.164120


In [89]:
import torch
import torch.nn as nn

class BIMAttack:
    def __init__(self, model, eps=0.1, 
                 alpha=0.05, 
                 steps=10):
        self.model = model
        self.eps = eps 
        self.alpha = alpha  
        self.steps = steps 
        self.loss_fn = nn.MSELoss()  

    def generate(self, data):
        original_data = data.clone().detach()  
        perturbed_data = data.clone().detach()

        for _ in range(self.steps):
            perturbed_data.requires_grad = True
            outputs = self.model(perturbed_data)

            loss = self.loss_fn(outputs, original_data)

            self.model.zero_grad()
            loss.backward()

            grad = perturbed_data.grad.sign()
            perturbed_data = perturbed_data + self.alpha * grad

            perturbation = torch.clamp(perturbed_data - original_data, min=-self.eps, max=self.eps)
            perturbed_data = torch.clamp(original_data + perturbation, min=0, max=1).detach()

        return perturbed_data

In [97]:
bim_attack = BIMAttack(model, eps=0.1,
                       alpha=0.01, 
                       steps=20)

test_tensor = torch.tensor(test_data.values, dtype=torch.float32)

num_samples = test_tensor.shape[0]

perturbed_test_data = []
for i in range(num_samples):
    single_data = test_tensor[i].unsqueeze(0)  
    perturbed_data = bim_attack.generate(single_data)  
    perturbed_test_data.append(perturbed_data)

perturbed_test_data = torch.cat(perturbed_test_data, dim=0)

original_labels = torch.zeros(num_samples)  # 0
anomaly_labels = torch.ones(num_samples)    # 1

combined_data = torch.cat((test_tensor, perturbed_test_data), dim=0)
combined_labels = torch.cat((original_labels, anomaly_labels), dim=0)

In [98]:
with torch.no_grad():
    model.eval()
    predictions = model(combined_data)


predicted_labels = torch.norm(predictions - combined_data, dim=1)  

threshold = 0.06  
predicted_labels = (predicted_labels > threshold).float()  

accuracy = accuracy_score(combined_labels, predicted_labels)
precision = precision_score(combined_labels, predicted_labels)
recall = recall_score(combined_labels, predicted_labels)
f1 = f1_score(combined_labels, predicted_labels)

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

Accuracy: 0.5102
Precision: 0.5052
Recall: 0.9782
F1 Score: 0.6663


In [88]:
epsilons = [0.008, 0.05, 0.1, 0.2]
for eps in epsilons:
    bim_attack = BIMAttack(model, eps=eps, alpha=0.01, steps=10)
    perturbed_test_data = bim_attack.generate(test_tensor)
    
    combined_data = torch.cat((test_tensor, perturbed_test_data), dim=0)
    with torch.no_grad():
        model.eval()
        predictions = model(combined_data)
        
    predicted_labels = torch.norm(predictions - combined_data, dim=1)
    threshold = 0.4 * eps  
    predicted_labels = (predicted_labels > threshold).float()
    
    accuracy = accuracy_score(combined_labels, predicted_labels)
    precision = precision_score(combined_labels, predicted_labels)
    recall = recall_score(combined_labels, predicted_labels)
    f1 = f1_score(combined_labels, predicted_labels)
    
    print(f"Epsilon: {eps}, "
          f"Accuracy: {accuracy:.4f}, "
          f"Precision: {precision:.4f}, "
          f"Recall: {recall:.4f}, "
          f"F1 Score: {f1:.4f}")

Epsilon: 0.008, Accuracy: 0.5000, Precision: 0.5000, Recall: 1.0000, F1 Score: 0.6667
Epsilon: 0.05, Accuracy: 0.5049, Precision: 0.5024, Recall: 0.9999, F1 Score: 0.6688
Epsilon: 0.1, Accuracy: 0.5028, Precision: 0.5014, Recall: 0.9835, F1 Score: 0.6642
Epsilon: 0.2, Accuracy: 0.4915, Precision: 0.4954, Recall: 0.9124, F1 Score: 0.6421
