In [2]:
import pandas as pd
import glob
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report

In [3]:
import pandas as pd
import glob

def combine_and_save_csv(input_path, output_file, sort_column):
    all_files = glob.glob(input_path)
    df_list = [pd.read_csv(file) for file in all_files]

    combined_df = pd.concat(df_list)
    combined_df.sort_values(by=sort_column, inplace=True)
    combined_df.to_csv(output_file, index=False)

    return combined_df

# Kullanım
file_path = './Datasets/*.csv'
output_file = 'combined.csv'
sort_column = 'timestamp'

data_k = combine_and_save_csv(file_path, output_file, sort_column)

In [4]:
import pandas as pd

def process_and_merge_data(data, wind_file, timestamp_col, resample_freq, merge_cols, drop_cols):
    """
    Veriyi işleyip, rüzgar verisiyle birleştirir.

    Args:
        data (pd.DataFrame): İşlenecek veri çerçevesi.
        wind_file (str): Rüzgar verisinin dosya yolu.
        timestamp_col (str): Zaman damgası sütununun adı.
        resample_freq (str): Yeniden örnekleme frekansı (ör. 'H' - saatlik).
        merge_cols (list): Rüzgar verisinden birleştirilecek sütunlar.
        drop_cols (list): Birleştirme sonrası kaldırılacak sütunlar.

    Returns:
        pd.DataFrame: İşlenmiş ve birleştirilmiş veri çerçevesi.
    """
    # Zaman damgasını datetime formatına çevir
    data[timestamp_col] = pd.to_datetime(data[timestamp_col])

    # Yeniden örnekleme
    resampled_data = data.resample(resample_freq, on=timestamp_col).mean().reset_index()

    # Rüzgar verisini yükle
    wind = pd.read_csv(wind_file)
    wind['datetime'] = pd.to_datetime(wind['datetime']).dt.tz_localize('UTC')

    # Verileri birleştir
    merged_data = pd.merge(resampled_data, wind[merge_cols], left_on=timestamp_col, right_on='datetime', how='inner')

    # Belirtilen sütunları kaldır
    merged_data = merged_data.drop(columns=drop_cols)

    return merged_data

# Kullanım
data_k_filtered = data_k[['timestamp', 'sensor', 'pm25_avg_60']]
wind_file = 'fresno_wind.csv'
timestamp_col = 'timestamp'
resample_freq = 'H'
merge_cols = ['datetime', 'windspeed', 'winddir']
drop_cols = ['datetime']

merged_data = process_and_merge_data(data_k_filtered, wind_file, timestamp_col, resample_freq, merge_cols, drop_cols)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_k_filtered['timestamp'] = pd.to_datetime(data_k_filtered['timestamp'])
  hourly_avg = data_k_filtered.resample('H', on='timestamp').mean().reset_index()


In [5]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler

def normalise(X, scaler=None, columns=None):
    """
    Veriyi ölçeklendirmek için dinamik bir fonksiyon.

    Args:
        X (pd.DataFrame): Ölçeklenecek veri çerçevesi.
        scaler (object): Kullanılacak ölçekleyici (ör. StandardScaler, MinMaxScaler, RobustScaler).
        columns (list): Ölçeklenecek sütunların adları. None ise tüm sütunlar ölçeklenir.

    Returns:
        pd.DataFrame: Ölçeklenmiş veri çerçevesi.
    """
    if scaler is None:
        scaler = StandardScaler()  # Varsayılan olarak StandardScaler kullanılır

    if columns is None:
        columns = X.columns  # Tüm sütunlar ölçeklenir

    X_scaled = scaler.fit_transform(X[columns])
    X_scaled_df = pd.DataFrame(X_scaled, columns=columns, index=X.index)

    # Ölçeklenmeyen sütunları koru
    for col in X.columns:
        if col not in columns:
            X_scaled_df[col] = X[col]

    return X_scaled_df

# Kullanım
scaler = MinMaxScaler()
scaled_data = normalise(merged_data, scaler=scaler, columns=['pm25_avg_60', 'windspeed', 'winddir'])

In [6]:
features = ['pm25_avg_60', 'windspeed', 'winddir']
merged_data[features] = normalise(merged_data[features])

In [15]:
merged_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2924 entries, 0 to 2923
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype              
---  ------       --------------  -----              
 0   timestamp    2924 non-null   datetime64[ns, UTC]
 1   pm25_avg_60  2924 non-null   float64            
 2   windspeed    2924 non-null   float64            
 3   winddir      2924 non-null   float64            
dtypes: datetime64[ns, UTC](1), float64(3)
memory usage: 114.2 KB


In [8]:
import torch
import torch.nn as nn

class AnomalyGenerator:
    def __init__(self, model, eps=0.1, alpha=0.01, steps=10):
        self.model = model
        self.eps = eps
        self.alpha = alpha
        self.steps = steps

    def generate_fgsm(self, data, labels):
        data = data.clone().detach().requires_grad_(True)
        outputs = self.model(data)
        loss = nn.CrossEntropyLoss()(outputs, labels)
        self.model.zero_grad()
        loss.backward()
        adv_data = data + self.eps * data.grad.sign()
        return torch.clamp(adv_data, 0, 1)

    def generate_bim(self, data, labels):
        adv_data = data.clone().detach()
        for _ in range(self.steps):
            adv_data.requires_grad = True
            outputs = self.model(adv_data)
            loss = nn.CrossEntropyLoss()(outputs, labels)
            self.model.zero_grad()
            loss.backward()
            adv_data = adv_data + self.alpha * adv_data.grad.sign()
            adv_data = torch.clamp(adv_data, 0, 1).detach()
        return adv_data

    def generate_pgd(self, data, labels, random_start=True):
        adv_data = data.clone().detach()
        if random_start:
            adv_data = adv_data + torch.empty_like(adv_data).uniform_(-self.eps, self.eps)
            adv_data = torch.clamp(adv_data, 0, 1).detach()

        for _ in range(self.steps):
            adv_data.requires_grad = True
            outputs = self.model(adv_data)
            loss = nn.CrossEntropyLoss()(outputs, labels)
            self.model.zero_grad()
            loss.backward()
            adv_data = adv_data + self.alpha * adv_data.grad.sign()
            delta = torch.clamp(adv_data - data, min=-self.eps, max=self.eps)
            adv_data = torch.clamp(data + delta, 0, 1).detach()
        return adv_data

In [9]:
from sklearn.model_selection import train_test_split

def split_data(data, labels, train_size, val_size, test_size, random_state=42):
    if train_size + val_size + test_size > 1.0:
        raise ValueError("Toplam oran 1.0'dan büyük olamaz.")

    X_train, X_temp, y_train, y_temp = train_test_split(data, labels, train_size=train_size, random_state=random_state)
    X_val, X_test, y_val, y_test = train_test_split(
        X_temp, y_temp, test_size=test_size / (test_size + val_size), random_state=random_state
    )
    return X_train, X_val, X_test, y_train, y_val, y_test

X_train, X_val, X_test, y_train, y_val, y_test = split_data(data, labels, train_size=0.28, val_size=0.04, test_size=0.09)

In [10]:
import torch
import torch.nn as nn
import torch.optim as optim

class GraPhyLayer(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(GraPhyLayer, self).__init__()
        self.fc = nn.Linear(input_dim, output_dim)
        self.relu = nn.ReLU()

    def forward(self, x):
        out = self.fc(x)
        out = self.relu(out)
        return out

class GraPhyModel(nn.Module):
    def __init__(self, input_size, hidden_dim=512, output_size=2):
        super(GraPhyModel, self).__init__()
        self.layer1 = GraPhyLayer(input_size, hidden_dim)
        self.layer2 = GraPhyLayer(hidden_dim, hidden_dim)
        self.layer3 = GraPhyLayer(hidden_dim, hidden_dim)
        self.layer4 = GraPhyLayer(hidden_dim, hidden_dim)
        self.layer5 = GraPhyLayer(hidden_dim, hidden_dim)
        self.fc_out = nn.Linear(hidden_dim, output_size)

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        x = self.fc_out(x)
        return x

def initialize_model(input_size, hidden_dim=512, output_size=2, learning_rate=0.0001, loss_function=None):
    """
    Modeli ve optimizasyon ayarlarını başlatır.

    Args:
        input_size (int): Giriş boyutu.
        hidden_dim (int): Gizli katman boyutu.
        output_size (int): Çıkış boyutu.
        learning_rate (float): Öğrenme oranı.
        loss_function (nn.Module): Kayıp fonksiyonu (varsayılan: nn.CrossEntropyLoss).

    Returns:
        tuple: Model, optimizer ve loss function.
    """
    model = GraPhyModel(input_size, hidden_dim, output_size)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, betas=(0.9, 0.999))
    criterion = loss_function if loss_function else nn.CrossEntropyLoss()
    return model, optimizer, criterion

# Kullanım
input_size = X_train.shape[1]
model, optimizer, criterion = initialize_model(input_size, hidden_dim=256, learning_rate=0.001, loss_function=nn.CrossEntropyLoss())

In [11]:
def train_model(model, train_loader, val_loader, optimizer, criterion, num_epochs=50, device='cpu'):
    """
    Modeli eğitmek için dinamik bir fonksiyon.

    Args:
        model (nn.Module): Eğitim yapılacak model.
        train_loader (DataLoader): Eğitim veri yükleyicisi.
        val_loader (DataLoader): Doğrulama veri yükleyicisi.
        optimizer (torch.optim.Optimizer): Optimizasyon algoritması.
        criterion (nn.Module): Kayıp fonksiyonu.
        num_epochs (int): Eğitim epoch sayısı.
        device (str): 'cpu' veya 'cuda' (GPU kullanımı için).

    Returns:
        nn.Module: Eğitilmiş model.
    """
    model.to(device)

    for epoch in range(num_epochs):
        # Eğitim modu
        model.train()
        running_loss = 0.0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        # Doğrulama modu
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()

        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss:.4f}, Validation Loss: {val_loss:.4f}")

    return model

# Kullanım
model = train_model(model, train_loader, val_loader, optimizer, criterion, num_epochs=50, device='cuda')

Epoch [1/50], Loss: 12.2682, Validation Loss: 4.4748
Epoch [2/50], Loss: 6.0266, Validation Loss: 4.3963
Epoch [3/50], Loss: 5.5184, Validation Loss: 4.3354
Epoch [4/50], Loss: 5.5581, Validation Loss: 4.2708
Epoch [5/50], Loss: 5.3445, Validation Loss: 4.2451
Epoch [6/50], Loss: 5.3636, Validation Loss: 4.2887
Epoch [7/50], Loss: 5.2052, Validation Loss: 4.2075
Epoch [8/50], Loss: 5.2350, Validation Loss: 4.1990
Epoch [9/50], Loss: 5.3892, Validation Loss: 4.1421
Epoch [10/50], Loss: 5.2214, Validation Loss: 4.1043
Epoch [11/50], Loss: 5.1814, Validation Loss: 4.1141
Epoch [12/50], Loss: 5.1718, Validation Loss: 4.1037
Epoch [13/50], Loss: 5.1611, Validation Loss: 4.1176
Epoch [14/50], Loss: 5.2026, Validation Loss: 4.1623
Epoch [15/50], Loss: 5.1363, Validation Loss: 4.0717
Epoch [16/50], Loss: 5.0201, Validation Loss: 4.0594
Epoch [17/50], Loss: 5.1317, Validation Loss: 4.0794
Epoch [18/50], Loss: 5.0604, Validation Loss: 4.0745
Epoch [19/50], Loss: 5.1062, Validation Loss: 4.1288
E

In [12]:
def evaluate_model(model, test_loader, device='cpu'):
    """
    Modeli test etmek için dinamik bir fonksiyon.

    Args:
        model (nn.Module): Test edilecek model.
        test_loader (DataLoader): Test veri yükleyicisi.
        device (str): 'cpu' veya 'cuda' (GPU kullanımı için).

    Returns:
        float: Test doğruluk oranı (%).
    """
    model.to(device)
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f'Test Accuracy: {accuracy:.2f}%')
    return accuracy

# Kullanım
test_dataset = TensorDataset(X_test, y_test)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

accuracy = evaluate_model(model, test_loader, device='cuda')

Test Accuracy: 94.79%


In [14]:
def evaluate_model_with_metrics(model, test_loader, target_names, device='cpu'):
    """
    Modeli test eder ve metrikleri hesaplar.

    Args:
        model (nn.Module): Test edilecek model.
        test_loader (DataLoader): Test veri yükleyicisi.
        target_names (list): Sınıf isimleri (ör. ['Normal', 'Anomali']).
        device (str): 'cpu' veya 'cuda' (GPU kullanımı için).

    Returns:
        dict: Hesaplanan metrikler (accuracy, specificity, precision, recall, f1).
    """
    model.to(device)
    model.eval()
    all_labels = []
    all_predictions = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            all_labels.extend(labels.cpu().numpy())
            all_predictions.extend(predicted.cpu().numpy())

    accuracy = accuracy_score(all_labels, all_predictions)
    report = classification_report(all_labels, all_predictions, target_names=target_names, zero_division=0)
    conf_matrix = confusion_matrix(all_labels, all_predictions)
    tn, fp, fn, tp = conf_matrix.ravel()

    specificity = tn / (tn + fp) if (tn + fp) != 0 else 0
    precision = tp / (tp + fp) if (tp + fp) != 0 else 0
    recall = tp / (tp + fn) if (tp + fn) != 0 else 0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) != 0 else 0

    print("Classification Report:\n", report)
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Specificity: {specificity:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")

    return {
        "accuracy": accuracy,
        "specificity": specificity,
        "precision": precision,
        "recall": recall,
        "f1": f1
    }

# Kullanım
metrics = evaluate_model_with_metrics(model, test_loader, target_names=['Normal', 'Anomali'], device='cuda')

Classification Report:
               precision    recall  f1-score   support

      Normal       0.95      1.00      0.97      1382
     Anomali       0.00      0.00      0.00        76

    accuracy                           0.95      1458
   macro avg       0.47      0.50      0.49      1458
weighted avg       0.90      0.95      0.92      1458

Accuracy: 0.9479
Specificity: 1.0000
Precision: nan
Recall: 0.0000
F1 Score: nan


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  precision = tp / (tp + fp)    # Doğruluk (Precision)


In [None]:
class AnomalyGenerator:
    def __init__(self, model, eps=0.1, alpha=0.01, steps=10, random_start=True):
        self.model = model
        self.eps = eps
        self.alpha = alpha
        self.steps = steps
        self.random_start = random_start
        self.methods = {
            'fgsm': self.fgsm,
            'bim': self.bim,
            'pgd': self.pgd
        }

    def fgsm(self, data, labels):
        data = data.clone().detach().requires_grad_(True)
        outputs = self.model(data)
        loss = nn.CrossEntropyLoss()(outputs, labels)
        grad = torch.autograd.grad(loss, data)[0]
        adv_data = data + self.eps * grad.sign()
        return torch.clamp(adv_data, min=0, max=1)

    def bim(self, data, labels):
        adv_data = data.clone().detach()
        for _ in range(self.steps):
            adv_data.requires_grad = True
            outputs = self.model(adv_data)
            loss = nn.CrossEntropyLoss()(outputs, labels)
            self.model.zero_grad()
            loss.backward()
            adv_data = adv_data + self.alpha * adv_data.grad.sign()
            adv_data = torch.clamp(adv_data, min=0, max=1).detach()
        return adv_data

    def pgd(self, data, labels):
        adv_data = data.clone().detach()
        if self.random_start:
            adv_data = adv_data + torch.empty_like(adv_data).uniform_(-self.eps, self.eps)
            adv_data = torch.clamp(adv_data, min=0, max=1).detach()

        for _ in range(self.steps):
            adv_data.requires_grad = True
            outputs = self.model(adv_data)
            loss = nn.CrossEntropyLoss()(outputs, labels)
            self.model.zero_grad()
            loss.backward()
            adv_data = adv_data + self.alpha * adv_data.grad.sign()
            delta = torch.clamp(adv_data - data, min=-self.eps, max=self.eps)
            adv_data = torch.clamp(data + delta, min=0, max=1).detach()
        return adv_data

    def generate_anomalies(self, data, labels, method='fgsm'):
        if method in self.methods:
            return self.methods[method](data, labels)
        else:
            raise ValueError(f"Unknown method: {method}")

In [None]:
anomaly_gen = AnomalyGenerator(model)
X_test_adv = anomaly_gen.generate_anomalies(X_test, y_test)

In [16]:
def split_data(X, y, train_size=0.28, val_size=0.04, test_size=0.09, random_state=42):
    """
    Veriyi eğitim, doğrulama ve test setlerine böler.

    Args:
        X (np.ndarray): Özellikler.
        y (np.ndarray): Etiketler.
        train_size (float): Eğitim seti oranı.
        val_size (float): Doğrulama seti oranı.
        test_size (float): Test seti oranı.
        random_state (int): Rastgelelik için başlangıç değeri.

    Returns:
        tuple: Eğitim, doğrulama ve test setleri (X_train, X_val, X_test, y_train, y_val, y_test).
    """
    if train_size + val_size + test_size > 1.0:
        raise ValueError("Toplam oran 1.0'dan büyük olamaz.")

    X_train, X_temp, y_train, y_temp = train_test_split(X, y, train_size=train_size, random_state=random_state)
    val_test_ratio = test_size / (val_size + test_size)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=val_test_ratio, random_state=random_state)

    return X_train, X_val, X_test, y_train, y_val, y_test

# Kullanım
X_train, X_val, X_test, y_train, y_val, y_test = split_data(X, y, train_size=0.28, val_size=0.04, test_size=0.09)

In [17]:
class GraPhyLayer(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(GraPhyLayer, self).__init__()
        self.fc = nn.Linear(input_dim, output_dim)
        self.relu = nn.ReLU()

    def forward(self, x):
        out = self.fc(x)
        out = self.relu(out)
        return out

class GraPhyModel(nn.Module):
    def __init__(self, input_size, hidden_dims, output_size):
        """
        Dinamik GraPhyModel sınıfı.

        Args:
            input_size (int): Giriş boyutu.
            hidden_dims (list): Gizli katman boyutlarını içeren liste.
            output_size (int): Çıkış boyutu.
        """
        super(GraPhyModel, self).__init__()
        self.layers = nn.ModuleList()

        # İlk katman
        self.layers.append(GraPhyLayer(input_size, hidden_dims[0]))

        # Ara katmanlar
        for i in range(1, len(hidden_dims)):
            self.layers.append(GraPhyLayer(hidden_dims[i - 1], hidden_dims[i]))

        # Çıkış katmanı
        self.fc_out = nn.Linear(hidden_dims[-1], output_size)

    def forward(self, x):
        for layer in self.layers:
            x = layer(x)
        x = self.fc_out(x)
        return x

In [18]:
class AnomalyGenerator:
    def __init__(self, model):
        self.model = model
        self.methods = {
            'fgsm': self.generate_fgsm,
            'bim': self.generate_bim,
            'pgd': self.generate_pgd
        }

    def generate_fgsm(self, data, labels, eps=0.1):
        data = data.clone().detach().requires_grad_(True)
        outputs = self.model(data)
        loss = nn.CrossEntropyLoss()(outputs, labels)
        self.model.zero_grad()
        loss.backward()

        adv_data = data + eps * data.grad.sign()
        return adv_data.detach()

    def generate_bim(self, data, labels, eps=0.1, alpha=0.01, steps=10):
        data = data.clone().detach()
        ori_data = data.clone().detach()

        for _ in range(steps):
            data.requires_grad = True
            outputs = self.model(data)
            loss = nn.CrossEntropyLoss()(outputs, labels)
            self.model.zero_grad()
            loss.backward()

            data = data + alpha * data.grad.sign()
            data = torch.clamp(data, ori_data - eps, ori_data + eps).detach()

        return data

    def generate_pgd(self, data, labels, eps=0.1, alpha=0.01, steps=10, random_start=True):
        data = data.clone().detach()
        if random_start:
            data = data + torch.empty_like(data).uniform_(-eps, eps)
            data = torch.clamp(data, min=0, max=1).detach()

        ori_data = data.clone().detach()

        for _ in range(steps):
            data.requires_grad = True
            outputs = self.model(data)
            loss = nn.CrossEntropyLoss()(outputs, labels)
            self.model.zero_grad()
            loss.backward()

            data = data + alpha * data.grad.sign()
            data = torch.clamp(data, ori_data - eps, ori_data + eps).detach()

        return data

    def generate_anomalies(self, data, labels, method='fgsm', **kwargs):
        """
        Dinamik olarak belirtilen yönteme göre anomali üretir.

        Args:
            data (torch.Tensor): Giriş verisi.
            labels (torch.Tensor): Etiketler.
            method (str): Kullanılacak yöntem ('fgsm', 'bim', 'pgd').
            **kwargs: Yönteme özel parametreler.

        Returns:
            torch.Tensor: Üretilen anomali verisi.
        """
        if method in self.methods:
            return self.methods[method](data, labels, **kwargs)
        else:
            raise ValueError(f"Unknown method: {method}")

In [19]:
def train_model(model, train_loader, val_loader, optimizer, criterion, num_epochs=50, device='cpu'):
    """
    Modeli eğitmek için dinamik bir fonksiyon.

    Args:
        model (nn.Module): Eğitim yapılacak model.
        train_loader (DataLoader): Eğitim veri yükleyicisi.
        val_loader (DataLoader): Doğrulama veri yükleyicisi.
        optimizer (torch.optim.Optimizer): Optimizasyon algoritması.
        criterion (nn.Module): Kayıp fonksiyonu.
        num_epochs (int): Eğitim epoch sayısı.
        device (str): 'cpu' veya 'cuda' (GPU kullanımı için).

    Returns:
        nn.Module: Eğitilmiş model.
    """
    model.to(device)

    for epoch in range(num_epochs):
        # Eğitim modu
        model.train()
        running_loss = 0.0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        # Doğrulama modu
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()

        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss:.4f}, Validation Loss: {val_loss:.4f}")

    return model

model = GraPhyModel(input_size)
optimizer = optim.Adam(model.parameters(), lr=0.0001, betas=(0.9, 0.999))
criterion = nn.CrossEntropyLoss()

train_dataset = TensorDataset(torch.FloatTensor(X_train), torch.LongTensor(y_train))
val_dataset = TensorDataset(torch.FloatTensor(X_val), torch.LongTensor(y_val))

train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=32, shuffle=False)

model = train_model(model, train_loader, val_loader, optimizer, criterion, num_epochs=50, device='cuda')

Epoch [1/50], Loss: 24.1026, Validation Loss: 1.5193
Epoch [2/50], Loss: 15.6634, Validation Loss: 1.3401
Epoch [3/50], Loss: 15.5176, Validation Loss: 1.4785
Epoch [4/50], Loss: 15.0240, Validation Loss: 1.3777
Epoch [5/50], Loss: 14.8722, Validation Loss: 1.3524
Epoch [6/50], Loss: 14.8079, Validation Loss: 1.2901
Epoch [7/50], Loss: 14.6921, Validation Loss: 1.3838
Epoch [8/50], Loss: 14.7178, Validation Loss: 1.4183
Epoch [9/50], Loss: 14.8644, Validation Loss: 1.3729
Epoch [10/50], Loss: 14.6545, Validation Loss: 1.3491
Epoch [11/50], Loss: 14.7580, Validation Loss: 1.3560
Epoch [12/50], Loss: 14.6857, Validation Loss: 1.4932
Epoch [13/50], Loss: 14.6414, Validation Loss: 1.4618
Epoch [14/50], Loss: 14.5706, Validation Loss: 1.4367
Epoch [15/50], Loss: 14.4608, Validation Loss: 1.3983
Epoch [16/50], Loss: 14.4717, Validation Loss: 1.4505
Epoch [17/50], Loss: 14.5682, Validation Loss: 1.4234
Epoch [18/50], Loss: 14.3284, Validation Loss: 1.5200
Epoch [19/50], Loss: 14.4042, Validat

In [20]:
def evaluate_with_anomalies(model, X_test, y_test, anomaly_generator, methods=['fgsm', 'bim', 'pgd'], device='cpu'):
    """
    Modeli anomali verileriyle test eder ve metrikleri hesaplar.

    Args:
        model (nn.Module): Test edilecek model.
        X_test (np.ndarray): Test verisi.
        y_test (np.ndarray): Test etiketleri.
        anomaly_generator (AnomalyGenerator): Anomali üretici.
        methods (list): Kullanılacak anomali üretim yöntemleri.
        device (str): 'cpu' veya 'cuda' (GPU kullanımı için).

    Returns:
        dict: Hesaplanan metrikler (accuracy, precision, recall, specificity, f1).
    """
    model.to(device)
    model.eval()

    # Anomali verilerini üret
    X_test_tensor = torch.FloatTensor(X_test).to(device)
    y_test_tensor = torch.LongTensor(y_test).to(device)

    all_anomalies = []
    for method in methods:
        anomalies = anomaly_generator.generate_anomalies(X_test_tensor, y_test_tensor, method=method)
        all_anomalies.append(anomalies.cpu().numpy())

    # Anomali verilerini birleştir
    X_test_with_anomalies = np.vstack([X_test] + all_anomalies)
    y_test_with_anomalies = np.hstack([y_test, np.ones(sum(a.shape[0] for a in all_anomalies))])

    # Tensorlara dönüştür
    X_test_tensor = torch.FloatTensor(X_test_with_anomalies).to(device)
    y_test_tensor = torch.LongTensor(y_test_with_anomalies).to(device)

    # Modeli test et
    with torch.no_grad():
        test_outputs = model(X_test_tensor)
        _, predicted = torch.max(test_outputs, 1)

    # Metrikleri hesapla
    accuracy = accuracy_score(y_test_tensor.cpu().numpy(), predicted.cpu().numpy())
    precision = precision_score(y_test_tensor.cpu().numpy(), predicted.cpu().numpy())
    recall = recall_score(y_test_tensor.cpu().numpy(), predicted.cpu().numpy())
    tn, fp, fn, tp = confusion_matrix(y_test_tensor.cpu().numpy(), predicted.cpu().numpy()).ravel()
    specificity = tn / (tn + fp) if (tn + fp) != 0 else 0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) != 0 else 0

    # Sonuçları yazdır
    print(f'Accuracy: {accuracy:.4f}')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'Specificity: {specificity:.4f}')
    print(f'F1 Score: {f1:.4f}')

    return {
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "specificity": specificity,
        "f1": f1
    }

# Kullanım
metrics = evaluate_with_anomalies(model, X_test, y_test, anomaly_generator, methods=['fgsm', 'bim', 'pgd'], device='cuda')

Accuracy: 0.0310
Precision: 0.0000
Recall: 0.0000
Specificity: 1.0000
F1 Score: nan


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  f1 = 2 * (precision * recall) / (precision + recall)


In [29]:
merged_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2924 entries, 0 to 2923
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype              
---  ------       --------------  -----              
 0   timestamp    2924 non-null   datetime64[ns, UTC]
 1   pm25_avg_60  2924 non-null   float64            
 2   windspeed    2924 non-null   float64            
 3   winddir      2924 non-null   float64            
dtypes: datetime64[ns, UTC](1), float64(3)
memory usage: 178.8 KB


In [21]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, accuracy_score, confusion_matrix

# merged_data = pd.read_csv('your_data.csv') # Veriyi yükleyin

def normalise(data):
    return (data - data.mean()) / data.std()

features = ['pm25_avg_60', 'windspeed', 'winddir']
merged_data[features] = normalise(merged_data[features])

In [22]:
def generate_anomalies(data, num_anomalies=1000):
    anomalies = []
    for _ in range(num_anomalies):
        index = np.random.randint(0, data.shape[0])
        anomaly = data[index] + np.random.normal(0, 1, data[index].shape)
        anomalies.append(anomaly)
    return np.array(anomalies)

normal_data = merged_data[features].values

anomaly_data = generate_anomalies(normal_data, num_anomalies=1000)  # 1000 anomali üret

In [24]:
X = np.vstack((normal_data, anomaly_data))
y = np.hstack((np.zeros(normal_data.shape[0]), np.ones(anomaly_data.shape[0])))

X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.13, random_state=42)  # %28 eğitim
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.6923, random_state=42)  # %4 doğrulama ve %9 test

In [25]:
class GraPhyLayer(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(GraPhyLayer, self).__init__()
        self.fc = nn.Linear(input_dim, output_dim)
        self.relu = nn.ReLU()

    def forward(self, x):
        out = self.fc(x)
        out = self.relu(out)
        return out

class GraPhyModel(nn.Module):
    def __init__(self, input_size, hidden_dim=512, output_size=2):
        super(GraPhyModel, self).__init__()
        self.layer1 = GraPhyLayer(input_size, hidden_dim)
        self.layer2 = GraPhyLayer(hidden_dim, hidden_dim)
        self.layer3 = GraPhyLayer(hidden_dim, hidden_dim)
        self.layer4 = GraPhyLayer(hidden_dim, hidden_dim)
        self.layer5 = GraPhyLayer(hidden_dim, hidden_dim)
        self.fc_out = nn.Linear(hidden_dim, output_size)

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        x = self.fc_out(x)
        return x

In [26]:
input_size = X_train.shape[1]
model = GraPhyModel(input_size)

optimizer = optim.Adam(model.parameters(), lr=0.0001, betas=(0.9, 0.999))
criterion = nn.CrossEntropyLoss()

train_dataset = TensorDataset(torch.FloatTensor(X_train), torch.LongTensor(y_train))
val_dataset = TensorDataset(torch.FloatTensor(X_val), torch.LongTensor(y_val))

train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=32, shuffle=False)

num_epochs = 50
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for inputs, labels in val_loader:
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss:.4f}, Validation Loss: {val_loss:.4f}")

Epoch [1/50], Loss: 60.1515, Validation Loss: 2.5585
Epoch [2/50], Loss: 44.4334, Validation Loss: 2.1748
Epoch [3/50], Loss: 37.2413, Validation Loss: 1.8297
Epoch [4/50], Loss: 34.1474, Validation Loss: 1.7889
Epoch [5/50], Loss: 31.9695, Validation Loss: 1.6694
Epoch [6/50], Loss: 30.1265, Validation Loss: 1.6336
Epoch [7/50], Loss: 29.5877, Validation Loss: 1.5462
Epoch [8/50], Loss: 29.4214, Validation Loss: 1.4780
Epoch [9/50], Loss: 28.1360, Validation Loss: 1.6396
Epoch [10/50], Loss: 28.2332, Validation Loss: 1.5894
Epoch [11/50], Loss: 27.0595, Validation Loss: 1.4489
Epoch [12/50], Loss: 26.7501, Validation Loss: 1.5674
Epoch [13/50], Loss: 26.2023, Validation Loss: 1.5061
Epoch [14/50], Loss: 26.1530, Validation Loss: 1.5522
Epoch [15/50], Loss: 25.7509, Validation Loss: 1.5736
Epoch [16/50], Loss: 26.2591, Validation Loss: 1.4641
Epoch [17/50], Loss: 25.2213, Validation Loss: 1.4762
Epoch [18/50], Loss: 24.6848, Validation Loss: 1.4933
Epoch [19/50], Loss: 24.2464, Validat

In [27]:
model.eval()
with torch.no_grad():
    test_outputs = model(torch.FloatTensor(X_test))
    _, predicted = torch.max(test_outputs, 1)

accuracy = accuracy_score(y_test, predicted.numpy())
precision = precision_score(y_test, predicted.numpy())
recall = recall_score(y_test, predicted.numpy())
tn, fp, fn, tp = confusion_matrix(y_test, predicted.numpy()).ravel()
specificity = tn / (tn + fp)
f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) != 0 else 0

print(f'Accuracy: {accuracy:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'Specificity: {specificity:.4f}')
print(f'F1 Score: {f1:.4f}')

Accuracy: 0.8898
Precision: 0.8684
Recall: 0.6947
Specificity: 0.9614
F1 Score: 0.7719


In [28]:
model.eval()
with torch.no_grad():
    test_inputs = torch.tensor(X_test, dtype=torch.float32)
    test_outputs = model(test_inputs)

    _, predicted = torch.max(test_outputs, 1)

total_anomalies = np.sum(y_test)  
correct_predictions = np.sum(predicted.numpy()[y_test == 1])  # Modelin doğru tahmin ettiği anomaliler

print(f"Toplam gerçek anomali sayısı: {total_anomalies}")
print(f"Modelin doğru tahmin ettiği anomali sayısı: {correct_predictions}")
print(f"Modelin doğruluk oranı: {correct_predictions / total_anomalies * 100:.2f}%")

Toplam gerçek anomali sayısı: 95.0
Modelin doğru tahmin ettiği anomali sayısı: 66
Modelin doğruluk oranı: 69.47%
