In [60]:
import os
import pandas as pd
from yaml import full_load

# Veri klasörü
data_dir = 'last_data'  
all_files = os.listdir(data_dir)
dataframes = []

# Her CSV dosyasını okuyup bir DataFrame listesine ekliyoruz
for file in all_files:
    if file.endswith('.csv'):
        df = pd.read_csv(os.path.join(data_dir, file))
        # 'timestamp' kolonunu çıkarıyoruz
        if 'timestamp' in df.columns:
            df = df.drop(columns=['timestamp'])
        dataframes.append(df)

# Tüm DataFrame'leri birleştiriyoruz
full_data = pd.concat(dataframes, ignore_index=True)

# Verinin ilk birkaç satırını gösterelim
print(full_data.head())

   pm25_avg_60  windspeed  winddir
0     3.132667       14.2    329.0
1     3.130000        9.2    329.0
2     3.424667        5.5    342.0
3     3.349500        0.3      3.0
4     3.379000        0.2      5.0


In [61]:
full_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 120130 entries, 0 to 120129
Data columns (total 3 columns):
 #   Column       Non-Null Count   Dtype  
---  ------       --------------   -----  
 0   pm25_avg_60  120130 non-null  float64
 1   windspeed    120089 non-null  float64
 2   winddir      120089 non-null  float64
dtypes: float64(3)
memory usage: 2.7 MB


In [62]:
full_data = full_data.dropna()  # Tüm NaN içeren satırları kaldır

In [63]:
full_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 120089 entries, 0 to 120129
Data columns (total 3 columns):
 #   Column       Non-Null Count   Dtype  
---  ------       --------------   -----  
 0   pm25_avg_60  120089 non-null  float64
 1   windspeed    120089 non-null  float64
 2   winddir      120089 non-null  float64
dtypes: float64(3)
memory usage: 3.7 MB


In [64]:
X_train, X_test = train_test_split(full_data, test_size=0.2, random_state=42)

In [65]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [66]:
class GraPhy(tf.keras.Model):
    def __init__(self, hidden_dim=512):
        super(GraPhy, self).__init__()
        self.layers_list = [tf.keras.layers.Dense(hidden_dim, activation='relu') for _ in range(4)]
        self.output_layer = tf.keras.layers.Dense(X_train.shape[1], activation='sigmoid')  # Giriş boyutuyla eşleşen çıkış

    def call(self, inputs):
        x = inputs
        for layer in self.layers_list:
            x = layer(x)
        return self.output_layer(x)

In [67]:
model = GraPhy()
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001,
                                                 beta_1=0.9, 
                                                 beta_2=0.999),
              loss='mean_squared_error')  # Giriş verisi ile çıkış arasındaki farkı minimize et

# Modeli eğitiyoruz
model.fit(X_train, X_train, batch_size=32, epochs=50)

Epoch 1/50
[1m3003/3003[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 4ms/step - loss: 0.6370
Epoch 2/50
[1m3003/3003[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - loss: 0.5950
Epoch 3/50
[1m3003/3003[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - loss: 0.5751
Epoch 4/50
[1m3003/3003[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 3ms/step - loss: 0.5884
Epoch 5/50
[1m3003/3003[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 4ms/step - loss: 0.5806
Epoch 6/50
[1m3003/3003[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - loss: 0.5875
Epoch 7/50
[1m3003/3003[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - loss: 0.5835
Epoch 8/50
[1m3003/3003[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - loss: 0.5906
Epoch 9/50
[1m3003/3003[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 4ms/step - loss: 0.5927
Epoch 10/50
[1m3003/3003[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

<keras.src.callbacks.history.History at 0x3157bad70>

In [74]:
# PGD ile anomali oluşturma fonksiyonu
def generate_anomalies_pgd(model, data, n_anomalies, epsilon, alpha, num_steps):
    anomalies = []
    
    for _ in range(n_anomalies):
        # Rastgele bir gözlem seç
        original_sample = data[np.random.randint(len(data))]
        anomaly = tf.convert_to_tensor(original_sample.copy())  # Tensor'a dönüştür

        # Anomali oluşturma süreci
        for _ in range(num_steps):
            with tf.GradientTape() as tape:
                tape.watch(anomaly)  # Anomali tensor'ını takip et
                prediction = model(tf.expand_dims(anomaly, axis=0))
                # Kayıp hesapla (orijinal örneği hedef olarak al)
                loss = tf.keras.losses.mean_squared_error(original_sample, prediction)
            gradients = tape.gradient(loss, anomaly)
            # PGD güncellemesi
            anomaly = anomaly + alpha * tf.sign(gradients)
            # Epsilon'a göre sınırlama
            anomaly = tf.clip_by_value(anomaly, original_sample - epsilon, original_sample + epsilon)

        anomalies.append(anomaly.numpy())
    
    return np.array(anomalies)

In [75]:
# Test verileri ile anomali oluşturma
n_anomalies = len(X_test)  # Test verisindeki gözlem sayısı kadar anomali üret
anomalies = generate_anomalies_pgd(model, X_test, n_anomalies, epsilon=0.1, alpha=0.01, num_steps=10)

AttributeError: module 'keras._tf_keras.keras.losses' has no attribute 'mean_squared_error'

In [None]:
# Anomalilerin etiketlerini saklama
anomaly_labels = np.ones(n_anomalies)  # Tüm anomali etiketlerini 1 (anomalik) olarak ayarla

# Normal verilerin etiketlerini oluşturma (0 olarak)
normal_labels = np.zeros(len(X_test))

In [None]:
# Modelin tahmini
predictions = model.predict(anomalies)
predicted_labels = (predictions > 0.5).astype(int)  # 0.5 eşik değerini kullanarak tahminleri ikili forma çevir

# Sonuçları karşılaştırma
comparison = np.column_stack((anomaly_labels, predicted_labels))
print(comparison)  # Gerçek ve tahmin edilen etiketleri göster