In [None]:
import numpy as np
import tensorflow as tf
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import IsolationForest
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LeakyReLU, BatchNormalization, Dropout
from tensorflow.keras.optimizers import Adam

# --- Data loading -----------------------------------------------------------
# Read the CSV, bucket the rows by their "Number" value and keep bucket 1,
# dropping its first column.
df = pd.read_csv("Dataset_timeseries.csv")
data = df.groupby("Number")
split = dict(iter(data))
dataset = split[1].iloc[:, 1:]

# The single feature used throughout, as an (n_samples, 1) column vector.
sequences = dataset['SOPAS'].values.reshape(-1, 1)

# Scaler targeting [-1, 1], the range of the generator's tanh output layer.
sc = MinMaxScaler(feature_range=(-1, 1))

# --- Outlier / inlier split -------------------------------------------------
# Isolation Forest labels each row: -1 for outliers, +1 for inliers.
model = IsolationForest(contamination=0.05, random_state=42)
predictions = model.fit(sequences).predict(sequences)

outliers = np.where(predictions == -1)

# Raw outlier values are kept for later comparison with generated samples;
# the scaled copy is what the GAN is trained on.
un_transformed = sequences[outliers].reshape(-1, 1)
outlier_data = sc.fit_transform(un_transformed)

normal_data = sequences[np.where(predictions == 1)].reshape(-1, 1)
print(len(normal_data))

# Define GAN components
def build_generator(noise_dim):
    """Build the generator network.

    The network is a stack of four Dense blocks (256, 512, 1024 and 2048
    units), each followed by LeakyReLU(alpha=0.01) and BatchNormalization,
    and ends in a single tanh unit, so every output lies in [-1, 1].

    Args:
        noise_dim: Length of the noise vector fed to the first Dense layer.

    Returns:
        An uncompiled Keras ``Sequential`` model.
    """
    generator_net = Sequential()

    for position, width in enumerate((256, 512, 1024, 2048)):
        if position == 0:
            # Only the first layer declares the input size.
            generator_net.add(Dense(width, input_dim=noise_dim))
        else:
            generator_net.add(Dense(width))
        generator_net.add(LeakyReLU(alpha=0.01))
        generator_net.add(BatchNormalization())

    generator_net.add(Dense(1, activation='tanh'))
    return generator_net

def build_discriminator():
    """Build the discriminator network.

    Takes one scalar feature per sample and passes it through four Dense
    blocks (256, 512, 1024 and 2048 units), each followed by
    LeakyReLU(alpha=0.01) and Dropout(0.4). The final sigmoid unit emits one
    value in (0, 1) per sample.

    Returns:
        An uncompiled Keras ``Sequential`` model.
    """
    critic_net = Sequential()

    for position, width in enumerate((256, 512, 1024, 2048)):
        if position == 0:
            # Only the first layer declares the (scalar) input size.
            critic_net.add(Dense(width, input_dim=1))
        else:
            critic_net.add(Dense(width))
        critic_net.add(LeakyReLU(alpha=0.01))
        critic_net.add(Dropout(0.4))

    critic_net.add(Dense(1, activation='sigmoid'))
    return critic_net

def build_gan(generator, discriminator):
    """Stack the generator and the discriminator into one model.

    Args:
        generator: Model mapping noise to samples.
        discriminator: Model scoring samples.

    Returns:
        An uncompiled ``Sequential`` that feeds the generator's output
        straight into the discriminator.
    """
    return Sequential([generator, discriminator])

# Initialize models
noise_dim = 200
generator = build_generator(noise_dim)
discriminator = build_discriminator()

# Compile models
# Compiling attaches an optimizer to each model; train_step applies gradients
# by hand through `discriminator.optimizer` and `gan.optimizer`.
discriminator.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.0002, beta_1=0.5), metrics=['accuracy'])

# NOTE: the discriminator is deliberately NOT frozen here.
# train_step reads `discriminator.trainable_variables`, and Keras returns an
# empty list for a model whose `trainable` flag is False. With the former
# `discriminator.trainable = False`, the discriminator update was silently
# skipped on every step and the network stayed at its random initialization.
# Freezing is also unnecessary in this custom loop: the generator step only
# applies gradients to `generator.trainable_variables`.
gan = build_gan(generator, discriminator)
gan.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.0002, beta_1=0.5))

# Training function
@tf.function
def train_step(real_samples):
    """Run one discriminator update followed by one generator update.

    Uses the module-level ``generator``, ``discriminator``, ``gan`` and
    ``noise_dim``. Discriminator gradients are applied through
    ``discriminator.optimizer``; generator gradients through ``gan.optimizer``.

    An update is skipped without error when no gradient is available for a
    model, which includes the case where the model exposes no trainable
    variables (e.g. its ``trainable`` flag is False).

    Args:
        real_samples: Batch of real samples; its leading dimension sets the
            number of noise vectors drawn.

    Returns:
        Tuple ``(d_loss, g_loss)`` of scalar tensors: the discriminator loss
        (mean of the real and fake binary cross-entropies) and the generator
        loss (binary cross-entropy of fakes scored against "real" labels).
    """
    n_samples = real_samples.shape[0]
    bce = tf.keras.losses.binary_crossentropy

    # Fakes for the discriminator step are produced outside its tape, so that
    # step never differentiates through the generator.
    fake_batch = generator(tf.random.normal([n_samples, noise_dim]), training=True)

    ones = tf.ones((n_samples, 1))
    zeros = tf.zeros((n_samples, 1))

    # ---- Discriminator update ----
    with tf.GradientTape() as disc_tape:
        real_scores = discriminator(real_samples, training=True)
        fake_scores = discriminator(fake_batch, training=True)
        real_term = tf.reduce_mean(bce(ones, real_scores))
        fake_term = tf.reduce_mean(bce(zeros, fake_scores))
        d_loss = 0.5 * (real_term + fake_term)

    disc_vars = discriminator.trainable_variables
    disc_pairs = [
        (grad, var)
        for grad, var in zip(disc_tape.gradient(d_loss, disc_vars), disc_vars)
        if grad is not None
    ]
    if disc_pairs:
        discriminator.optimizer.apply_gradients(disc_pairs)

    # ---- Generator update ----
    with tf.GradientTape() as gen_tape:
        fresh_noise = tf.random.normal([n_samples, noise_dim])
        fooled_scores = discriminator(generator(fresh_noise, training=True), training=True)
        # The generator is rewarded when its fakes are scored as real.
        g_loss = tf.reduce_mean(bce(ones, fooled_scores))

    gen_vars = generator.trainable_variables
    gen_pairs = [
        (grad, var)
        for grad, var in zip(gen_tape.gradient(g_loss, gen_vars), gen_vars)
        if grad is not None
    ]
    if gen_pairs:
        gan.optimizer.apply_gradients(gen_pairs)

    return d_loss, g_loss

def train_gan(epochs, batch_size, data):
    """Train the GAN by repeatedly calling ``train_step`` on random batches.

    Each iteration draws ``batch_size`` row indices uniformly at random (with
    replacement) from ``data`` and performs a single ``train_step``. Note that
    one "epoch" here is therefore one batch, not a full pass over ``data``.

    Args:
        epochs: Number of training iterations.
        batch_size: Number of rows sampled per iteration.
        data: Array of real samples, indexed along its first axis.
    """
    for epoch in range(epochs):
        batch_idx = np.random.randint(low=0, high=data.shape[0], size=batch_size)
        d_loss, g_loss = train_step(data[batch_idx])

        # Report the losses only every 100th iteration.
        if epoch % 100 != 0:
            continue
        print(f"{epoch}: Discriminator Loss: {d_loss.numpy()}, Generator Loss: {g_loss.numpy()}")


# The GAN is trained on the scaled outliers only.
data = outlier_data

# Train the GAN
train_gan(epochs=20000, batch_size=128, data=data)

# Generate one synthetic sample per real outlier. The count is derived from
# `un_transformed` instead of the former hard-coded 1033, so this cell stays
# correct if the dataset or the contamination rate changes.
noise = np.random.normal(0, 1, size=(len(un_transformed), noise_dim))
generated_samples = generator.predict(noise)
# Map the generator's [-1, 1] output back to the original value range using
# the scaler that was fitted on the real outliers.
generated_samples = sc.inverse_transform(generated_samples)

# Compare first and second moments of real vs. generated outliers.
real_mean = np.mean(un_transformed)
real_std = np.std(un_transformed)
generated_mean = np.mean(generated_samples)
generated_std = np.std(generated_samples)

print(f"Real Data - Mean: {real_mean}, Std: {real_std}")
print(f"Generated Data - Mean: {generated_mean}, Std: {generated_std}")


In [None]:
# Persist the trained generator to 'Generator.h5' in the working directory so
# that later cells can reload it without retraining.
generator.save('Generator.h5')

In [None]:

print(normal_data.shape)

# Reload the generator from disk and draw a large batch of synthetic samples.
generator = tf.keras.models.load_model('Generator.h5')
noise = np.random.normal(0, 1, size=(20333, noise_dim))

# Generator output is in scaled space; `sc` maps it back to raw units.
# NOTE(review): assumes `sc` still holds the fit made on the real outliers.
generated_samples = sc.inverse_transform(generator.predict(noise))

# Summary statistics of the real outliers vs. the generated samples.
real_mean, real_std = np.mean(un_transformed), np.std(un_transformed)
generated_mean, generated_std = np.mean(generated_samples), np.std(generated_samples)

print(f"Real Data - Mean: {real_mean}, Std: {real_std}")
print(f"Generated Data - Mean: {generated_mean}, Std: {generated_std}")
print(generated_samples)
print()

In [179]:
print(un_transformed.shape)
# Draw exactly as many synthetic samples as there are real outliers. The
# count comes from `un_transformed` rather than the former hard-coded 1033,
# which was just this array's length for the current dataset.
noise = np.random.normal(0, 1, size=(un_transformed.shape[0], noise_dim))
generated_samples = generator.predict(noise)
# Undo the [-1, 1] scaling so the samples are in the original units.
generated_samples = sc.inverse_transform(generated_samples)

(1033, 1)
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Real outliers (raw units) vs. GAN-generated samples (inverse-transformed).
actual_data = un_transformed
generated_data = generated_samples
# Create a KDE plot
plt.figure(figsize=(12, 6))

# Both arrays are (n, 1) column vectors. seaborn reads a 2-D array as
# wide-form data (one hue level per column), in which case the colour comes
# from the hue palette rather than the `color` argument. Flattening to 1-D
# makes each call draw one curve with the requested colour and label.

# Plot KDE for the actual dataset
sns.kdeplot(np.ravel(actual_data), label='Actual Data', color='blue', fill=False, alpha=1)

# Plot KDE for the generated dataset
sns.kdeplot(np.ravel(generated_data), label='Generated Data', color='yellow', fill=False, alpha=0.3)

# Add labels and title
plt.xlabel('Feature Value')
plt.ylabel('Density')
plt.title('KDE Comparison of Actual and Generated Datasets')
plt.legend()

# Show the plot
plt.show()


In [None]:
import matplotlib.pyplot as plt

# Overlay the generated samples (blue, drawn on top) on the full original
# series (red), each plotted against its own positional index.
fig, ax = plt.subplots(figsize=(15, 10))
ax.scatter(range(len(generated_samples)), generated_samples, c='blue', marker='o', edgecolor='white', zorder=5)
ax.scatter(range(len(sequences)), sequences, c='red', marker='o', edgecolor='black')

# Title and axis labels
ax.set_title("Single Feature Points")
ax.set_xlabel("Index")
ax.set_ylabel("Feature Value")

# Render the figure
plt.show()


In [None]:
# Fit a second Isolation Forest, this time on the generated samples, with a
# much lower contamination rate (0.2%).
iso_forest = IsolationForest(contamination=0.002, random_state=42)
predictions = iso_forest.fit(generated_samples).predict(generated_samples)

# Naming caveat: IsolationForest.predict returns +1 for inliers and -1 for
# outliers. `outliers` therefore indexes the rows the forest considers
# INLIERS (stored in `actual_noise`), while `true_val` indexes the rows it
# flags as outliers (stored in `outlier_data`, overwriting the earlier
# variable of the same name).
outliers = np.where(predictions == 1)
true_val = np.where(predictions == -1)
actual_noise = generated_samples[outliers].reshape(-1, 1)
outlier_data = generated_samples[true_val].reshape(-1, 1)
print(len(outlier_data))

# Blue: rows predicted +1 (drawn on top). Red: rows predicted -1.
fig, ax = plt.subplots(figsize=(15, 10))
ax.scatter(range(len(actual_noise)), actual_noise, c='blue', marker='o', edgecolor='black', zorder=5)
ax.scatter(range(len(outlier_data)), outlier_data, c='red', marker='o', edgecolor='black')

# Title and axis labels
ax.set_title("Single Feature Points")
ax.set_xlabel("Index")
ax.set_ylabel("Feature Value")

# Render the figure
plt.show()



In [18]:
# Re-read the CSV and rebuild `sequences` (group 1, column 'SOPAS') from
# scratch, replacing whatever earlier cells left in these variables.
df = pd.read_csv("Dataset_timeseries.csv")
data = df.groupby("Number")
split = dict(iter(data))
dataset = split[1].iloc[:, 1:]

# Column vector of shape (n_samples, 1).
sequences = np.reshape(dataset['SOPAS'].values, (-1, 1))
print(sequences.shape)

(20644, 1)


In [None]:

# Label the two sources and stack them into one frame:
#   Number == 0 -> GAN-generated samples
#   Number == 1 -> every row of the original series (`sequences`)
# Despite its name, `normal_data` holds the whole original series, not only
# the rows an Isolation Forest marked as inliers.
actual_noise = pd.DataFrame(generated_samples, columns=["SOPAS"]).assign(Number=0)
normal_data = pd.DataFrame(sequences, columns=["SOPAS"]).assign(Number=1)
final_dataset = pd.concat([normal_data, actual_noise], ignore_index=True)

# No shuffling happens here: rows are written in concatenation order
# (original series first, generated samples after), with the index column.
final_dataset.to_csv('final_dataset.csv')
print(final_dataset)

In [None]:

from sklearn.model_selection import train_test_split
# Features (raw 'SOPAS' values) and labels ('Number': 0 = generated,
# 1 = original) as column vectors.
X = final_dataset['SOPAS'].values.reshape(-1, 1)
Y = final_dataset['Number'].values.reshape(-1, 1)
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=45)

# Scale with a scaler dedicated to the classifier, fitted on the training
# split only. The previous code refit the shared `sc` on the full dataset
# before splitting, which had two problems:
#   * the test rows influenced the scaling (data leakage);
#   * `sc` no longer matched the GAN's outlier scaling, so re-running any
#     cell that calls `sc.inverse_transform` gave wrong values.
# `X` itself is now left unscaled; only the train/test splits are transformed.
clf_scaler = MinMaxScaler(feature_range=(-1, 1))
x_train = clf_scaler.fit_transform(x_train)
x_test = clf_scaler.transform(x_test)
shape = x_train.shape
print(x_train, y_train)

# Small binary classifier: one hidden ReLU layer, sigmoid output.
# Note: this rebinds `model`, which earlier held the IsolationForest.
model = Sequential()
model.add(Dense(32, activation='relu', input_dim=1))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
history = model.fit(x_train, y_train, epochs=50, batch_size=64, validation_split=0.2)

In [None]:
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages


# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 predictions.
y_pred = (model.predict(x_test) > 0.5).astype(int)
accuracy = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)

# Print results
print(f"Accuracy: {accuracy}")

# Confusion matrix heatmap, with the labels present in y_test as tick labels.
fig, ax = plt.subplots(figsize=(10, 7))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=np.unique(y_test), yticklabels=np.unique(y_test), ax=ax)
ax.set_xlabel('Predicted Labels')
ax.set_ylabel('True Labels')
ax.set_title('Confusion Matrix')
plt.show()


# Per-class precision/recall/F1 as a nested dict.
report_dict = classification_report(y_test, y_pred, output_dict=True)

# (Redundant re-import kept from the original cell.)
import pandas as pd

# Report keys without the scalar 'accuracy' entry.
class_names = list(report_dict)
class_names.remove('accuracy')  # Remove 'accuracy' from the class names list

# One row per report entry; the former index becomes the 'Class' column.
report_df = (
    pd.DataFrame(report_dict)
    .transpose()
    .reset_index()
    .rename(columns={'index': 'Class'})
)

# Render the report as a table on a blank figure and write it to a PDF.
pdf_filename = 'classification_report.pdf'

with PdfPages(pdf_filename) as pdf:
    report_fig = plt.figure(figsize=(12, 8))
    plt.table(cellText=report_df.values,
              colLabels=report_df.columns,
              rowLabels=report_df['Class'],
              cellLoc='center',
              loc='center')
    plt.axis('off')
    plt.title(f'Classification Report\nAccuracy: {accuracy:.2f}')

    # Write this figure as a PDF page, then release it.
    pdf.savefig(report_fig)
    plt.close(report_fig)

print(f'Classification report saved to {pdf_filename}')


In [None]:
# Save the current `model` to 'Noise Classifier.h5'.
# NOTE(review): `model` is reused for several objects in this notebook; this
# assumes it is still the Keras classifier from the training cell — confirm
# the cells were run in order.
model.save('Noise Classifier.h5')