# BiGRU All Features

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, precision_score, recall_score, f1_score, roc_auc_score
from google.colab import drive
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, GRU, Bidirectional, Dropout
from tensorflow.keras.utils import to_categorical

# Attach Google Drive so the workbook stored there is reachable from this runtime.
drive.mount('/content/drive')

# Path of the phishing dataset workbook on the mounted Drive.
data_path = '/content/drive/My Drive/Colab Notebooks/phishing+websites/phishingDataset.xlsx'

# Read the workbook, then show a short preview and the frame's dimensions.
df = pd.read_excel(data_path)
print("First 5 rows of the dataset:", df.head(), sep="\n")
print("\nShape of the dataset:", df.shape)

# Discard rows that are exact duplicates of an earlier row.
df = df.drop_duplicates()
print("Shape after removing duplicates:", df.shape)

# Helper that turns raw bytes cells into text.
def clean_byte_string(s):
    """Return *s* decoded as UTF-8 if it is a ``bytes`` object, else *s* unchanged."""
    if not isinstance(s, bytes):
        return s
    return s.decode('utf-8')

# Apply the bytes-to-text helper to every cell of the DataFrame.
# DataFrame.applymap was renamed to DataFrame.map in pandas 2.1 (the old name
# is deprecated), so use the new name whenever the installed pandas has it.
if hasattr(pd.DataFrame, 'map'):
    df = df.map(clean_byte_string)
else:
    df = df.applymap(clean_byte_string)

# Convert every column to a numeric dtype.
# Cells are cast to str first: the .str accessor raises AttributeError on
# columns that are already numeric and yields NaN for non-string cells inside
# mixed columns, which would either crash here or silently discard valid rows
# in the dropna below. strip("b'") removes leading/trailing "b" and "'"
# characters (the text "b'-1'" becomes "-1"); anything that still does not
# parse as a number becomes NaN.
for col in df.columns:
    df[col] = pd.to_numeric(df[col].astype(str).str.strip("b'"), errors='coerce')

# Re-check missing values after the conversion.
missing_values = df.isnull().sum()
print("Missing values in each column after conversion:\n", missing_values)

# Drop rows that contain any missing value.
df.dropna(inplace=True)
print("Shape after removing missing values:", df.shape)

# Min-max normalise every column to [0, 1].
# The scaler is fitted on all columns, so the 'Result' label is rescaled too.
# NOTE(review): presumably the raw labels are -1/1 and this rescaling is what
# turns them into the 0/1 class indices to_categorical needs - confirm.
# NOTE(review): the scaler is fitted on the full dataset before the
# cross-validation split further down, so test folds influence the scaling.
scaler = MinMaxScaler()
df[df.columns] = scaler.fit_transform(df)
print("First 5 rows after normalization:\n", df.head())

# Split features and label: every column except 'Result' is a feature.
X = df.drop('Result', axis=1)
y = df['Result']

# One-hot encode the label.
y = to_categorical(y)

# Visualise the number of samples per class as a pie chart.
# idxmax over the one-hot rows recovers the class index of each sample.
class_counts = pd.DataFrame(y).idxmax(axis=1).value_counts()
class_labels = class_counts.index
class_sizes = class_counts.values
class_percentages = (class_sizes / class_sizes.sum()) * 100

plt.figure(figsize=(10, 6))
plt.pie(class_sizes, labels=[f'{label}: {size} ({percentage:.2f}%)' for label, size, percentage in zip(class_labels, class_sizes, class_percentages)],
        autopct='%1.1f%%', startangle=140, colors=sns.color_palette("pastel"))
plt.title('Distribution of Classes')
plt.axis('equal')
plt.show()

# BiGRU model factory
def create_model(input_shape, output_shape):
    """Build and compile the stacked bidirectional-GRU classifier.

    Args:
        input_shape: Shape of a single sample, forwarded as ``input_shape``
            to the first layer.
        output_shape: Number of units in the final softmax layer.

    Returns:
        The compiled ``Sequential`` model (Adam optimizer, categorical
        cross-entropy loss, accuracy metric).
    """
    layers = [
        # First recurrent layer returns the full sequence so a second
        # recurrent layer can be stacked on top of it.
        Bidirectional(GRU(64, return_sequences=True), input_shape=input_shape),
        Bidirectional(GRU(64)),
        Dropout(0.5),
        Dense(32, activation='relu'),
        Dense(output_shape, activation='softmax'),
    ]
    network = Sequential(layers)
    network.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return network

# Cross-validation: 5 shuffled folds with a fixed seed so the splits repeat.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Per-fold metric values; averaged after the loop.
accuracy_scores = []
precision_scores = []
recall_scores = []
f1_scores = []
auc_scores = []

for train_index, test_index in kf.split(X):
    # X is a DataFrame (positional .iloc); y is the one-hot NumPy array.
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Reshape to (samples, n_features, 1) so each feature is fed to the GRU
    # as one step carrying a single value.
    X_train_reshaped = X_train.values.reshape((X_train.shape[0], X_train.shape[1], 1))
    X_test_reshaped = X_test.values.reshape((X_test.shape[0], X_test.shape[1], 1))

    # Build a fresh model for this fold and train it; 20% of the training
    # fold is held out by Keras for validation during fitting.
    model = create_model((X_train.shape[1], 1), y_train.shape[1])
    history = model.fit(X_train_reshaped, y_train, epochs=25, batch_size=32, validation_split=0.2, verbose=2)

    # Predict class probabilities for the held-out fold, then take the most
    # probable class; the true class is the position of the 1 in the one-hot row.
    y_pred = model.predict(X_test_reshaped)
    y_pred_classes = y_pred.argmax(axis=1)
    y_test_classes = y_test.argmax(axis=1)

    # Evaluate the fold. precision/recall/f1 use scikit-learn's default
    # binary averaging.
    accuracy_scores.append(accuracy_score(y_test_classes, y_pred_classes))
    precision_scores.append(precision_score(y_test_classes, y_pred_classes))
    recall_scores.append(recall_score(y_test_classes, y_pred_classes))
    f1_scores.append(f1_score(y_test_classes, y_pred_classes))
    # ROC AUC must be computed from continuous scores, not hard 0/1 labels:
    # with hard labels the curve has a single operating point and the value
    # degenerates to balanced accuracy. Column 1 of the softmax output is the
    # predicted probability of class 1.
    auc_scores.append(roc_auc_score(y_test_classes, y_pred[:, 1]))

    # Confusion matrix for this fold.
    cm = confusion_matrix(y_test_classes, y_pred_classes)
    plt.figure(figsize=(10, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
    plt.title('Confusion Matrix')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.show()

# Report the mean of each metric across the folds.
print(f"Mean Accuracy: {sum(accuracy_scores)/len(accuracy_scores)}")
print(f"Mean Precision: {sum(precision_scores)/len(precision_scores)}")
print(f"Mean Recall: {sum(recall_scores)/len(recall_scores)}")
print(f"Mean F1 Score: {sum(f1_scores)/len(f1_scores)}")
print(f"Mean AUC: {sum(auc_scores)/len(auc_scores)}")



# BiGRU+FSFM

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, precision_score, recall_score, f1_score, roc_auc_score
from google.colab import drive
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, GRU, Bidirectional, Dropout
from tensorflow.keras.utils import to_categorical

# Attach Google Drive so the workbook stored there is reachable from this runtime.
drive.mount('/content/drive')

# Path of the phishing dataset workbook on the mounted Drive.
data_path = '/content/drive/My Drive/Colab Notebooks/phishing+websites/phishingDataset.xlsx'

# Read the workbook, then show a short preview and the frame's dimensions.
df = pd.read_excel(data_path)
print("First 5 rows of the dataset:", df.head(), sep="\n")
print("\nShape of the dataset:", df.shape)

# Discard rows that are exact duplicates of an earlier row.
df = df.drop_duplicates()
print("Shape after removing duplicates:", df.shape)

# Helper that turns raw bytes cells into text.
def clean_byte_string(s):
    """Return *s* decoded as UTF-8 if it is a ``bytes`` object, else *s* unchanged."""
    if not isinstance(s, bytes):
        return s
    return s.decode('utf-8')

# Apply the bytes-to-text helper to every cell of the DataFrame.
# DataFrame.applymap was renamed to DataFrame.map in pandas 2.1 (the old name
# is deprecated), so use the new name whenever the installed pandas has it.
if hasattr(pd.DataFrame, 'map'):
    df = df.map(clean_byte_string)
else:
    df = df.applymap(clean_byte_string)

# Convert every column to a numeric dtype.
# Cells are cast to str first: the .str accessor raises AttributeError on
# columns that are already numeric and yields NaN for non-string cells inside
# mixed columns, which would either crash here or silently discard valid rows
# in the dropna below. strip("b'") removes leading/trailing "b" and "'"
# characters (the text "b'-1'" becomes "-1"); anything that still does not
# parse as a number becomes NaN.
for col in df.columns:
    df[col] = pd.to_numeric(df[col].astype(str).str.strip("b'"), errors='coerce')

# Re-check missing values after the conversion.
missing_values = df.isnull().sum()
print("Missing values in each column after conversion:\n", missing_values)

# Drop rows that contain any missing value.
df.dropna(inplace=True)
print("Shape after removing missing values:", df.shape)

# Min-max normalise every column to [0, 1].
# The scaler is fitted on all columns, so the 'Result' label is rescaled too.
# NOTE(review): presumably the raw labels are -1/1 and this rescaling is what
# turns them into the 0/1 class indices to_categorical needs - confirm.
# NOTE(review): the scaler is fitted on the full dataset before the
# cross-validation split further down, so test folds influence the scaling.
scaler = MinMaxScaler()
df[df.columns] = scaler.fit_transform(df)
print("First 5 rows after normalization:\n", df.head())

# Split features and label: features are picked by 0-based column position.
# NOTE(review): position 0 is never selected - confirm these are 0-based
# positions and not 1-based feature numbers.
selected_features = [14, 8, 26, 7, 6, 15, 16, 29, 1, 27, 2]
X = df.iloc[:, selected_features]
# Guard against label leakage: a positional selection can pick up the label
# column by accident, which would let the model read the answer directly.
if 'Result' in X.columns:
    print("Warning: 'Result' was among the selected feature positions; dropping it from X to avoid label leakage.")
    X = X.drop(columns='Result')
y = df['Result']

# One-hot encode the label.
y = to_categorical(y)

# Visualise the number of samples per class as a pie chart.
# idxmax over the one-hot rows recovers the class index of each sample.
class_counts = pd.DataFrame(y).idxmax(axis=1).value_counts()
class_labels = class_counts.index
class_sizes = class_counts.values
class_percentages = (class_sizes / class_sizes.sum()) * 100

plt.figure(figsize=(10, 6))
plt.pie(class_sizes, labels=[f'{label}: {size} ({percentage:.2f}%)' for label, size, percentage in zip(class_labels, class_sizes, class_percentages)],
        autopct='%1.1f%%', startangle=140, colors=sns.color_palette("pastel"))
plt.title('Distribution of Classes')
plt.axis('equal')
plt.show()

# BiGRU model factory
def create_model(input_shape, output_shape):
    """Build and compile the stacked bidirectional-GRU classifier.

    Args:
        input_shape: Shape of a single sample, forwarded as ``input_shape``
            to the first layer.
        output_shape: Number of units in the final softmax layer.

    Returns:
        The compiled ``Sequential`` model (Adam optimizer, categorical
        cross-entropy loss, accuracy metric).
    """
    layers = [
        # First recurrent layer returns the full sequence so a second
        # recurrent layer can be stacked on top of it.
        Bidirectional(GRU(64, return_sequences=True), input_shape=input_shape),
        Bidirectional(GRU(64)),
        Dropout(0.5),
        Dense(32, activation='relu'),
        Dense(output_shape, activation='softmax'),
    ]
    network = Sequential(layers)
    network.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return network

# Cross-validation: 5 shuffled folds with a fixed seed so the splits repeat.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Per-fold metric values; averaged after the loop.
accuracy_scores = []
precision_scores = []
recall_scores = []
f1_scores = []
auc_scores = []

for train_index, test_index in kf.split(X):
    # X is a DataFrame (positional .iloc); y is the one-hot NumPy array.
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Reshape to (samples, n_features, 1) so each feature is fed to the GRU
    # as one step carrying a single value.
    X_train_reshaped = X_train.values.reshape((X_train.shape[0], X_train.shape[1], 1))
    X_test_reshaped = X_test.values.reshape((X_test.shape[0], X_test.shape[1], 1))

    # Build a fresh model for this fold and train it; 20% of the training
    # fold is held out by Keras for validation during fitting.
    model = create_model((X_train.shape[1], 1), y_train.shape[1])
    history = model.fit(X_train_reshaped, y_train, epochs=25, batch_size=32, validation_split=0.2, verbose=2)

    # Predict class probabilities for the held-out fold, then take the most
    # probable class; the true class is the position of the 1 in the one-hot row.
    y_pred = model.predict(X_test_reshaped)
    y_pred_classes = y_pred.argmax(axis=1)
    y_test_classes = y_test.argmax(axis=1)

    # Evaluate the fold. precision/recall/f1 use scikit-learn's default
    # binary averaging.
    accuracy_scores.append(accuracy_score(y_test_classes, y_pred_classes))
    precision_scores.append(precision_score(y_test_classes, y_pred_classes))
    recall_scores.append(recall_score(y_test_classes, y_pred_classes))
    f1_scores.append(f1_score(y_test_classes, y_pred_classes))
    # ROC AUC must be computed from continuous scores, not hard 0/1 labels:
    # with hard labels the curve has a single operating point and the value
    # degenerates to balanced accuracy. Column 1 of the softmax output is the
    # predicted probability of class 1.
    auc_scores.append(roc_auc_score(y_test_classes, y_pred[:, 1]))

    # Confusion matrix for this fold.
    cm = confusion_matrix(y_test_classes, y_pred_classes)
    plt.figure(figsize=(10, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
    plt.title('Confusion Matrix')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.show()

# Report the mean of each metric across the folds.
print(f"Mean Accuracy: {sum(accuracy_scores)/len(accuracy_scores)}")
print(f"Mean Precision: {sum(precision_scores)/len(precision_scores)}")
print(f"Mean Recall: {sum(recall_scores)/len(recall_scores)}")
print(f"Mean F1 Score: {sum(f1_scores)/len(f1_scores)}")
print(f"Mean AUC: {sum(auc_scores)/len(auc_scores)}")


# BiGRU+FSOR

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, precision_score, recall_score, f1_score, roc_auc_score
from google.colab import drive
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, GRU, Bidirectional, Dropout
from tensorflow.keras.utils import to_categorical

# Attach Google Drive so the workbook stored there is reachable from this runtime.
drive.mount('/content/drive')

# Path of the phishing dataset workbook on the mounted Drive.
data_path = '/content/drive/My Drive/Colab Notebooks/phishing+websites/phishingDataset.xlsx'

# Read the workbook, then show a short preview and the frame's dimensions.
df = pd.read_excel(data_path)
print("First 5 rows of the dataset:", df.head(), sep="\n")
print("\nShape of the dataset:", df.shape)

# Discard rows that are exact duplicates of an earlier row.
df = df.drop_duplicates()
print("Shape after removing duplicates:", df.shape)

# Helper that turns raw bytes cells into text.
def clean_byte_string(s):
    """Return *s* decoded as UTF-8 if it is a ``bytes`` object, else *s* unchanged."""
    if not isinstance(s, bytes):
        return s
    return s.decode('utf-8')

# Apply the bytes-to-text helper to every cell of the DataFrame.
# DataFrame.applymap was renamed to DataFrame.map in pandas 2.1 (the old name
# is deprecated), so use the new name whenever the installed pandas has it.
if hasattr(pd.DataFrame, 'map'):
    df = df.map(clean_byte_string)
else:
    df = df.applymap(clean_byte_string)

# Convert every column to a numeric dtype.
# Cells are cast to str first: the .str accessor raises AttributeError on
# columns that are already numeric and yields NaN for non-string cells inside
# mixed columns, which would either crash here or silently discard valid rows
# in the dropna below. strip("b'") removes leading/trailing "b" and "'"
# characters (the text "b'-1'" becomes "-1"); anything that still does not
# parse as a number becomes NaN.
for col in df.columns:
    df[col] = pd.to_numeric(df[col].astype(str).str.strip("b'"), errors='coerce')

# Re-check missing values after the conversion.
missing_values = df.isnull().sum()
print("Missing values in each column after conversion:\n", missing_values)

# Drop rows that contain any missing value.
df.dropna(inplace=True)
print("Shape after removing missing values:", df.shape)

# Min-max normalise every column to [0, 1].
# The scaler is fitted on all columns, so the 'Result' label is rescaled too.
# NOTE(review): presumably the raw labels are -1/1 and this rescaling is what
# turns them into the 0/1 class indices to_categorical needs - confirm.
# NOTE(review): the scaler is fitted on the full dataset before the
# cross-validation split further down, so test folds influence the scaling.
scaler = MinMaxScaler()
df[df.columns] = scaler.fit_transform(df)
print("First 5 rows after normalization:\n", df.head())

# Split features and label: features are picked by 0-based column position.
# NOTE(review): position 30 is selected and position 0 never is. If the frame
# holds 30 feature columns followed by 'Result', position 30 IS the label -
# confirm the column layout and whether these were meant as 1-based numbers.
selected_features = [1, 2, 3, 4, 6, 7, 8, 9, 11, 13, 14, 15, 16, 20, 21, 24, 25, 26, 27, 28, 29, 30]
X = df.iloc[:, selected_features]
# Guard against label leakage: a positional selection can pick up the label
# column by accident, which would let the model read the answer directly.
if 'Result' in X.columns:
    print("Warning: 'Result' was among the selected feature positions; dropping it from X to avoid label leakage.")
    X = X.drop(columns='Result')
y = df['Result']

# One-hot encode the label.
y = to_categorical(y)

# Visualise the number of samples per class as a pie chart.
# idxmax over the one-hot rows recovers the class index of each sample.
class_counts = pd.DataFrame(y).idxmax(axis=1).value_counts()
class_labels = class_counts.index
class_sizes = class_counts.values
class_percentages = (class_sizes / class_sizes.sum()) * 100

plt.figure(figsize=(10, 6))
plt.pie(class_sizes, labels=[f'{label}: {size} ({percentage:.2f}%)' for label, size, percentage in zip(class_labels, class_sizes, class_percentages)],
        autopct='%1.1f%%', startangle=140, colors=sns.color_palette("pastel"))
plt.title('Distribution of Classes')
plt.axis('equal')
plt.show()

# BiGRU model factory
def create_model(input_shape, output_shape):
    """Build and compile the stacked bidirectional-GRU classifier.

    Args:
        input_shape: Shape of a single sample, forwarded as ``input_shape``
            to the first layer.
        output_shape: Number of units in the final softmax layer.

    Returns:
        The compiled ``Sequential`` model (Adam optimizer, categorical
        cross-entropy loss, accuracy metric).
    """
    layers = [
        # First recurrent layer returns the full sequence so a second
        # recurrent layer can be stacked on top of it.
        Bidirectional(GRU(64, return_sequences=True), input_shape=input_shape),
        Bidirectional(GRU(64)),
        Dropout(0.5),
        Dense(32, activation='relu'),
        Dense(output_shape, activation='softmax'),
    ]
    network = Sequential(layers)
    network.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return network

# Cross-validation: 5 shuffled folds with a fixed seed so the splits repeat.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Per-fold metric values; averaged after the loop.
accuracy_scores = []
precision_scores = []
recall_scores = []
f1_scores = []
auc_scores = []

for train_index, test_index in kf.split(X):
    # X is a DataFrame (positional .iloc); y is the one-hot NumPy array.
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Reshape to (samples, n_features, 1) so each feature is fed to the GRU
    # as one step carrying a single value.
    X_train_reshaped = X_train.values.reshape((X_train.shape[0], X_train.shape[1], 1))
    X_test_reshaped = X_test.values.reshape((X_test.shape[0], X_test.shape[1], 1))

    # Build a fresh model for this fold and train it; 20% of the training
    # fold is held out by Keras for validation during fitting.
    model = create_model((X_train.shape[1], 1), y_train.shape[1])
    history = model.fit(X_train_reshaped, y_train, epochs=25, batch_size=32, validation_split=0.2, verbose=2)

    # Predict class probabilities for the held-out fold, then take the most
    # probable class; the true class is the position of the 1 in the one-hot row.
    y_pred = model.predict(X_test_reshaped)
    y_pred_classes = y_pred.argmax(axis=1)
    y_test_classes = y_test.argmax(axis=1)

    # Evaluate the fold. precision/recall/f1 use scikit-learn's default
    # binary averaging.
    accuracy_scores.append(accuracy_score(y_test_classes, y_pred_classes))
    precision_scores.append(precision_score(y_test_classes, y_pred_classes))
    recall_scores.append(recall_score(y_test_classes, y_pred_classes))
    f1_scores.append(f1_score(y_test_classes, y_pred_classes))
    # ROC AUC must be computed from continuous scores, not hard 0/1 labels:
    # with hard labels the curve has a single operating point and the value
    # degenerates to balanced accuracy. Column 1 of the softmax output is the
    # predicted probability of class 1.
    auc_scores.append(roc_auc_score(y_test_classes, y_pred[:, 1]))

    # Confusion matrix for this fold.
    cm = confusion_matrix(y_test_classes, y_pred_classes)
    plt.figure(figsize=(10, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
    plt.title('Confusion Matrix')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.show()

# Report the mean of each metric across the folds.
print(f"Mean Accuracy: {sum(accuracy_scores)/len(accuracy_scores)}")
print(f"Mean Precision: {sum(precision_scores)/len(precision_scores)}")
print(f"Mean Recall: {sum(recall_scores)/len(recall_scores)}")
print(f"Mean F1 Score: {sum(f1_scores)/len(f1_scores)}")
print(f"Mean AUC: {sum(auc_scores)/len(auc_scores)}")
