In [2]:
import pandas as pd

# Load dataset
data = pd.read_csv('/content/Phising_Detection_Dataset.csv')

# Assuming the last column is the label and the rest are features
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [4]:
import torch
import torch.nn as nn
import torch.optim as optim

class MalwareDetectionModel(nn.Module):
    def __init__(self):
        super(MalwareDetectionModel, self).__init__()
        self.fc1 = nn.Linear(X_train.shape[1], 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        x = self.sigmoid(x)
        return x

model = MalwareDetectionModel()


In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Define your neural network model
class MyModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(MyModel, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)
        self.sigmoid = nn.Sigmoid()  # Sigmoid activation for binary classification

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.sigmoid(out)  # Applying sigmoid to get probabilities
        return out

In [9]:
# Split the data (Replace X and y with your actual data)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [12]:
# Ensure that y_train and y_test are in the range [0, 1] for binary classification
y_train = y_train.astype(float)  # Convert y_train to float type
y_test = y_test.astype(float)  # Convert y_test to float type


In [28]:
# Model, criterion, and optimizer initialization
input_size = X_train.shape[1]
output_size = 1  # For binary classification
hidden_size = 64
model = MyModel(input_size=input_size, hidden_size=hidden_size, output_size=output_size)
criterion = nn.BCEWithLogitsLoss()  # Use BCEWithLogitsLoss
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [23]:
# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

In [29]:
# Training loop
num_epochs = 50
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train_tensor)

    # Print shapes for debugging
    print(outputs.shape, y_train_tensor.shape)

    # Ensure shapes compatibility
    assert outputs.shape == y_train_tensor.shape, "Shapes mismatch: outputs and y_train_tensor"

    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()

    # Evaluate on the test set
    model.eval()
    with torch.no_grad():
        test_outputs = model(X_test_tensor)
        test_loss = criterion(test_outputs, y_test_tensor)

    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {loss.item():.4f}, Test Loss: {test_loss.item():.4f}')

torch.Size([530072, 1]) torch.Size([530072, 1])
torch.Size([530072, 1]) torch.Size([530072, 1])
torch.Size([530072, 1]) torch.Size([530072, 1])
torch.Size([530072, 1]) torch.Size([530072, 1])
torch.Size([530072, 1]) torch.Size([530072, 1])
torch.Size([530072, 1]) torch.Size([530072, 1])
torch.Size([530072, 1]) torch.Size([530072, 1])
torch.Size([530072, 1]) torch.Size([530072, 1])
torch.Size([530072, 1]) torch.Size([530072, 1])
torch.Size([530072, 1]) torch.Size([530072, 1])
Epoch [10/50], Train Loss: nan, Test Loss: nan
torch.Size([530072, 1]) torch.Size([530072, 1])
torch.Size([530072, 1]) torch.Size([530072, 1])
torch.Size([530072, 1]) torch.Size([530072, 1])
torch.Size([530072, 1]) torch.Size([530072, 1])
torch.Size([530072, 1]) torch.Size([530072, 1])
torch.Size([530072, 1]) torch.Size([530072, 1])
torch.Size([530072, 1]) torch.Size([530072, 1])
torch.Size([530072, 1]) torch.Size([530072, 1])
torch.Size([530072, 1]) torch.Size([530072, 1])
torch.Size([530072, 1]) torch.Size([53007

In [30]:
model.eval()
with torch.no_grad():
    outputs = model(X_test_tensor)
    predicted = (outputs > 0.5).float()
    accuracy = (predicted.eq(y_test_tensor).sum() / y_test_tensor.shape[0]).item()
    print(f'Accuracy: {accuracy:.4f}')


Accuracy: 0.7997


In [31]:
torch.save(model.state_dict(), 'malware_detection_model.pth')
