In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Read the KDDTrain data set from a CSV file on disk.
file_path = 'KDDTrain.csv'
data = pd.read_csv(file_path)

# Every column but the last is used as a feature; the last column is used as
# the target.
# NOTE(review): the "last column is the target" layout is an assumption carried
# over from the original cell — confirm against the actual CSV.
x, y = data.iloc[:, :-1], data.iloc[:, -1]

# Hold out 20% of the rows for testing; the fixed random_state makes the split
# repeatable between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

# Report the size of each piece of the split.
print(f"x_train shape: {x_train.shape}")
print(f"x_test shape: {x_test.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"y_test shape: {y_test.shape}")


x_train shape: (100778, 42)
x_test shape: (25195, 42)
y_train shape: (100778,)
y_test shape: (25195,)


In [2]:
import numpy as np
from sklearn.metrics import accuracy_score

class CustomLogisticRegression:
    """Binary logistic regression fitted with full-batch gradient descent.

    The model keeps a weight vector and a scalar bias, passes the linear score
    through a sigmoid, and descends the gradient of the mean binary
    cross-entropy. Labels are expected to be 0/1.

    NOTE(review): this notebook defines the class in more than one cell; the
    definition executed last is the one in effect. Keep a single definition.
    """

    # Linear scores are clamped to +/- this value before exponentiation so
    # that np.exp stays finite in float64.
    _MAX_ABS_SCORE = 500.0
    # Probabilities are kept this far away from 0 and 1 before taking logs, so
    # the loss never evaluates log(0).
    _PROB_EPS = 1e-15

    def __init__(self):
        # Weight vector, one entry per feature; allocated by fit().
        self.weights = None
        # Scalar intercept.
        self.bias = 0
        # Per-epoch training accuracy recorded by the most recent fit().
        self.train_accuracies = []
        # Per-epoch training loss recorded by the most recent fit().
        self.losses = []

    def _sigmoid(self, z):
        """Return ``1 / (1 + exp(-z))`` element-wise.

        ``z`` is clipped to ``[-_MAX_ABS_SCORE, _MAX_ABS_SCORE]`` first so the
        exponential cannot overflow.
        """
        z = np.clip(z, -self._MAX_ABS_SCORE, self._MAX_ABS_SCORE)
        return 1 / (1 + np.exp(-z))

    def _transform_x(self, x):
        """Convert the feature container to a float NumPy array.

        Raises:
            ValueError: if an entry cannot be converted to float (for example
                a string-valued column).
        """
        return np.asarray(x, dtype=float)

    def _transform_y(self, y):
        """Convert the label container to a float NumPy array."""
        return np.asarray(y, dtype=float)

    def _predict_proba(self, x):
        """Return sigmoid(x @ weights + bias) for an already-converted ``x``."""
        return self._sigmoid(np.dot(x, self.weights) + self.bias)

    def compute_loss(self, y, pred):
        """Return the mean binary cross-entropy between ``y`` and ``pred``.

        Args:
            y: array-like of target values.
            pred: array-like of predicted probabilities, same shape as ``y``.

        Returns:
            The scalar mean loss.
        """
        y = np.asarray(y, dtype=float)
        # Without clipping, a saturated prediction yields 0 * log(0) = NaN.
        pred = np.clip(pred, self._PROB_EPS, 1 - self._PROB_EPS)
        return -np.mean(y * np.log(pred) + (1 - y) * np.log(1 - pred))

    def compute_gradients(self, x, y, pred):
        """Return the loss gradients with respect to the weights and the bias.

        Args:
            x: 2-D feature array.
            y: 1-D target array.
            pred: 1-D array of predicted probabilities.

        Returns:
            ``(error_w, error_b)``: the per-weight gradient vector and the
            scalar bias gradient, both averaged over the rows of ``x``.
        """
        error = pred - y
        error_w = np.dot(error, x) / x.shape[0]
        error_b = np.mean(error)
        return error_w, error_b

    def update_model_parameters(self, error_w, error_b, learning_rate=0.01):
        """Take one gradient-descent step on the weights and the bias."""
        self.weights -= learning_rate * error_w
        self.bias -= learning_rate * error_b

    def fit(self, x, y, epochs=100, learning_rate=0.01):
        """Train the model from scratch on ``x`` and ``y``.

        Weights, bias and the recorded histories are reset on every call, so
        ``losses`` and ``train_accuracies`` always describe the latest run.

        Args:
            x: 2-D array-like of numeric features.
            y: array-like of 0/1 labels; a column vector is flattened.
            epochs: number of full passes over the data.
            learning_rate: step size passed to update_model_parameters().

        Raises:
            ValueError: if ``x`` and ``y`` disagree on the number of samples,
                or if the inputs cannot be converted to float.
        """
        x = self._transform_x(x)
        # Flatten so that an (n, 1) label column does not broadcast against
        # the (n,) prediction vector.
        y = self._transform_y(y).ravel()
        if x.shape[0] != y.shape[0]:
            raise ValueError(
                f"x has {x.shape[0]} rows but y has {y.shape[0]} labels"
            )

        self.weights = np.zeros(x.shape[1])
        self.bias = 0
        self.train_accuracies = []
        self.losses = []

        for _ in range(epochs):
            # Loss and accuracy are measured on the predictions made before
            # this epoch's parameter update.
            pred = self._predict_proba(x)
            loss = self.compute_loss(y, pred)
            error_w, error_b = self.compute_gradients(x, y, pred)
            self.update_model_parameters(error_w, error_b, learning_rate)

            pred_to_class = (pred > 0.5).astype(int)
            # Fraction of samples whose thresholded prediction equals the label.
            self.train_accuracies.append(float(np.mean(pred_to_class == y)))
            self.losses.append(loss)

    def predict(self, x):
        """Return a list of 0/1 class predictions, one per row of ``x``.

        A row is assigned class 1 when its predicted probability is strictly
        greater than 0.5.
        """
        x = self._transform_x(x)
        pred = self._predict_proba(x)
        return (pred > 0.5).astype(int).tolist()


In [3]:
import numpy as np
from sklearn.metrics import accuracy_score

class CustomLogisticRegression:
    """Binary logistic regression fitted with full-batch gradient descent.

    The model keeps a weight vector and a scalar bias, passes the linear score
    through a sigmoid, and descends the gradient of the mean binary
    cross-entropy. Labels are expected to be 0/1.

    NOTE(review): this notebook defines the class in more than one cell; the
    definition executed last is the one in effect. Keep a single definition.
    """

    # Linear scores are clamped to +/- this value before exponentiation so
    # that np.exp stays finite in float64.
    _MAX_ABS_SCORE = 500.0
    # Probabilities are kept this far away from 0 and 1 before taking logs, so
    # the loss never evaluates log(0).
    _PROB_EPS = 1e-15

    def __init__(self):
        # Weight vector, one entry per feature; allocated by fit().
        self.weights = None
        # Scalar intercept.
        self.bias = 0
        # Per-epoch training accuracy recorded by the most recent fit().
        self.train_accuracies = []
        # Per-epoch training loss recorded by the most recent fit().
        self.losses = []

    def _sigmoid(self, z):
        """Return ``1 / (1 + exp(-z))`` element-wise.

        ``z`` is clipped to ``[-_MAX_ABS_SCORE, _MAX_ABS_SCORE]`` first so the
        exponential cannot overflow.
        """
        z = np.clip(z, -self._MAX_ABS_SCORE, self._MAX_ABS_SCORE)
        return 1 / (1 + np.exp(-z))

    def _transform_x(self, x):
        """Convert the feature container to a float NumPy array.

        Raises:
            ValueError: if an entry cannot be converted to float (for example
                a string-valued column).
        """
        return np.asarray(x, dtype=float)

    def _transform_y(self, y):
        """Convert the label container to a float NumPy array."""
        return np.asarray(y, dtype=float)

    def _predict_proba(self, x):
        """Return sigmoid(x @ weights + bias) for an already-converted ``x``."""
        return self._sigmoid(np.dot(x, self.weights) + self.bias)

    def compute_loss(self, y, pred):
        """Return the mean binary cross-entropy between ``y`` and ``pred``.

        Args:
            y: array-like of target values.
            pred: array-like of predicted probabilities, same shape as ``y``.

        Returns:
            The scalar mean loss.
        """
        y = np.asarray(y, dtype=float)
        # Without clipping, a saturated prediction yields 0 * log(0) = NaN.
        pred = np.clip(pred, self._PROB_EPS, 1 - self._PROB_EPS)
        return -np.mean(y * np.log(pred) + (1 - y) * np.log(1 - pred))

    def compute_gradients(self, x, y, pred):
        """Return the loss gradients with respect to the weights and the bias.

        Args:
            x: 2-D feature array.
            y: 1-D target array.
            pred: 1-D array of predicted probabilities.

        Returns:
            ``(error_w, error_b)``: the per-weight gradient vector and the
            scalar bias gradient, both averaged over the rows of ``x``.
        """
        error = pred - y
        error_w = np.dot(error, x) / x.shape[0]
        error_b = np.mean(error)
        return error_w, error_b

    def update_model_parameters(self, error_w, error_b, learning_rate=0.01):
        """Take one gradient-descent step on the weights and the bias."""
        self.weights -= learning_rate * error_w
        self.bias -= learning_rate * error_b

    def fit(self, x, y, epochs=100, learning_rate=0.01):
        """Train the model from scratch on ``x`` and ``y``.

        Weights, bias and the recorded histories are reset on every call, so
        ``losses`` and ``train_accuracies`` always describe the latest run.

        Args:
            x: 2-D array-like of numeric features.
            y: array-like of 0/1 labels; a column vector is flattened.
            epochs: number of full passes over the data.
            learning_rate: step size passed to update_model_parameters().

        Raises:
            ValueError: if ``x`` and ``y`` disagree on the number of samples,
                or if the inputs cannot be converted to float.
        """
        x = self._transform_x(x)
        # Flatten so that an (n, 1) label column does not broadcast against
        # the (n,) prediction vector.
        y = self._transform_y(y).ravel()
        if x.shape[0] != y.shape[0]:
            raise ValueError(
                f"x has {x.shape[0]} rows but y has {y.shape[0]} labels"
            )

        self.weights = np.zeros(x.shape[1])
        self.bias = 0
        self.train_accuracies = []
        self.losses = []

        for _ in range(epochs):
            # Loss and accuracy are measured on the predictions made before
            # this epoch's parameter update.
            pred = self._predict_proba(x)
            loss = self.compute_loss(y, pred)
            error_w, error_b = self.compute_gradients(x, y, pred)
            self.update_model_parameters(error_w, error_b, learning_rate)

            pred_to_class = (pred > 0.5).astype(int)
            # Fraction of samples whose thresholded prediction equals the label.
            self.train_accuracies.append(float(np.mean(pred_to_class == y)))
            self.losses.append(loss)

    def predict(self, x):
        """Return a list of 0/1 class predictions, one per row of ``x``.

        A row is assigned class 1 when its predicted probability is strictly
        greater than 0.5.
        """
        x = self._transform_x(x)
        pred = self._predict_proba(x)
        return (pred > 0.5).astype(int).tolist()


In [4]:
# Show the dimensions of the training features and labels ahead of fitting.
print(
    f"x_train shape before fit: {x_train.shape}",
    f"y_train shape before fit: {y_train.shape}",
    sep="\n",
)


x_train shape before fit: (100778, 42)
y_train shape before fit: (100778,)


In [5]:
# Make sure both halves of the training split are NumPy arrays; objects that
# already are ndarrays are kept as they are, anything else is copied into one.
x_train = x_train if isinstance(x_train, np.ndarray) else np.array(x_train)
y_train = y_train if isinstance(y_train, np.ndarray) else np.array(y_train)


In [7]:
import numpy as np
from sklearn.metrics import accuracy_score

class CustomLogisticRegression:
    """Binary logistic regression fitted with full-batch gradient descent.

    The model keeps a weight vector and a scalar bias, passes the linear score
    through a sigmoid, and descends the gradient of the mean binary
    cross-entropy. Labels are expected to be 0/1.

    NOTE(review): this notebook defines the class in more than one cell; the
    definition executed last is the one in effect. Keep a single definition.
    """

    # Linear scores are clamped to +/- this value before exponentiation so
    # that np.exp stays finite in float64.
    _MAX_ABS_SCORE = 500.0
    # Probabilities are kept this far away from 0 and 1 before taking logs, so
    # the loss never evaluates log(0).
    _PROB_EPS = 1e-15

    def __init__(self):
        # Weight vector, one entry per feature; allocated by fit().
        self.weights = None
        # Scalar intercept.
        self.bias = 0
        # Per-epoch training accuracy recorded by the most recent fit().
        self.train_accuracies = []
        # Per-epoch training loss recorded by the most recent fit().
        self.losses = []

    def _sigmoid(self, z):
        """Return ``1 / (1 + exp(-z))`` element-wise.

        ``z`` is clipped to ``[-_MAX_ABS_SCORE, _MAX_ABS_SCORE]`` first so the
        exponential cannot overflow.
        """
        z = np.clip(z, -self._MAX_ABS_SCORE, self._MAX_ABS_SCORE)
        return 1 / (1 + np.exp(-z))

    def _transform_x(self, x):
        """Convert the feature container to a float NumPy array.

        Raises:
            ValueError: if an entry cannot be converted to float (for example
                a string-valued column).
        """
        return np.asarray(x, dtype=float)

    def _transform_y(self, y):
        """Convert the label container to a float NumPy array."""
        return np.asarray(y, dtype=float)

    def _predict_proba(self, x):
        """Return sigmoid(x @ weights + bias) for an already-converted ``x``."""
        return self._sigmoid(np.dot(x, self.weights) + self.bias)

    def compute_loss(self, y, pred):
        """Return the mean binary cross-entropy between ``y`` and ``pred``.

        Args:
            y: array-like of target values.
            pred: array-like of predicted probabilities, same shape as ``y``.

        Returns:
            The scalar mean loss.
        """
        y = np.asarray(y, dtype=float)
        # Without clipping, a saturated prediction yields 0 * log(0) = NaN.
        pred = np.clip(pred, self._PROB_EPS, 1 - self._PROB_EPS)
        return -np.mean(y * np.log(pred) + (1 - y) * np.log(1 - pred))

    def compute_gradients(self, x, y, pred):
        """Return the loss gradients with respect to the weights and the bias.

        Args:
            x: 2-D feature array.
            y: 1-D target array.
            pred: 1-D array of predicted probabilities.

        Returns:
            ``(error_w, error_b)``: the per-weight gradient vector and the
            scalar bias gradient, both averaged over the rows of ``x``.
        """
        error = pred - y
        error_w = np.dot(error, x) / x.shape[0]
        error_b = np.mean(error)
        return error_w, error_b

    def update_model_parameters(self, error_w, error_b, learning_rate=0.01):
        """Take one gradient-descent step on the weights and the bias."""
        self.weights -= learning_rate * error_w
        self.bias -= learning_rate * error_b

    def fit(self, x, y, epochs=100, learning_rate=0.01):
        """Train the model from scratch on ``x`` and ``y``.

        Weights, bias and the recorded histories are reset on every call, so
        ``losses`` and ``train_accuracies`` always describe the latest run.

        Args:
            x: 2-D array-like of numeric features.
            y: array-like of 0/1 labels; a column vector is flattened.
            epochs: number of full passes over the data.
            learning_rate: step size passed to update_model_parameters().

        Raises:
            ValueError: if ``x`` and ``y`` disagree on the number of samples,
                or if the inputs cannot be converted to float.
        """
        x = self._transform_x(x)
        # Flatten so that an (n, 1) label column does not broadcast against
        # the (n,) prediction vector.
        y = self._transform_y(y).ravel()
        if x.shape[0] != y.shape[0]:
            raise ValueError(
                f"x has {x.shape[0]} rows but y has {y.shape[0]} labels"
            )

        self.weights = np.zeros(x.shape[1])
        self.bias = 0
        self.train_accuracies = []
        self.losses = []

        for _ in range(epochs):
            # Loss and accuracy are measured on the predictions made before
            # this epoch's parameter update.
            pred = self._predict_proba(x)
            loss = self.compute_loss(y, pred)
            error_w, error_b = self.compute_gradients(x, y, pred)
            self.update_model_parameters(error_w, error_b, learning_rate)

            pred_to_class = (pred > 0.5).astype(int)
            # Fraction of samples whose thresholded prediction equals the label.
            self.train_accuracies.append(float(np.mean(pred_to_class == y)))
            self.losses.append(loss)

    def predict(self, x):
        """Return a list of 0/1 class predictions, one per row of ``x``.

        A row is assigned class 1 when its predicted probability is strictly
        greater than 0.5.
        """
        x = self._transform_x(x)
        pred = self._predict_proba(x)
        return (pred > 0.5).astype(int).tolist()


In [8]:
# Report the container type and dimensions of the training features and labels.
print(
    f"x_train type: {type(x_train)}, shape: {x_train.shape}",
    f"y_train type: {type(y_train)}, shape: {y_train.shape}",
    sep="\n",
)


x_train type: <class 'numpy.ndarray'>, shape: (100778, 42)
y_train type: <class 'numpy.ndarray'>, shape: (100778,)


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Read the KDDTrain data set from a CSV file on disk.
file_path = 'KDDTrain.csv'  # Point this at the real location of the CSV file
data = pd.read_csv(file_path)

# Every column but the last becomes a feature; the last column becomes the
# target. Both are handed on as plain NumPy arrays.
# NOTE(review): the model below does arithmetic on the features and a binary
# cross-entropy on the target — confirm that every feature column is numeric
# and that the last column really holds 0/1 labels.
x = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# Hold out 20% of the rows for testing; the fixed random_state makes the split
# repeatable between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

# Training-set dimensions, printed as a debugging aid.
print(f"x_train shape: {x_train.shape}")
print(f"y_train shape: {y_train.shape}")

# Build the custom model and train it for 150 epochs on the training split.
lr = CustomLogisticRegression()
lr.fit(x_train, y_train, epochs=150)

# Score the held-out rows and report the fraction classified correctly.
y_pred = lr.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Optional: visualise how training accuracy and loss evolved per epoch.
import matplotlib.pyplot as plt

fig, (ax_accuracy, ax_loss) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: accuracy history recorded by the model during fit().
ax_accuracy.plot(lr.train_accuracies)
ax_accuracy.set_title('Training Accuracy over Epochs')
ax_accuracy.set_xlabel('Epoch')
ax_accuracy.set_ylabel('Accuracy')

# Right panel: loss history recorded by the model during fit().
ax_loss.plot(lr.losses)
ax_loss.set_title('Loss over Epochs')
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss')

fig.tight_layout()
plt.show()
