In [1]:
import numpy as np
import pandas as pd
import time
import psutil
from sklearn.datasets import make_classification
from scipy.special import expit
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
# Alternative data source kept for reference: a synthetic problem generated in-notebook.
# X, y = make_classification(n_samples=1000, n_features=10, n_classes=2, random_state=42)
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Features: each CSV is read into a DataFrame and converted to a NumPy array.
X_train = pd.read_csv('X_train1.csv').to_numpy()
X_test = pd.read_csv('X_test1.csv').to_numpy()

# Labels: converted the same way, then flattened to 1-D with reshape(-1).
y_train = pd.read_csv('y_train1.csv').to_numpy().reshape(-1)
y_test = pd.read_csv('y_test1.csv').to_numpy().reshape(-1)

In [3]:
# Standardise the features: the scaler is fitted on the training split only,
# and that same fitted transform is then applied to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
# NOTE: X_train / X_test are rebound to the scaled arrays, so the unscaled
# versions are no longer reachable under these names after this cell.
X_test = scaler.transform(X_test)

In [4]:
def sigmoid(z):
    """Return the logistic sigmoid of ``z``.

    Thin wrapper around ``scipy.special.expit``: the argument is passed
    straight through and the result of ``expit`` is returned unchanged.
    """
    activation = expit(z)
    return activation

In [5]:
import os
from threading import Thread

In [6]:
def get_cpu_and_ram_utilization(pid, duration, interval=1):
    """Sample CPU and RAM utilisation of a process for roughly ``duration`` seconds.

    Each sample is printed as it is taken.

    Parameters
    ----------
    pid : int
        Process id handed to ``psutil.Process``.
    duration : float
        Sampling window in seconds. A new sample is started as long as less
        than ``duration`` seconds have elapsed, so the final sample may finish
        slightly after the window ends. ``duration <= 0`` takes no samples.
    interval : float, default 1
        Value forwarded to ``Process.cpu_percent(interval=...)``, i.e. how long
        each CPU measurement blocks. Should be positive; see the psutil
        documentation for the non-blocking behaviour of ``0``/``None``, which
        would make this loop spin without pausing.

    Returns
    -------
    tuple
        ``(total_cpu_usage, cpu_usages, ram_usages)`` where ``cpu_usages`` and
        ``ram_usages`` are the per-sample percentages (same length) and
        ``total_cpu_usage`` is ``sum(cpu_usages)``.
    """
    process = psutil.Process(pid)
    cpu_usages = []
    ram_usages = []
    # Monotonic clock: the elapsed-time check must not be affected by
    # system clock adjustments, which can move time.time() backwards or forwards.
    start_time = time.monotonic()
    while time.monotonic() - start_time < duration:
        cpu_usage = process.cpu_percent(interval=interval)
        ram_usage = process.memory_percent()
        cpu_usages.append(cpu_usage)
        ram_usages.append(ram_usage)
        print(f"CPU utilization: {cpu_usage}% | RAM utilization: {ram_usage}%")
    total_cpu_usage = sum(cpu_usages)
    return total_cpu_usage, cpu_usages, ram_usages

In [7]:
# PID of the current Python process (the one running this notebook); it is the
# process whose CPU/RAM usage gets sampled.
pid = os.getpid()

# Duration to monitor CPU usage, in seconds.
monitor_duration = 15

In [8]:
# Pause for 15 seconds before the training/monitoring cell runs.
# NOTE(review): presumably lets CPU activity from data loading settle so the
# measurement starts from an idle baseline — confirm intent.
time.sleep(15)

In [9]:
# Shared output of the training run. The trainer is launched as a Thread
# target (whose return value is discarded), so it publishes its results here.
results={}
def logistic_regression_sgd(X, y, learning_rate=0.01, epochs=3, random_state=None):
    """Fit binary logistic regression with per-sample stochastic gradient descent.

    Parameters
    ----------
    X : numpy.ndarray
        2-D feature matrix (``X.shape`` is unpacked into samples x features).
    y : numpy.ndarray
        Targets, indexable by an index array and aligned with the rows of ``X``.
    learning_rate : float, default 0.01
        Step size applied to every per-sample gradient.
    epochs : int, default 3
        Number of full passes over the data; rows are reshuffled before each pass.
    random_state : int or None, default None
        Seed for the shuffling. ``None`` keeps the previous behaviour of drawing
        from NumPy's global random state; any other value seeds a private
        ``numpy.random.default_rng`` so the run is reproducible.

    Returns
    -------
    tuple
        ``(weights, bias)``. The same values, plus the elapsed ``train_time`` in
        seconds, are stored in the module-level ``results`` dict.

    Notes
    -----
    The caller's ``X`` and ``y`` are not modified: shuffling uses fancy
    indexing, which produces new arrays bound to the local names only.
    """
    # perf_counter is the high-resolution clock intended for measuring durations.
    train_time_start = time.perf_counter()
    m, n = X.shape
    weights = np.zeros(n)
    bias = 0.0

    # Unseeded: fall back to the global NumPy RNG (historical behaviour).
    # Seeded: use an independent generator so results can be reproduced.
    rng = np.random if random_state is None else np.random.default_rng(random_state)

    for _epoch in range(epochs):
        # Shuffle the data for each epoch to ensure better convergence
        indices = rng.permutation(m)
        X = X[indices]
        y = y[indices]

        for xi, yi in zip(X, y):
            y_pred = sigmoid(np.dot(xi, weights) + bias)

            # Gradient of the log-loss for a single sample
            error = y_pred - yi
            dw = error * xi
            db = error

            # Update weights and bias
            weights -= learning_rate * dw
            bias -= learning_rate * db

    results['weights']=weights
    results['bias']=bias
    results['train_time']=time.perf_counter()-train_time_start
    print(f"Training time: {results['train_time']} seconds")
    return weights, bias

In [None]:
# Run training on a background thread so this (main) thread can sample the
# process's CPU/RAM usage while training is in progress.
train_thread = Thread(target=logistic_regression_sgd, args=(X_train, y_train))
train_thread.start()

# Samples for about `monitor_duration` seconds regardless of how long training
# takes. `pid` is this very process, so the samples cover the whole process
# (including the monitoring loop itself), not just the training thread.
total_cpu_usage, cpu_usages, ram_usages = get_cpu_and_ram_utilization(pid, monitor_duration)
# Wait for training to finish so `results` is populated before later cells read it.
train_thread.join()

# `total_cpu_usage` is the sum of the per-sample percentages.
print(f"Total CPU utilization over {monitor_duration} seconds: {total_cpu_usage}%")

# Print the CPU and RAM usage per second
print("CPU usage per second:", cpu_usages)
print("RAM usage per second:", ram_usages)

# Calculate total CPU resource consumption in 'CPU-seconds'
# Each sample is a percentage measured over a 1-second interval, so
# (percent / 100) per sample adds up to CPU-seconds.
cpu_seconds = sum(cpu_usages) / 100
print(f"Total CPU resource consumption: {cpu_seconds} CPU-seconds")

Training time: 0.016467571258544922 seconds
CPU utilization: 1.0% | RAM utilization: 1.5018300355774206%
CPU utilization: 2.0% | RAM utilization: 1.5018300355774206%
CPU utilization: 1.0% | RAM utilization: 1.5018300355774206%
CPU utilization: 1.0% | RAM utilization: 1.5018300355774206%
CPU utilization: 0.0% | RAM utilization: 1.5018300355774206%
CPU utilization: 0.0% | RAM utilization: 1.5018300355774206%
CPU utilization: 1.0% | RAM utilization: 1.5018300355774206%


In [None]:
# Show the parameters the training function stored in the shared `results` dict.
print("Weights from SGD:", results['weights'])
print("Bias from SGD:", results['bias'])

In [None]:
def predict(X, weights, bias, threshold=0.5):
    """Classify samples with a fitted logistic-regression model.

    Parameters
    ----------
    X : array-like
        Samples to classify; combined with ``weights`` via ``np.dot``.
    weights : array-like
        Learned coefficient vector.
    bias : float
        Learned intercept added to every score.
    threshold : float, default 0.5
        Probability cut-off for the positive class.

    Returns
    -------
    Integer labels: 1 where the predicted probability is strictly greater
    than ``threshold``, otherwise 0.
    """
    z = np.dot(X, weights) + bias
    y_pred = sigmoid(z)
    # Strict comparison: a probability exactly equal to the threshold maps to 0.
    return (y_pred > threshold).astype(int)

In [None]:
# Time inference on the test split. perf_counter is used instead of time.time()
# because a single vectorised predict call can finish faster than the wall
# clock's resolution, which would report a duration of 0.0.
test_time_start=time.perf_counter()
y_pred_sgd = predict(X_test, results['weights'], results['bias'])
test_time_end=time.perf_counter()
results['test_time']=test_time_end-test_time_start
print(f"Testing time: {results['test_time']} seconds")

In [None]:
from sklearn.metrics import accuracy_score
# Compare the true test labels with the thresholded SGD predictions.
accuracy = accuracy_score(y_test, y_pred_sgd)
print(f'Accuracy: {accuracy}')

In [None]:
import joblib
# Persist the run's artefacts as ONE positional tuple. Anything loading
# 'variables_manual_sgd.pkl' must unpack in exactly this order:
#   weights, bias, accuracy, y_pred_sgd, cpu_seconds,
#   cpu_usages, ram_usages, train_time, test_time
joblib.dump((results['weights'], results['bias'], accuracy, y_pred_sgd, cpu_seconds, cpu_usages, ram_usages, results['train_time'], results['test_time']), 'variables_manual_sgd.pkl')