In [1]:
import numpy as np
import time
import psutil
import matplotlib.pyplot as plt
from scipy.special import expit
from sklearn.datasets import make_classification
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import pandas as pd

In [2]:
# Read the pre-split dataset from CSV and convert each table to a NumPy array.
# The label tables are flattened into 1-D vectors.
X_train = pd.read_csv('X_train3.csv').to_numpy()
y_train = pd.read_csv('y_train3.csv').to_numpy().reshape(-1)
X_test = pd.read_csv('X_test3.csv').to_numpy()
y_test = pd.read_csv('y_test3.csv').to_numpy().reshape(-1)

# Re-encode the labels: 0 becomes -1, every other value becomes +1.
y_train, y_test = (np.where(labels == 0, -1, 1) for labels in (y_train, y_test))

# Standardize the features using statistics computed on the training split only,
# then apply the same transform to the test split.
scaler = StandardScaler().fit(X_train)
X_test = scaler.transform(X_test)

# NOTE(review): only the training features are rounded to 6 decimals; the test
# features keep full precision. Confirm that this asymmetry is intentional.
X_train = np.round(scaler.transform(X_train), 6)

In [3]:
from sklearn.linear_model import SGDClassifier

In [4]:
import os
from threading import Thread

In [5]:
def get_cpu_and_ram_utilization(pid, duration, interval=1, verbose=True):
    """Sample a process's CPU and RAM utilization for roughly ``duration`` seconds.

    Each loop iteration makes one blocking ``cpu_percent(interval=interval)``
    call followed by one ``memory_percent()`` call on the process. The elapsed
    time is checked only before a sample starts, so the function can overrun
    ``duration`` by up to one ``interval``.

    Args:
        pid: Process id handed to ``psutil.Process``.
        duration: Length of the sampling window in seconds. A value <= 0
            yields no samples.
        interval: Seconds each CPU sample blocks for. Defaults to 1, which
            keeps the historical "one sample per second" behaviour.
        verbose: When true (the default), print every sample as it is taken.

    Returns:
        A ``(total_cpu_usage, cpu_usages, ram_usages)`` tuple:
        ``cpu_usages`` and ``ram_usages`` are the per-sample percentages and
        ``total_cpu_usage`` is the plain sum of ``cpu_usages``.

    Raises:
        ValueError: If ``interval`` is not positive.
    """
    # A zero interval would make the CPU sampling call non-blocking and turn the
    # loop into a busy spin that records meaningless samples.
    if interval <= 0:
        raise ValueError("interval must be positive")

    process = psutil.Process(pid)
    cpu_usages = []
    ram_usages = []

    # Monotonic clock: unlike time.time(), it cannot jump when the system clock
    # is adjusted, so the sampling window always has the requested length.
    start_time = time.monotonic()
    while time.monotonic() - start_time < duration:
        cpu_usage = process.cpu_percent(interval=interval)
        ram_usage = process.memory_percent()
        cpu_usages.append(cpu_usage)
        ram_usages.append(ram_usage)
        if verbose:
            print(f"CPU utilization: {cpu_usage}% | RAM utilization: {ram_usage}%")

    total_cpu_usage = sum(cpu_usages)
    return total_cpu_usage, cpu_usages, ram_usages

In [6]:
# Length of the CPU/RAM sampling window, in seconds.
monitor_duration = 3

# Id of the current Python process; this is the process whose usage is sampled.
pid = os.getpid()

In [7]:
# Pause for 3 seconds before the measured run.
# NOTE(review): presumably this lets CPU activity from the earlier cells die
# down so it does not leak into the measurement; confirm the intent.
time.sleep(3)

In [8]:
# Shared output container. train_svm is run as a Thread target, and Thread
# discards the target's return value, so the outcome is handed back through
# this module-level dict.
results = {}


def train_svm(X, y, X_test, learning_rate=0.001, lambda_param=0.01, n_iters=2):
    """Train a linear SVM with scikit-learn's SGDClassifier and time it.

    The classifier uses hinge loss, a constant learning rate and a fixed
    ``random_state`` of 42. ``tol=None`` is passed together with
    ``max_iter=n_iters``.

    Args:
        X: Training feature matrix.
        y: Training labels (array or plain sequence). Values <= 0 are mapped
            to -1, everything else to +1.
        X_test: Feature matrix to predict on after training.
        learning_rate: Passed to the classifier as ``eta0``.
        lambda_param: Regularization strength; the classifier receives
            ``alpha = 2 * lambda_param``.
        n_iters: Passed to the classifier as ``max_iter``.

    Returns:
        The module-level ``results`` dict, updated in place with the keys
        ``'weights'`` (``coef_``), ``'bias'`` (``intercept_``),
        ``'train_time'`` and ``'test_time'`` (seconds) and ``'predictions'``.
        The training time is also printed.
    """
    # np.asarray makes the comparison work for plain Python sequences too;
    # a bare list would raise TypeError on `y <= 0`.
    y_ = np.where(np.asarray(y) <= 0, -1, 1)

    # perf_counter is the high-resolution clock intended for measuring short
    # durations; the wall clock (time.time) can be coarse or jump.
    train_time_start = time.perf_counter()

    # NOTE(review): the factor 2 presumably matches a reference implementation
    # whose regularization gradient is 2 * lambda * w; confirm.
    alpha = 2 * lambda_param

    # Hinge loss turns the SGD classifier into a linear SVM.
    sgd_clf = SGDClassifier(
        loss='hinge',
        alpha=alpha,
        learning_rate='constant',
        eta0=learning_rate,
        max_iter=n_iters,
        tol=None,
        random_state=42
    )
    sgd_clf.fit(X, y_)

    # Storing the learned parameters is counted as part of the training time,
    # as in the original measurement.
    results['weights'] = sgd_clf.coef_
    results['bias'] = sgd_clf.intercept_
    results['train_time'] = time.perf_counter() - train_time_start

    # Time the prediction step separately.
    test_time_start = time.perf_counter()
    predictions = sgd_clf.predict(X_test)
    results['test_time'] = time.perf_counter() - test_time_start
    results['predictions'] = predictions

    print(results['train_time'])
    return results

In [9]:
# Train on a background thread so that this thread is free to sample usage.
train_thread = Thread(
    target=train_svm,
    kwargs={'X': X_train, 'y': y_train, 'X_test': X_test},
)
train_thread.start()

# Sample this process while training runs, then wait for the trainer to finish.
total_cpu_usage, cpu_usages, ram_usages = get_cpu_and_ram_utilization(pid, monitor_duration)
train_thread.join()

# Each sample is a percentage, so the sum divided by 100 is reported as the
# total consumption in 'CPU-seconds'.
cpu_seconds = sum(cpu_usages) / 100

print(f"Total CPU utilization over {monitor_duration} seconds: {total_cpu_usage}%")

# Per-sample CPU and RAM readings.
print(f"CPU usage per second: {cpu_usages}")
print(f"RAM usage per second: {ram_usages}")

print(f"Total CPU resource consumption: {cpu_seconds} CPU-seconds")

0.022926807403564453
CPU utilization: 1.0% | RAM utilization: 1.5946475893234697%
CPU utilization: 1.0% | RAM utilization: 1.5946475893234697%
CPU utilization: 1.0% | RAM utilization: 1.5946475893234697%
Total CPU utilization over 3 seconds: 3.0%
CPU usage per second: [1.0, 1.0, 1.0]
RAM usage per second: [1.5946475893234697, 1.5946475893234697, 1.5946475893234697]
Total CPU resource consumption: 0.03 CPU-seconds


In [10]:
# Show the learned weights that train_svm stored (the classifier's coef_).
print(results['weights'])

[[ 0.04740904  0.21305059 -0.01169338  0.17062014 -0.1297572   0.01380176
  -0.02534812  0.02317172  0.34741428  0.1045154   0.19215351  0.22666325
   0.12714138  0.08809487  0.13486114  0.09261589  0.11591513 -0.06383095
  -0.07720103 -0.22246344 -0.27685293 -0.10442334]]


In [11]:
from sklearn.metrics import accuracy_score

# Score the predictions stored in results['predictions'] against the test labels.
accuracy = accuracy_score(y_true=y_test, y_pred=results['predictions'])
print('Accuracy:', accuracy)

Accuracy: 0.825925925925926


In [12]:
import joblib

# Persist the outcome of the run as a single positional tuple. Anything that
# loads this file must unpack the fields in exactly the order listed here.
joblib.dump(
    (
        results['weights'][0],    # weight vector (first row of coef_)
        results['bias'][0],       # first element of intercept_
        accuracy,                 # test-set accuracy
        results['predictions'],   # predicted test labels
        cpu_seconds,              # summed CPU percentages / 100
        cpu_usages,               # per-sample CPU percentages
        ram_usages,               # per-sample RAM percentages
        results['train_time'],    # seconds spent fitting
        results['test_time'],     # seconds spent predicting
    ),
    'variables_sklearn.pkl',
)

['variables_sklearn.pkl']