In [1]:
import numpy as np
import time
import psutil
import matplotlib.pyplot as plt
from scipy.special import expit
from sklearn.datasets import make_classification
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import pandas as pd

In [2]:
# Read the pre-split feature matrices and label vectors straight into NumPy
# arrays. Labels are flattened to 1-D and re-encoded in the same step:
# 0 becomes -1 and every other value becomes +1.
X_train = pd.read_csv('X_train3.csv').to_numpy()
y_train = np.where(pd.read_csv('y_train3.csv').to_numpy().reshape(-1) == 0, -1, 1)
X_test = pd.read_csv('X_test3.csv').to_numpy()
y_test = np.where(pd.read_csv('y_test3.csv').to_numpy().reshape(-1) == 0, -1, 1)

# Standardize features. The scaler's statistics are learned from the training
# split only and then applied unchanged to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [3]:
from sklearn.linear_model import SGDClassifier

In [4]:
import os
from threading import Thread

In [5]:
def get_cpu_and_ram_utilization(pid, duration, interval=1):
    """Sample a process's CPU and RAM utilization for a fixed time window.

    Repeatedly queries ``psutil`` for the process identified by ``pid`` until
    ``duration`` seconds have elapsed, printing every sample as it is taken.

    Args:
        pid: ID of the process to monitor.
        duration: Length of the monitoring window in seconds. A sample is
            started whenever the elapsed time is still below this value, so
            the last sample may finish slightly after the window ends.
        interval: Seconds each CPU sample blocks for (passed to
            ``Process.cpu_percent``). Defaults to 1, the value that was
            previously hard-coded. Use a smaller value to observe workloads
            that finish in well under a second.

    Returns:
        A tuple ``(total_cpu_usage, cpu_usages, ram_usages)`` where
        ``cpu_usages`` and ``ram_usages`` are the per-sample percentages and
        ``total_cpu_usage`` is ``sum(cpu_usages)``. Note that dividing this
        sum by 100 yields CPU-seconds only when ``interval`` is 1 second; for
        other intervals multiply by ``interval`` as well.

    Raises:
        ValueError: If ``interval`` is ``None`` or not positive. A
            non-blocking sample would turn the loop into a busy spin.
    """
    if interval is None or interval <= 0:
        raise ValueError("interval must be a positive number of seconds")

    process = psutil.Process(pid)
    cpu_usages = []
    ram_usages = []
    # A monotonic clock keeps the window length immune to system clock changes.
    start_time = time.monotonic()
    while time.monotonic() - start_time < duration:
        # Blocks for `interval` seconds, which also paces the loop.
        cpu_usage = process.cpu_percent(interval=interval)
        ram_usage = process.memory_percent()
        cpu_usages.append(cpu_usage)
        ram_usages.append(ram_usage)
        print(f"CPU utilization: {cpu_usage}% | RAM utilization: {ram_usage}%")
    total_cpu_usage = sum(cpu_usages)
    return total_cpu_usage, cpu_usages, ram_usages

In [6]:
# PID of the current Python process; this is the process whose CPU/RAM
# usage is sampled by get_cpu_and_ram_utilization.
pid = os.getpid()

# Duration to monitor CPU usage, in seconds
monitor_duration = 5

In [7]:
# Pause for 3 seconds before the measured run.
# NOTE(review): presumably this lets CPU activity from the earlier cells
# settle so it does not leak into the measurement — confirm.
time.sleep(3)

In [8]:
# Shared container the training function writes into. It is module-level so
# the values remain accessible after the function has run on a worker thread.
results = {}


def train_svm_classifier(X, y):
    """Fit a linear SVM via SGD and record the model, parameters and fit time.

    Trains ``SGDClassifier(loss='hinge')`` on ``(X, y)`` and stores the
    outcome in the module-level ``results`` dict under these keys:

    * ``'weights'``    - the estimator's ``coef_``
    * ``'bias'``       - the estimator's ``intercept_``
    * ``'model'``      - the fitted estimator itself
    * ``'train_time'`` - elapsed seconds, which is also printed

    Nothing is returned, so the function can be used directly as a
    ``Thread`` target.

    Args:
        X: Training feature matrix. ``X.shape[0]`` is used as the sample
            count for the regularization strength.
        y: Training labels, passed unchanged to ``fit``.
    """
    # perf_counter is a high-resolution monotonic timer, which suits a fit
    # that completes in a few milliseconds.
    train_time_start = time.perf_counter()

    # Original author's note: change C=1 when there are mid or high attributes.
    # NOTE(review): presumably "attributes" means the number of features — confirm.
    C = 1
    # Convert the SVM-style C into SGDClassifier's alpha (alpha = 1 / (n_samples * C)).
    # The sample count comes from the data actually passed in, so the
    # function is correct for any X rather than only the notebook's X_train.
    alpha = 1.0 / (X.shape[0] * C)

    # tol=None disables the convergence check, so training always runs for
    # the full max_iter epochs.
    sgd_clf = SGDClassifier(loss='hinge', alpha=alpha, max_iter=3, tol=None, random_state=42)
    sgd_clf.fit(X, y)

    results['weights'] = sgd_clf.coef_
    results['bias'] = sgd_clf.intercept_
    results['model'] = sgd_clf
    train_time_end = time.perf_counter()
    results['train_time'] = train_time_end - train_time_start
    print(results['train_time'])

In [9]:
# Launch the fit on a worker thread so that this thread is free to sample the
# process's CPU and RAM usage while training is under way.
train_thread = Thread(
    target=train_svm_classifier,
    args=(X_train, y_train),
)
train_thread.start()

# Blocks for the whole monitoring window, printing one line per sample.
total_cpu_usage, cpu_usages, ram_usages = get_cpu_and_ram_utilization(
    pid=pid,
    duration=monitor_duration,
)
# Make sure training has finished before any of its results are used.
train_thread.join()

print(f"Total CPU utilization over {monitor_duration} seconds: {total_cpu_usage}%")

# Per-sample readings (the monitor takes one sample per second)
print("CPU usage per second:", cpu_usages)
print("RAM usage per second:", ram_usages)

# Convert the summed per-second percentages into CPU-seconds:
# 100% for one second equals one CPU-second. total_cpu_usage is already
# sum(cpu_usages), as returned by the monitor.
cpu_seconds = total_cpu_usage / 100
print(f"Total CPU resource consumption: {cpu_seconds} CPU-seconds")

0.012542009353637695
CPU utilization: 2.0% | RAM utilization: 1.5969650410896237%
CPU utilization: 1.0% | RAM utilization: 1.5969650410896237%
CPU utilization: 1.0% | RAM utilization: 1.5969650410896237%
CPU utilization: 1.0% | RAM utilization: 1.5969650410896237%
CPU utilization: 1.0% | RAM utilization: 1.5969650410896237%
Total CPU utilization over 5 seconds: 6.0%
CPU usage per second: [2.0, 1.0, 1.0, 1.0, 1.0]
RAM usage per second: [1.5969650410896237, 1.5969650410896237, 1.5969650410896237, 1.5969650410896237, 1.5969650410896237]
Total CPU resource consumption: 0.06 CPU-seconds


In [10]:
# Learned coefficients (the estimator's coef_) stored by train_svm_classifier.
print(results['weights'])

[[-0.36173956  0.18412256 -0.16071957 -0.49381261 -0.4620675  -1.02260004
  -0.48998162  0.22615535 -0.7987187   0.07004327 -0.08662677 -0.90874068
   0.07210211  0.74685711  1.44505734]]


In [11]:
# Elapsed training time in seconds, as recorded inside train_svm_classifier.
results['train_time']

0.012542009353637695

In [12]:
# Time inference on the held-out set. perf_counter is a high-resolution
# monotonic timer, which matters here because the call completes in under a
# millisecond; wall-clock time.time() can be too coarse or jump if the system
# clock is adjusted.
test_time_start = time.perf_counter()
y_pred = results['model'].predict(X_test)
test_time_end = time.perf_counter()

In [13]:
# Record the elapsed prediction time (seconds) alongside the training results
# and display it.
results['test_time']=test_time_end-test_time_start
results['test_time']

0.0007753372192382812

In [14]:
from sklearn.metrics import accuracy_score
# Fraction of test samples whose predicted label matches the true label.
# NOTE(review): in the saved output y_pred is -1 for all but one sample, so
# this score mostly reflects the share of -1 labels in y_test. Consider also
# reporting precision/recall or a confusion matrix.
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')

Accuracy: 0.8518518518518519


In [15]:
import joblib
# Persist the learned parameters, predictions and resource/timing measurements
# as a single tuple. Whoever loads the file must unpack it in this exact order.
joblib.dump(
    (
        results['weights'][0],   # first (only) row of coef_
        results['bias'][0],      # first element of intercept_
        accuracy,
        y_pred,
        cpu_seconds,
        cpu_usages,
        ram_usages,
        results['train_time'],
        results['test_time'],
    ),
    'variables_sklearn.pkl',
)

['variables_sklearn.pkl']

In [16]:
# Ground-truth test labels, encoded as -1 / +1 by the np.where step at load time.
y_test

array([-1, -1,  1, -1, -1, -1, -1, -1,  1, -1, -1, -1, -1,  1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1,  1,  1,  1, -1,  1, -1, -1, -1, -1, -1,
       -1, -1,  1, -1, -1, -1, -1, -1, -1, -1,  1,  1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1,  1,  1, -1, -1, -1, -1, -1, -1,  1, -1, -1,
       -1, -1,  1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  1, -1,
       -1, -1, -1, -1,  1, -1,  1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1,  1, -1,  1, -1, -1, -1, -1, -1, -1,
        1, -1, -1, -1,  1, -1, -1, -1, -1, -1, -1,  1, -1, -1, -1, -1, -1,
       -1,  1, -1, -1, -1, -1, -1, -1, -1,  1, -1,  1,  1, -1, -1, -1, -1,
       -1, -1,  1, -1, -1, -1,  1,  1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1,  1, -1,  1, -1, -1, -1, -1, -1, -1, -1,  1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1

In [17]:
# Labels predicted for X_test by the fitted model.
y_pred

array([-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1

In [18]:
# Learned intercept (the estimator's intercept_) stored by train_svm_classifier.
results['bias']

array([-5.2148949])