In [2]:
%pip install numpy pandas scikit-learn

from contextlib import contextmanager
from pathlib import Path
from timeit import default_timer

from numpy import float64, ndarray, uint8
from pandas import DataFrame, Series

from sklearn.datasets import fetch_openml
from sklearn.linear_model import SGDClassifier
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.utils import Bunch


[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


Note: you may need to restart the kernel to use updated packages.


In [3]:
@contextmanager
def timer():
  '''
  time the body of a ``with`` block (adapted from the given starter code)

  yields a zero-argument callable: while the block is still running it
  returns the seconds elapsed so far; once the block has exited it keeps
  returning the block's total duration

  the duration is frozen even when the block raises, so the callable stays
  meaningful inside an ``except`` handler; the exception itself propagates
  '''

  start = default_timer()
  def elapser(): return default_timer() - start
  try:
    # the lambda looks `elapser` up at call time, so rebinding the name in
    # the finally clause changes what the already-yielded callable returns
    yield lambda: elapser()
  finally:
    end = default_timer()
    def elapser(): return end - start

def time(name: str, model: SGDClassifier | SVC, X: DataFrame, y: Series) -> None:
  '''
  time fitting a model, caching the measurement in times/<name>.txt

  when the cache file already exists its contents are printed and
  model.fit is NOT called; otherwise model.fit(X, y) is called, the elapsed
  seconds are printed and then written to the cache file
  :param name: the name of the model, used as the cache file's stem
  :type name: str
  :param model: the model to fit
  :type model: SGDClassifier | SVC
  :param X: the training feature set
  :type X: DataFrame
  :param y: the training label set
  :type y: Series
  '''

  file: Path = Path(f'times/{name}.txt')
  if file.is_file():
    print(f'runtime: {file.read_text()}')
    return

  # create times/ before the (possibly minutes-long) fit so a missing
  # directory cannot throw the measurement away when it is written
  file.parent.mkdir(parents=True, exist_ok=True)

  with timer() as fit_timer:
    model.fit(X, y)

  # read the elapsed time once so the printed and stored values match
  elapsed = fit_timer()
  print(f'runtime: {elapsed}')
  file.write_text(str(elapsed))

In [4]:
# download MNIST from OpenML; fetch_openml returns a Bunch whose 'data' and
# 'target' entries are read below
mnist: Bunch = fetch_openml('mnist_784', version=1)

X: DataFrame = mnist['data'].astype(float64)
y: Series = mnist['target'].astype(uint8)

# first 60000 rows are the training split, the remainder the test split
X_train: DataFrame
X_test: DataFrame
X_train, X_test = X[:60000], X[60000:]

y_train: Series
y_test: Series
y_train, y_test = y[:60000], y[60000:]

# fit the scaler on the training split only and reuse it for the test split;
# with scikit-learn's default output configuration both results are ndarrays
scaler: StandardScaler = StandardScaler()
X_train_scale: ndarray = scaler.fit_transform(X_train)
X_test_scale: ndarray = scaler.transform(X_test)

In [5]:
# time (or print the cached time for) an SGDClassifier on the full scaled training split
time(
  name='sgd',
  model=SGDClassifier(random_state=42),
  X=X_train_scale,
  y=y_train,
)

runtime: 225.77199569999993


In [6]:
# time (or print the cached time for) an SVC on the first 1000 training rows
time(
  name='svc_1000',
  model=SVC(gamma='auto', random_state=42),
  X=X_train_scale[:1000],
  y=y_train[:1000],
)

runtime: 0.12480860000050598


In [7]:
# time (or print the cached time for) an SVC on the first 2000 training rows
time(
  name='svc_2000',
  model=SVC(gamma='auto', random_state=42),
  X=X_train_scale[:2000],
  y=y_train[:2000],
)

runtime: 0.3617899999999281


In [8]:
# time (or print the cached time for) an SVC on the first 4000 training rows
time(
  name='svc_4000',
  model=SVC(gamma='auto', random_state=42),
  X=X_train_scale[:4000],
  y=y_train[:4000],
)

runtime: 1.2012546000005386


In [9]:
# time (or print the cached time for) a one-vs-rest SVC on the first 1000 training rows
time(
  name='ovr_1000',
  model=OneVsRestClassifier(SVC(gamma='auto', random_state=42)),
  X=X_train_scale[:1000],
  y=y_train[:1000],
)

runtime: 0.3732375999998112


In [10]:
# time (or print the cached time for) a one-vs-rest SVC on the first 2000 training rows
time(
  name='ovr_2000',
  model=OneVsRestClassifier(SVC(gamma='auto', random_state=42)),
  X=X_train_scale[:2000],
  y=y_train[:2000],
)

runtime: 1.6762154000007286


In [11]:
# time (or print the cached time for) a one-vs-rest SVC on the first 4000 training rows
time(
  name='ovr_4000',
  model=OneVsRestClassifier(SVC(gamma='auto', random_state=42)),
  X=X_train_scale[:4000],
  y=y_train[:4000],
)

runtime: 7.157275699999445
