In [1]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import cross_val_score, cross_val_predict, train_test_split, GridSearchCV
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix, f1_score, precision_score, recall_score
from sklearn.pipeline import Pipeline
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns; sns.set_style("dark")
import time

In [2]:
train = pd.read_csv("../data/train_small.csv")

In [3]:
cols = ['type', 'air_temperature_k', 'process_temperature_k','rotational_speed_rpm', 'torque_nm', 'tool_wear_min']

In [4]:
#train = train[cols]
x_train = train[cols]
y_train = train["target"]

In [5]:
sizes = np.linspace(0.1,1,10)
sizes = list(sizes)
sizes

[0.1,
 0.2,
 0.30000000000000004,
 0.4,
 0.5,
 0.6,
 0.7000000000000001,
 0.8,
 0.9,
 1.0]

In [6]:
cols[:len(cols)+1]

['type',
 'air_temperature_k',
 'process_temperature_k',
 'rotational_speed_rpm',
 'torque_nm',
 'tool_wear_min']

In [7]:
sizes[0]

0.1

In [8]:
numbers = np.random.randint(0,high=len(train), size=round(len(train)*sizes[0]))

In [9]:
typ = "linear"
model = SVC(kernel=typ)
size1 = pd.DataFrame()
for i in range(len(sizes)):
    numbers = np.random.randint(0,high=len(train), size=round(len(train)*sizes[i]))
    new_x = x_train.iloc[numbers,:].reset_index(drop=True)
    new_y = y_train.iloc[numbers]
    start = time.time()
    model.fit(new_x, new_y)
    stop = time.time()
    elapsed=stop-start
    size1.loc[i, "size"] = sizes[i]*len(train)
    size1.loc[i, "model"] = "SVC_linear"
    size1.loc[i, "runtime"] = elapsed
    #size1.loc[i,"kernel"] = typ

In [10]:
size1

Unnamed: 0,size,model,runtime
0,55.6,SVC_linear,0.026839
1,111.2,SVC_linear,0.003774
2,166.8,SVC_linear,0.00418
3,222.4,SVC_linear,0.005017
4,278.0,SVC_linear,0.005655
5,333.6,SVC_linear,0.006479
6,389.2,SVC_linear,0.007589
7,444.8,SVC_linear,0.009057
8,500.4,SVC_linear,0.009817
9,556.0,SVC_linear,0.011176


In [11]:
typ = "rbf"
model = SVC(kernel=typ)
size2 = pd.DataFrame()
for i in range(len(sizes)):
    numbers = np.random.randint(0,high=len(train), size=round(len(train)*sizes[i]))
    new_x = x_train.iloc[numbers,:].reset_index(drop=True)
    new_y = y_train.iloc[numbers]
    start = time.time()
    model.fit(new_x, new_y)
    stop = time.time()
    elapsed=stop-start
    size2.loc[i, "size"] = sizes[i]*len(train)
    size2.loc[i, "model"] = "SVC_rbf"
    size2.loc[i, "runtime"] = elapsed
    #size2.loc[i,"kernel"] = typ

In [12]:
size2

Unnamed: 0,size,model,runtime
0,55.6,SVC_rbf,0.002975
1,111.2,SVC_rbf,0.002617
2,166.8,SVC_rbf,0.003219
3,222.4,SVC_rbf,0.00401
4,278.0,SVC_rbf,0.005165
5,333.6,SVC_rbf,0.006485
6,389.2,SVC_rbf,0.007906
7,444.8,SVC_rbf,0.008922
8,500.4,SVC_rbf,0.010397
9,556.0,SVC_rbf,0.013288


In [13]:
typ = "linear"
model = SVC(kernel=typ)
feat = pd.DataFrame()
for i in range(len(cols)):
    new_x = x_train.loc[:,cols[:i+1]]
    start = time.time()
    model.fit(new_x, y_train)
    stop = time.time()
    elapsed=stop-start
    feat.loc[i, "num_features"] = i + 1
    feat.loc[i, "model"] = "SVC_linear"
    feat.loc[i, "runtime"] = elapsed
    #feat.loc[i,"kernel"] = typ

In [14]:
feat

Unnamed: 0,num_features,model,runtime
0,1.0,SVC_linear,0.011405
1,2.0,SVC_linear,0.011774
2,3.0,SVC_linear,0.011279
3,4.0,SVC_linear,0.010649
4,5.0,SVC_linear,0.009748
5,6.0,SVC_linear,0.009242


In [15]:
typ = "rbf"
model = SVC(kernel=typ)
feat2 = pd.DataFrame()
for i in range(len(cols)):
    new_x = x_train.loc[:,cols[:i+1]]
    start = time.time()
    model.fit(new_x, y_train)
    stop = time.time()
    elapsed=stop-start
    feat2.loc[i, "num_features"] = i + 1
    feat2.loc[i, "model"] = "SVC_rbf"
    feat2.loc[i, "runtime"] = elapsed
    #feat2.loc[i,"kernel"] = typ

In [16]:
feat2

Unnamed: 0,num_features,model,runtime
0,1.0,SVC_rbf,0.019105
1,2.0,SVC_rbf,0.020667
2,3.0,SVC_rbf,0.019894
3,4.0,SVC_rbf,0.015366
4,5.0,SVC_rbf,0.013959
5,6.0,SVC_rbf,0.013168


In [17]:
size_df = pd.concat([size1, size2], axis = 0).reset_index(drop=True)
size_df.to_csv("../results/runtime_size/svc.csv", index=False)

In [18]:
feat_df = pd.concat([feat, feat2], axis = 0).reset_index(drop=True)
feat_df.to_csv("../results/runtime_features/svc.csv", index=False)