In [67]:
import numpy as np
import pandas as pd
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import StandardScaler

# Shared objects for the chunked regression cell that follows.
chunksize = 20000                    # number of CSV rows read per chunk
reg = SGDRegressor(random_state=10)  # fixed seed for the SGD regressor
ss = StandardScaler()                # feature/target standardiser

In [None]:
# Out-of-core training: stream the CSV in chunks of `chunksize` rows and
# update both the scaler and the regressor incrementally.
data_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00246/3D_spatial_network.txt'
column_names = ['OSM_ID', 'LONGITUDE', 'LATITUDE', 'ALTITUDE']
chunk_reader = pd.read_csv(data_url, header=None, names=column_names,
                           chunksize=chunksize, iterator=True)

# enumerate() supplies the 1-based chunk number, so no hand-maintained
# counter is needed and the builtin iter() is no longer shadowed.
for chunk_no, train_df in enumerate(chunk_reader, start=1):
    # Fold this chunk into the scaler's running statistics, then scale the
    # chunk with the statistics accumulated so far.
    ss.partial_fit(train_df)
    train_df_scaled = ss.transform(train_df)

    X_train_partial = train_df_scaled[:, 0:3]  # first three columns are the features
    y_train_partial = train_df_scaled[:, 3]    # last column (ALTITUDE) is the target

    # partial_fit() continues from the current weights. Calling fit() here
    # would re-initialise the model on every chunk, so the final model would
    # only have seen the last chunk.
    reg.partial_fit(X_train_partial, y_train_partial)
    print(f'After iteration #{chunk_no}')
    print(reg.coef_)
    print(reg.intercept_)


In [41]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

In [3]:
# Load iris and hold out 20% of the samples for evaluation (seeded split).
X, y = load_iris(return_X_y=True)
# train_test_split returns (X_train, X_test, y_train, y_test); the fourth
# value is the test-set target, so it is named y_test rather than y_split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10)

In [86]:
from sklearn.preprocessing import Normalizer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline

In [33]:
# Two-step pipeline: Normalizer on the inputs, then a k-NN classifier.
pipe = Pipeline(steps=[
    ('scaler', Normalizer()),
    ('knn', KNeighborsClassifier(n_neighbors=1)),  # initial k; the grid below overrides it
])

# Cross-validated search over the number of neighbours, scored by accuracy.
gcv = GridSearchCV(
    estimator=pipe,
    param_grid={'knn__n_neighbors': [2, 3, 4]},
    scoring='accuracy',
    n_jobs=1,
)
gcv.fit(X_train, y_train)

In [34]:
gcv.best_estimator_

In [36]:
gcv.best_params_

{'knn__n_neighbors': 3}

In [5]:
from sklearn.svm import SVC
# Grid search over kernel type and regularisation strength C for an SVC,
# evaluated with cv=4.
# NOTE(review): `gc` is also the name of a standard-library module; the name
# is kept because later cells refer to it.
gc = GridSearchCV(
    estimator=SVC(gamma='auto', random_state=0),
    param_grid={
        'kernel': ['linear', 'rbf'],
        'C': [1, 15, 25],
    },
    cv=4,
)

In [9]:
# Reload iris and hold out 30% of the samples for evaluation (seeded split).
X, y = load_iris(return_X_y=True)
# train_test_split returns (X_train, X_test, y_train, y_test); the fourth
# value is the test-set target, so it is named y_test rather than y_split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=10)

In [18]:
gc.fit(X_train, y_train)

In [19]:
gc.best_params_

{'C': 1, 'kernel': 'linear'}

In [20]:
gc.best_score_

0.9715099715099715

In [6]:
# Read the ads dataset from a path relative to the notebook and preview the
# first rows. At this point X still holds every column of the CSV.
X = pd.read_csv('./data/Social_Network_Ads.csv')
X.head()

Unnamed: 0,Age,EstimatedSalary,Purchased
0,19,19000,0
1,35,20000,0
2,26,43000,0
3,27,57000,0
4,19,76000,0


In [7]:
y = X['Purchased']

In [8]:
X = X.drop(columns=['Purchased'])

In [9]:
X.shape, y.shape

((400, 2), (400,))

In [10]:
# Seeded split: 25% of the rows are held out for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

In [11]:
X_train.head()

Unnamed: 0,Age,EstimatedSalary
250,44,39000
63,32,120000
312,38,50000
159,32,135000
283,52,21000


In [12]:
X_test.head()

Unnamed: 0,Age,EstimatedSalary
132,30,87000
309,38,50000
341,35,75000
196,30,79000
246,35,50000


In [13]:
from sklearn.svm import LinearSVC
# Fresh standardiser and a seeded linear SVM for this dataset.
ss = StandardScaler()
sv = LinearSVC(random_state=0)

In [14]:
# Fit the scaler on the training split only, then apply the same fitted
# transformation to the test split.
# Both names are rebound to the scaled data, so re-running this cell on its
# own would scale data that has already been scaled.
X_train = ss.fit_transform(X_train)
X_test = ss.transform(X_test)

In [15]:
# fit() returns the estimator itself, so training and prediction are chained;
# `sv` is left fitted exactly as before.
y_pred = sv.fit(X_train, y_train).predict(X_test)

In [60]:
from sklearn.metrics import confusion_matrix, accuracy_score

In [17]:
accuracy_score(y_test, y_pred)

0.9

In [18]:
confusion_matrix(y_test, y_pred)

array([[66,  2],
       [ 8, 24]], dtype=int64)

In [2]:
from sklearn.datasets import fetch_openml

In [3]:
# Fetch version 1 of the 'mnist_784' dataset from OpenML as an (X, y) pair;
# cache=True asks scikit-learn to reuse a previously downloaded copy.
X, y = fetch_openml(
    'mnist_784',
    version=1,
    return_X_y=True,
    cache=True,
)

In [4]:
X = X.to_numpy()

In [8]:
y = y.to_numpy()

In [9]:
# Positional split without shuffling: the first 20,000 rows are used for
# training and the following 5,000 rows for testing.
n_train = 20000
n_test = 5000
test_stop = n_train + n_test

X_train, y_train = X[:n_train], y[:n_train]
X_test, y_test = X[n_train:test_stop], y[n_train:test_stop]

In [10]:
from sklearn.preprocessing import MinMaxScaler
mm = MinMaxScaler()

In [11]:
# Fit the min-max scaler on the training rows only and reuse the fitted
# scaler on the test rows.
X_train = mm.fit_transform(X_train)
X_test = mm.transform(X_test)

In [16]:
from sklearn.svm import SVC
# Linear-kernel SVC with a one-vs-rest shaped decision function and no
# class re-weighting.
sv = SVC(
    kernel='linear',
    decision_function_shape='ovr',
    class_weight=None,
)

In [17]:
sv.fit(X_train, y_train)

In [18]:
y_pred = sv.predict(X_test)

In [32]:
# Confusion matrix of true labels (first argument) against predicted labels
# (second argument); kept in `z` for the follow-up cell and displayed.
z = confusion_matrix(y_test, y_pred)
z

array([[469,   0,   1,   0,   0,   5,   1,   1,   1,   0],
       [  1, 555,   2,   4,   0,   1,   1,   1,   3,   0],
       [  5,   4, 479,   6,   9,   1,   3,   6,   7,   1],
       [  3,   3,  13, 462,   0,  19,   0,   3,   9,   4],
       [  2,   0,   6,   0, 468,   1,   2,   4,   1,  16],
       [ 14,   0,   0,  18,   2, 405,   3,   2,   7,   9],
       [  1,   2,   2,   0,   6,   9, 471,   0,   0,   0],
       [  0,   2,   4,   1,   7,   1,   0, 483,   0,   6],
       [  3,  10,  11,  15,   0,  19,   6,   3, 396,   3],
       [  3,   6,   1,   2,  21,   0,   0,  26,   2, 435]], dtype=int64)

In [28]:
# The diagonal of the confusion matrix `z` holds the correctly classified
# samples; np.trace sums that diagonal without a hand-written index loop.
c = np.trace(z)
c

4623

In [29]:
from sklearn.metrics import f1_score, recall_score, precision_score

In [38]:
# average='micro' pools true/false positives and negatives over all classes
# before computing the score. For single-label multiclass predictions this
# equals plain accuracy (4623 correct out of 5000 here).
f1_score(y_test, y_pred, average='micro')

0.9246

In [37]:
recall_score(y_test, y_pred, average='micro')

0.9246

In [39]:
precision_score(y_test, y_pred, average='micro')

0.9246

In [49]:
# Seeded split of X and y into two equal halves (50% train, 50% test).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.5,
    random_state=42,
)

In [50]:
from sklearn.preprocessing import StandardScaler
# New standardiser plus a degree-3 polynomial SVC with balanced class weights.
ss = StandardScaler()
sv = SVC(
    kernel='poly',
    degree=3,
    C=10,
    class_weight='balanced',
    decision_function_shape='ovr',
)

In [51]:
X_train = ss.fit_transform(X_train)
X_test = ss.transform(X_test)

In [52]:
sv.fit(X_train, y_train)


In [54]:
y_pred = sv.predict(X_test)

In [55]:
f1_score(y_test, y_pred, average='weighted')

0.9723258740744225

In [56]:
# Switch back to iris: reload the arrays and hold out 30% for testing (seeded).
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [57]:
sv = SVC(C=10, kernel='poly', gamma='auto')

In [58]:
sv.fit(X_train,y_train)

In [59]:
y_pred = sv.predict(X_test)

In [61]:
accuracy_score(y_test, y_pred)

1.0

In [62]:
# Same split as the previous model, now with a sigmoid kernel and C=25.
# fit() returns the estimator, so construction and training are chained.
sv = SVC(kernel='sigmoid', C=25, gamma='auto').fit(X_train, y_train)
y_pred = sv.predict(X_test)

In [63]:
accuracy_score(y_test, y_pred)

0.28888888888888886

In [134]:
# Build a DataFrame from the iris feature matrix and append the integer
# target as a 'class' column ('class' is a Python keyword, hence the dict).
iris = load_iris()
iris_df = pd.DataFrame(iris.data).assign(**{'class': iris.target})

In [135]:
iris.target_names

array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

In [136]:
# Remove every row of class 0 (the first entry of iris.target_names) so that
# only classes 1 and 2 remain, giving a two-class problem.
class_zero_rows = iris_df.index[iris_df['class'] == 0]
iris_df = iris_df.drop(index=class_zero_rows)

In [137]:
iris_df['class'].unique()

array([1, 2])

In [138]:
# Separate the feature columns from the 'class' target column.
X = iris_df.drop(columns='class')
y = iris_df['class']

In [139]:
X.shape, y.shape

((100, 4), (100,))

In [140]:
# Seeded split: 25% of the rows are held out for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

In [145]:
# Min-max scaling followed by an SVC with default settings, bundled in a
# pipeline so the scaler is fitted together with the classifier.
mm = MinMaxScaler()
sv = SVC()
pipe = Pipeline(steps=[
    ('Scaler', mm),
    ('classifier', sv),
])

In [146]:
pipe.fit(X_train, y_train)

In [147]:
y_pred = pipe.predict(X_test)
y_pred.shape, y_test.shape, X_test.shape

((25,), (25,), (25, 4))

In [149]:
pipe.score(X_test, y_test)

0.92

In [148]:
# The remaining labels are 1 and 2; precision is reported for label 1, which
# is scikit-learn's default positive label and is spelled out here.
precision_score(y_test, y_pred, pos_label=1)

1.0