# Scikit-Learn API convention
- to show how scikit-learn objects are made
- all objects share a consistent and simple interface
- The main objects in sklearn are:
   - (one class can implement multiple interfaces)
   - estimator E: E.fit(X,y) or E.fit(X)
   - predictor P: P.predict(X) or P.predict_proba(X) or P.decision_function(X)
   - transformer T: T.transform(X), or T.fit_transform(X)
   - model (goodness of fit): model.score(X)


## Example 1 - unsupervised

In [1]:
import numpy as np
from scipy import sparse
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.validation import check_array, check_is_fitted
from sklearn.utils.sparsefuncs import inplace_column_scale, mean_variance_axis

In [2]:
def _mean_and_std(X, axis=0):
   
    X = np.asarray(X)
    mean_ = X.mean(axis)
    std_ = X.std(axis)

    return mean_, std_

In [3]:
data = np.array([[1,2,3],
                 [4,5,6]]); data

array([[1, 2, 3],
       [4, 5, 6]])

In [4]:
_mean_and_std(data, 0), _mean_and_std(data, 1)

((array([2.5, 3.5, 4.5]), array([1.5, 1.5, 1.5])),
 (array([2., 5.]), array([0.81649658, 0.81649658])))

In [5]:
class my_StandardScaler():

    def __init__(self):
        self.mean_, self.std_ = 0., 0.
        
    def fit(self, X, y=None):
        X = X.astype(np.float32)
        self.mean_, self.std_ = _mean_and_std(X, axis=0)
        return self

    def transform(self, X, y=None):
        X = X.astype(np.float32)
        X -= self.mean_
        X /= self.std_
        return X

    def fit_transform(self, X, y=None):
        X = X.astype(np.float32)
        self.mean_, self.std_ = _mean_and_std(X, axis=0)
        X -= self.mean_
        X /= self.std_
        return X

    def inverse_transform(self, X):
        X = X.astype(np.float32)
        X *= self.std_
        X += self.mean_
        return X

In [6]:
X = np.array([1,2,3,4,5,6,7,8,9,10])
sc = my_StandardScaler()

In [7]:
# dir(sc)

In [8]:
sc.fit(X)

<__main__.my_StandardScaler at 0x1dd51f4a8b0>

In [9]:
sc.mean_, sc.std_

(5.5, 2.8722813)

In [10]:
X_sc = sc.transform(X); X_sc

array([-1.5666989 , -1.2185436 , -0.87038827, -0.52223295, -0.17407766,
        0.17407766,  0.52223295,  0.87038827,  1.2185436 ,  1.5666989 ],
      dtype=float32)

In [11]:
sc.inverse_transform(X_sc)

array([ 1.       ,  1.9999998,  3.       ,  4.       ,  5.       ,
        6.       ,  7.       ,  8.       ,  9.       , 10.       ],
      dtype=float32)

## Example 2 - supervised

In [12]:
import numpy as np
from sklearn.metrics import r2_score

class my_OLS ():

    def __init__(self):
        self.W_ = np.array([])

    def fit(self, X, y):
        for i in range(len(X)+1):
            np.append(self.W_, 0)
        X_b = np.concatenate([X, np.ones((X.shape[0], 1))], axis=1)
        self.W_ = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y)

    def score(self, X, y):
        pred = self.predict(X)
        return r2_score(pred, y)

    def predict(self, X):
        X_b = np.concatenate([X, np.ones((X.shape[0], 1))], axis=1)
        return X_b@self.W_

In [13]:
from sklearn.datasets import make_regression
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression

X, y = make_regression(n_samples=100, n_features=2, noise=10, random_state=1)
X_sc = StandardScaler().fit_transform(X)

In [14]:
model = LinearRegression()        # create model
model.fit(X_sc,y)                    # train model
print("coefficients and bias: ", model.coef_, model.intercept_)
print(model.score(X_sc,y))
print(model.predict(X_sc[:5]))

coefficients and bias:  [29.52899512 71.88859745] 19.28081595550047
0.9818249824413999
[ 69.28675103 -36.17117272 -38.2733512  -11.02296748 -43.112981  ]


In [15]:
model = my_OLS()
model.fit(X_sc,y)  
print(model.W_)
print(model.score(X_sc,y))
print(model.predict(X_sc[:5]))

[29.52899512 71.88859745 19.28081596]
0.9814885362629435
[ 69.28675103 -36.17117272 -38.2733512  -11.02296748 -43.112981  ]
