## Confidentiality

The programmatic cases in this notebook are adapted from various internet resources.

Please do not copy or distribute this notebook.

## Table of contents

sklearn.base

1. Programmatic case 1 
2. Programmatic case 2 
3. Programmatic case 3 
4. Programmatic case 4 
5. Programmatic case 5 

## Previous knowledge

Please study the following resources for a deep understanding of this notebook.

1.   https://bit.ly/2KvDctl
(sklearn.base)
2.   https://bit.ly/3mINOls 
(Sklearn.base topics)

### Programmatic case 1

In [1]:
from sklearn.base import BaseEstimator
from sklearn.base import TransformerMixin
from sklearn.datasets import load_iris
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression

class MyOwnTransformer(BaseEstimator, TransformerMixin):
    """Pass-through transformer: learns nothing and returns its input as is."""

    def fit(self, X, y=None):
        """Accept the data without learning from it and return ``self``."""
        return self

    def transform(self, X):
        """Return ``X`` unchanged."""
        return X

# Load the iris features and labels as plain arrays.
X, y = load_iris(return_X_y=True)

# Chain the pass-through transformer with a logistic-regression classifier.
pipe = make_pipeline(
    MyOwnTransformer(),
    LogisticRegression(random_state=10, solver='lbfgs'),
)
pipe.fit(X, y)
# Final expression of the cell: the labels predicted for the training data.
pipe.predict(X)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

### Programmatic case 2

In [2]:
from sklearn.base import RegressorMixin
import numpy as np
from sklearn.datasets import load_diabetes

class MyOwnRegressor(BaseEstimator, RegressorMixin):
    """Toy regressor whose prediction is the mean of each sample's features."""

    def fit(self, X, y):
        """Do nothing and return ``self``; the targets ``y`` are not used."""
        return self

    def predict(self, X):
        """Return the per-row mean of ``X`` (one value per sample)."""
        per_row_mean = np.mean(X, axis=1)
        return per_row_mean

# Load the diabetes regression data as plain arrays.
X, y = load_diabetes(return_X_y=True)

# The pass-through transformer feeds the row-mean regressor.
pipe = make_pipeline(
    MyOwnTransformer(),
    MyOwnRegressor(),
)
pipe.fit(X, y)
pipe.predict(X)
# Final expression of the cell: the pipeline's score on the training data.
pipe.score(X, y)

-3.90271854560383

### Programmatic case 3

In [3]:
from sklearn.base import ClassifierMixin

class MyOwnClassifier(BaseEstimator, ClassifierMixin):
    """Toy classifier that guesses uniformly at random among the seen classes."""

    def fit(self, X, y):
        """Record the distinct labels found in ``y`` and return ``self``.

        No model is learned; ``X`` is not inspected.
        """
        self.classes_ = np.unique(y)
        return self

    def predict(self, X):
        """Return one randomly chosen class label for each row of ``X``."""
        # Draw positions into ``classes_`` and map them back to the actual
        # labels, so label sets other than 0..n-1 (e.g. strings) are
        # predicted in the same label space that ``fit`` received.
        picks = np.random.randint(0, self.classes_.size,
                                  size=X.shape[0])
        return self.classes_[picks]

    def predict_proba(self, X):
        """Return random class probabilities; each row sums to 1.

        The result has one row per sample of ``X`` and one column per
        entry of ``classes_``.
        """
        pred = np.random.rand(X.shape[0], self.classes_.size)
        # Normalize each row so the random scores form a distribution.
        return pred / pred.sum(axis=1, keepdims=True)

# Load the iris features and labels as plain arrays.
X, y = load_iris(return_X_y=True)

# The pass-through transformer feeds the random-guess classifier.
pipe = make_pipeline(
    MyOwnTransformer(),
    MyOwnClassifier(),
)
pipe.fit(X, y)

pipe.predict(X)
pipe.predict_proba(X)
# Final expression of the cell: the pipeline's score on the training data.
pipe.score(X, y)

0.29333333333333333

### Programmatic case 4

In [None]:
# Print this module's docstring.
# NOTE(review): presumably carried over from a standalone example script;
# confirm it is meaningful inside a notebook cell.
print(__doc__)

import numpy as np
import matplotlib.pyplot as plt
from sklearn.base import BaseEstimator, clone
from sklearn.cluster import AgglomerativeClustering
from sklearn.datasets import make_blobs
from sklearn.ensemble import RandomForestClassifier
from sklearn.utils.metaestimators import if_delegate_has_method


# Number of points drawn for the training blobs.
N_SAMPLES = 5000
# Seed passed to the data generators and the classifier below.
RANDOM_STATE = 42


class InductiveClusterer(BaseEstimator):
    """Meta-estimator that lets a clusterer label previously unseen samples.

    At fit time a clone of ``clusterer`` assigns cluster labels to the
    training data, and a clone of ``classifier`` is then trained to
    reproduce those labels. Prediction is delegated to that classifier.

    Parameters
    ----------
    clusterer : estimator
        Must provide ``fit_predict``; cloned in ``fit``.
    classifier : estimator
        Trained on the cluster labels; cloned in ``fit``.
    """

    def __init__(self, clusterer, classifier):
        self.clusterer = clusterer
        self.classifier = classifier

    def fit(self, X, y=None):
        """Cluster ``X``, then fit the classifier on the cluster labels.

        ``y`` is ignored: the training targets are the cluster
        assignments computed here. Returns ``self``.
        """
        # Fit clones so the estimators given to __init__ are left as passed.
        self.clusterer_ = clone(self.clusterer)
        self.classifier_ = clone(self.classifier)
        cluster_labels = self.clusterer_.fit_predict(X)
        self.classifier_.fit(X, cluster_labels)
        return self

    @if_delegate_has_method(delegate='classifier_')
    def predict(self, X):
        """Return the fitted classifier's predictions for ``X``."""
        return self.classifier_.predict(X)

    @if_delegate_has_method(delegate='classifier_')
    def decision_function(self, X):
        """Return the fitted classifier's decision function for ``X``."""
        return self.classifier_.decision_function(X)


def plot_scatter(X, color, alpha=0.5):
    """Scatter-plot the first two columns of ``X`` with black marker edges.

    ``color`` is forwarded as the ``c`` argument and ``alpha`` as the
    transparency. Returns whatever ``plt.scatter`` returns.
    """
    first_col, second_col = X[:, 0], X[:, 1]
    return plt.scatter(first_col, second_col,
                       c=color, alpha=alpha, edgecolor='k')


# Generate the training data: three blobs with fixed centers and spreads.
X, y = make_blobs(n_samples=N_SAMPLES,
                  cluster_std=[1.0, 1.0, 0.5],
                  centers=[(-5, -5), (0, 0), (5, 5)],
                  random_state=RANDOM_STATE)


# Run agglomerative clustering on the training data to get the cluster labels.
clusterer = AgglomerativeClustering(n_clusters=3)
cluster_labels = clusterer.fit_predict(X)

plt.figure(figsize=(12, 4))

# Panel 1: the training data colored by cluster label.
plt.subplot(131)
plot_scatter(X, cluster_labels)
plt.title("Ward Linkage")


# Generate ten new samples around different centers and plot them together
# with the original dataset.
X_new, y_new = make_blobs(n_samples=10,
                          centers=[(-7, -1), (-2, 4), (3, 6)],
                          random_state=RANDOM_STATE)

# Panel 2: the new samples drawn in opaque black over the clustered data.
plt.subplot(132)
plot_scatter(X, cluster_labels)
plot_scatter(X_new, 'black', 1)
plt.title("Unknown instances")


# Declare the inductive learning model that will be used to
# predict cluster membership for unknown instances.
classifier = RandomForestClassifier(random_state=RANDOM_STATE)
inductive_learner = InductiveClusterer(clusterer, classifier).fit(X)

probable_clusters = inductive_learner.predict(X_new)


# Panel 3: the new samples colored by their predicted cluster.
plt.subplot(133)
plot_scatter(X, cluster_labels)
plot_scatter(X_new, probable_clusters)

# Plot decision regions: predict on a grid with step 0.1 that covers the
# training data extended by a margin of 1 on every side.
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
                     np.arange(y_min, y_max, 0.1))

# Flatten the grid into (x, y) rows for prediction, then restore its shape.
Z = inductive_learner.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

plt.contourf(xx, yy, Z, alpha=0.4)
plt.title("Classify unknown instances")

plt.show()

The next cell gives a minimal example of `if_delegate_has_method` from `sklearn.utils.metaestimators`, which helps in understanding the code above.








In [None]:
from sklearn.utils.metaestimators import if_delegate_has_method

class MetaEst:
    """Minimal meta-estimator that wraps a single sub-estimator."""

    def __init__(self, sub_est):
        self.sub_est = sub_est

    @if_delegate_has_method(delegate='sub_est')
    def predict(self, X):
        """Forward ``X`` to the wrapped estimator's ``predict``."""
        return self.sub_est.predict(X)

class HasPredict:
    """Stand-in estimator that does define ``predict``."""

    def predict(self, X):
        """Return the sum of ``X`` along axis 1."""
        row_totals = X.sum(axis=1)
        return row_totals

class HasNoPredict:
    """Stand-in estimator that deliberately defines no ``predict``."""

# Check whether `predict` is exposed when the wrapped object defines it
# (expected True -- TODO confirm by running the cell).
hasattr(MetaEst(HasPredict()), 'predict')

# Same check when the wrapped object has no `predict`
# (expected False -- TODO confirm by running the cell).
hasattr(MetaEst(HasNoPredict()), 'predict')