In [31]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,plot_confusion_matrix,roc_auc_score, classification_report, confusion_matrix, precision_recall_curve, auc
from sklearn.base import BaseEstimator, TransformerMixin


In [32]:
df = pd.read_csv('seeds_dataset.csv')
print('Total de filas: ',df.shape[0],'Total de columnas: ',df.shape[1])
df.sample(2)

X = df.drop(['type_wheat'], axis=1)
y = df['type_wheat']

X_train, X_test, y_train, y_test=train_test_split(X,y,test_size=0.3, random_state=30, stratify=y)

Total de filas:  210 Total de columnas:  8


In [35]:
class SC(BaseEstimator, TransformerMixin):
    def __init__(self):
        pass

    def fit(self,X,y=None):
        self.means = X.mean()
        return self
    
    def transform(self, X, y=None):
        return X

In [34]:
X_train.mean()

area             14.817891
perimeter        14.555442
compactness       0.870073
length            5.631966
width             3.249626
coefficient       3.765008
length_groove     5.409020
dtype: float64

In [39]:
sc = SC()
sc.fit_transform(X_train)

Unnamed: 0,area,perimeter,compactness,length,width,coefficient,length_groove
127,17.98,15.85,0.8993,5.979,3.687,2.257,5.919
44,15.11,14.54,0.8986,5.579,3.462,3.128,5.180
197,13.37,13.78,0.8849,5.320,3.128,4.670,5.091
179,12.21,13.47,0.8453,5.357,2.893,1.661,5.178
83,19.57,16.74,0.8779,6.384,3.772,1.472,6.273
...,...,...,...,...,...,...,...
177,10.74,12.73,0.8329,5.145,2.642,4.702,4.963
32,14.09,14.41,0.8529,5.717,3.186,3.920,5.299
33,13.94,14.17,0.8728,5.585,3.150,2.124,5.012
5,14.38,14.21,0.8951,5.386,3.312,2.462,4.956


In [38]:
sc.means

area             14.817891
perimeter        14.555442
compactness       0.870073
length            5.631966
width             3.249626
coefficient       3.765008
length_groove     5.409020
dtype: float64

In [42]:
class SC(BaseEstimator, TransformerMixin):
    def __init__(self):
        pass

    def fit(self,X,y=None):
        self.means = X.mean()
        return self
    
    def transform(self, X, y=None):
        return X-self.means

In [43]:
sc = SC()
sc.fit_transform(X_train)


Unnamed: 0,area,perimeter,compactness,length,width,coefficient,length_groove
127,3.162109,1.294558,0.029227,0.347034,0.437374,-1.508008,0.50998
44,0.292109,-0.015442,0.028527,-0.052966,0.212374,-0.637008,-0.22902
197,-1.447891,-0.775442,0.014827,-0.311966,-0.121626,0.904992,-0.31802
179,-2.607891,-1.085442,-0.024773,-0.274966,-0.356626,-2.104008,-0.23102
83,4.752109,2.184558,0.007827,0.752034,0.522374,-2.293008,0.86398
...,...,...,...,...,...,...,...
177,-4.077891,-1.825442,-0.037173,-0.486966,-0.607626,0.936992,-0.44602
32,-0.727891,-0.145442,-0.017173,0.085034,-0.063626,0.154992,-0.11002
33,-0.877891,-0.385442,0.002727,-0.046966,-0.099626,-1.641008,-0.39702
5,-0.437891,-0.345442,0.025027,-0.245966,0.062374,-1.303008,-0.45302


In [13]:
sc.means

area             14.817891
perimeter        14.555442
compactness       0.870073
length            5.631966
width             3.249626
coefficient       3.765008
length_groove     5.409020
dtype: float64

In [15]:
X_train.mean()
X_test.mean()

area             14.916667
perimeter        14.568254
compactness       0.873157
length            5.620524
width             3.279556
coefficient       3.548984
length_groove     5.405857
dtype: float64

In [18]:
sc.fit_transform(X_train)

area             14.817891
perimeter        14.555442
compactness       0.870073
length            5.631966
width             3.249626
coefficient       3.765008
length_groove     5.409020
dtype: float64


Unnamed: 0,area,perimeter,compactness,length,width,coefficient,length_groove
127,3.162109,1.294558,0.029227,0.347034,0.437374,-1.508008,0.50998
44,0.292109,-0.015442,0.028527,-0.052966,0.212374,-0.637008,-0.22902
197,-1.447891,-0.775442,0.014827,-0.311966,-0.121626,0.904992,-0.31802
179,-2.607891,-1.085442,-0.024773,-0.274966,-0.356626,-2.104008,-0.23102
83,4.752109,2.184558,0.007827,0.752034,0.522374,-2.293008,0.86398
...,...,...,...,...,...,...,...
177,-4.077891,-1.825442,-0.037173,-0.486966,-0.607626,0.936992,-0.44602
32,-0.727891,-0.145442,-0.017173,0.085034,-0.063626,0.154992,-0.11002
33,-0.877891,-0.385442,0.002727,-0.046966,-0.099626,-1.641008,-0.39702
5,-0.437891,-0.345442,0.025027,-0.245966,0.062374,-1.303008,-0.45302


In [19]:
X_train

Unnamed: 0,area,perimeter,compactness,length,width,coefficient,length_groove
127,17.98,15.85,0.8993,5.979,3.687,2.257,5.919
44,15.11,14.54,0.8986,5.579,3.462,3.128,5.180
197,13.37,13.78,0.8849,5.320,3.128,4.670,5.091
179,12.21,13.47,0.8453,5.357,2.893,1.661,5.178
83,19.57,16.74,0.8779,6.384,3.772,1.472,6.273
...,...,...,...,...,...,...,...
177,10.74,12.73,0.8329,5.145,2.642,4.702,4.963
32,14.09,14.41,0.8529,5.717,3.186,3.920,5.299
33,13.94,14.17,0.8728,5.585,3.150,2.124,5.012
5,14.38,14.21,0.8951,5.386,3.312,2.462,4.956


In [44]:
class SC(BaseEstimator, TransformerMixin):
    def __init__(self):
        pass

    def fit(self,X,y=None):
        self.means = X.mean()
        self.stds = X.std(ddof=0)
        return self
    
    def transform(self, X, y=None):
        return (X-self.means)/self.stds

In [47]:
sc = SC()
sc.fit_transform(X_train)
sc.transform(X_test)
sc.means

area             14.817891
perimeter        14.555442
compactness       0.870073
length            5.631966
width             3.249626
coefficient       3.765008
length_groove     5.409020
dtype: float64

In [48]:
X_test.mean()

area             14.916667
perimeter        14.568254
compactness       0.873157
length            5.620524
width             3.279556
coefficient       3.548984
length_groove     5.405857
dtype: float64

In [46]:
scaler = StandardScaler()
scaler.fit_transform(X_train)
scaler.transform(X_test)

array([[-1.22227925, -1.31183254, -0.45916671, -1.33143532, -1.08234023,
        -0.27634817, -0.84565053],
       [ 1.26944906,  1.10099474,  1.64958235,  0.80036615,  1.40855668,
        -0.51633006,  1.03301557],
       [-0.34367265, -0.47901796,  1.11472292, -0.70822259,  0.34952025,
        -0.99563272, -1.25804065],
       [-1.4155727 , -1.26513265, -2.60298852, -0.72432034, -1.62536656,
         1.12189338, -0.44783986],
       [ 1.43111268,  1.3033609 ,  1.34707989,  1.36838661,  1.40315343,
        -0.08330489,  1.16006505],
       [-0.91300972, -0.96158342, -0.21365746, -0.93819039, -0.76354865,
         1.12652113, -0.39160484],
       [ 1.1148143 ,  1.01537829,  1.27693439,  0.59339514,  1.20053167,
        -1.12190418,  0.89138664],
       [ 0.96017954,  0.85971202,  1.27255029,  0.36572702,  1.18972517,
         1.0584271 ,  0.52481765],
       [-1.41205827, -1.54533195, -0.48547127, -1.49701214, -1.15798569,
         0.66639058, -0.72068383],
       [ 0.45761657,  0.3460