In [6]:
import numpy as np
import pandas as pd

# Standard Scaler

In [2]:
class StandardScaler:
    def __init__(self):
        self.mean=None
        self.std=None
    
    def fit(self,X_train):
        self.mean=X_train.mean(axis=0)
        self.std=X_train.std(axis=0)
        self.std[self.std==0]=1
        return self
    
    def fit_transform(self,X_train):
        self.fit(X_train)
        return (X_train-self.mean)/self.std
    
    def transform(self,X_test):
        return (X_test-self.mean)/self.std





# MinMax Scaler

In [3]:
class MinMaxScaler:
    def __init__(self,min=0,max=1):
        self.min=min
        self.max=max
        self.range=max-min
        self.min_val=None
        self.max_val=None
        if self.range<0:
            raise Exception("Min value is greater than max value")
        
    def fit(self,x):
        self.max_val=np.max(x,axis=0)
        self.min_val=np.min(x,axis=0)
        self.scale_=np.where(self.max_val-self.min_val==0,1,self.max_val-self.min_val)
        return self
    
    def fit_transform(self,x):
        self.fit(x)
        return self.min+(x-self.min_val)*(self.range)/self.scale_
    
    def transform(self,x):
        return self.min+(x-self.min_val)*(self.range)/self.scale_




# MaxabsScaler

In [11]:
class MaxAbsScaler:
    def __init__(self):
        self.Absmax=None
    
    def fit(self,x):
        self.Absmax=np.max(abs(x),axis=0)
        self.scale_=np.where(self.Absmax==0,1,self.Absmax)
        return self
    
    def fit_transform(self,x):
        self.fit(x)
        return x/self.scale_
    
    def transform(self,x):
        return x/self.scale_
    


# MeanScaler

In [None]:
class MeanScaler:
    def __init__(self):
        self.mean=None
    
    def fit(self,X_train):
        self.mean=X_train.mean(axis=0)   
        return self
    
    def fit_transform(self,X_train):
        self.fit(X_train)
        return X_train-self.mean
    
    def transform(self,X_test):
        return X_test-self.mean




# simple imputer

In [7]:
from collections import Counter
class SimpleImputer:
    def __init__(self,fill_value='mean'):
        self.fill_value=fill_value

    def fill(self,X):
        X=np.array(X,dtype=float)
        if self.fill_value=='mean':
            mean=np.nanmean(X)
            X[np.isnan(X)]=mean

        elif self.fill_value=='mode':
            non_nan=X[~np.isnan(X)]
            count=Counter(non_nan)
            mode=count.most_common(1)[0][0]
            X[np.isnan(X)]=mode
            

        elif self.fill_value=='median':
            median=np.nanmedian(X)
            X[np.isnan(X)]=median     

        elif isinstance(self.fill_value,(float,int)):
            X[np.isnan(X)]=self.fill_value

        else:
            raise ValueError(f"Invalid Fill value. Choose from 'mean', 'median', 'mode', or a number.")
        
        return X
        




        

# testing Simpleimputer

In [8]:

# ---------------- Large Test Cases ----------------
test_cases = {
    "large_random_mean": (np.random.choice([np.nan, 1, 2, 3, 4, 5], size=50), 'mean'),
    "large_random_median": (np.random.choice([np.nan, 10, 20, 30, 40, 50], size=100), 'median'),
    "large_random_mode": (np.random.choice([np.nan, 1, 1, 2, 3, 4], size=60), 'mode'),
    "large_random_constant": (np.random.choice([np.nan, 5, 10, 15], size=40), 999),
    "all_NaN_large": (np.full(30, np.nan), 'mean'),
    "no_NaN_large": (np.arange(1, 51), 'mode'),
    "mixed_types_large": (np.array([1, 2.5, np.nan, 3, 4.5, np.nan, 2, 1, np.nan]), 'median')
}

# ---------------- Run Tests ----------------
for name, (data, strategy) in test_cases.items():
    print(f"Test Case: {name}")
    imputer = SimpleImputer(strategy)
    try:
        filled = imputer.fill(data)
        print(f"Original (first 20 elements): {np.array(data)[:20]}")
        print(f"Filled   (first 20 elements): {filled[:20]}\n")
    except Exception as e:
        print(f"Raised Exception: {e}\n")


Test Case: large_random_mean
Original (first 20 elements): [ 5.  2.  4.  5.  5. nan  2.  4. nan  3.  2.  2.  5. nan nan nan  5.  3.
  5.  2.]
Filled   (first 20 elements): [5.         2.         4.         5.         5.         3.10526316
 2.         4.         3.10526316 3.         2.         2.
 5.         3.10526316 3.10526316 3.10526316 5.         3.
 5.         2.        ]

Test Case: large_random_median
Original (first 20 elements): [nan 50. 10. nan 50. nan 30. 30. 50. 50. nan 50. nan 40. nan 40. 40. 30.
 50. 10.]
Filled   (first 20 elements): [30. 50. 10. 30. 50. 30. 30. 30. 50. 50. 30. 50. 30. 40. 30. 40. 40. 30.
 50. 10.]

Test Case: large_random_mode
Original (first 20 elements): [ 1.  1.  4.  1. nan  3.  1.  4.  3.  2. nan  1.  1.  4.  1. nan  2.  1.
  3.  2.]
Filled   (first 20 elements): [1. 1. 4. 1. 1. 3. 1. 4. 3. 2. 1. 1. 1. 4. 1. 1. 2. 1. 3. 2.]

Test Case: large_random_constant
Original (first 20 elements): [15. nan  5. 10. nan 15. 10. nan nan 15. nan nan 15. 10. nan 1

  mean=np.nanmean(X)


# Testing StandardScaler


In [4]:
from sklearn.preprocessing import StandardScaler as SklearnScaler



# Sample data
X_train = np.array([[1, 2, 3],
                    [4, 5, 6],
                    [7, 8, 9]], dtype=float)

X_test = np.array([[2, 3, 4],
                   [5, 6, 7]], dtype=float)

# Using custom scaler
custom_scaler = StandardScaler()
X_train_custom = custom_scaler.fit_transform(X_train)
X_test_custom = custom_scaler.transform(X_test)

# Using sklearn scaler
sklearn_scaler = SklearnScaler()
X_train_sklearn = sklearn_scaler.fit_transform(X_train)
X_test_sklearn = sklearn_scaler.transform(X_test)

# Compare results
print("Custom Scaler - Train:\n", X_train_custom)
print("Sklearn Scaler - Train:\n", X_train_sklearn)
print("\nCustom Scaler - Test:\n", X_test_custom)
print("Sklearn Scaler - Test:\n", X_test_sklearn)


Custom Scaler - Train:
 [[-1.22474487 -1.22474487 -1.22474487]
 [ 0.          0.          0.        ]
 [ 1.22474487  1.22474487  1.22474487]]
Sklearn Scaler - Train:
 [[-1.22474487 -1.22474487 -1.22474487]
 [ 0.          0.          0.        ]
 [ 1.22474487  1.22474487  1.22474487]]

Custom Scaler - Test:
 [[-0.81649658 -0.81649658 -0.81649658]
 [ 0.40824829  0.40824829  0.40824829]]
Sklearn Scaler - Test:
 [[-0.81649658 -0.81649658 -0.81649658]
 [ 0.40824829  0.40824829  0.40824829]]


In [5]:
custom_scaler.mean

array([4., 5., 6.])

In [6]:
custom_scaler.std

array([2.44948974, 2.44948974, 2.44948974])

In [7]:
sklearn_scaler.mean_

array([4., 5., 6.])

In [8]:
sklearn_scaler.scale_

array([2.44948974, 2.44948974, 2.44948974])

In [9]:
sklearn_scaler.var_

array([6., 6., 6.])

# Testing MinMaxScaler

In [10]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler as SklearnMinMaxScaler



# Sample data
X_train = np.array([[1, 2],
                    [3, 4],
                    [5, 6]], dtype=float)

X_test = np.array([[2, 3],
                   [4, 5]], dtype=float)

# Custom scaler
custom_scaler = MinMaxScaler(min=0, max=1)
X_train_custom = custom_scaler.fit_transform(X_train)
X_test_custom = custom_scaler.transform(X_test)

# Sklearn scaler
sklearn_scaler = SklearnMinMaxScaler(feature_range=(0, 1))
X_train_sklearn = sklearn_scaler.fit_transform(X_train)
X_test_sklearn = sklearn_scaler.transform(X_test)

# Compare results
print("Custom Scaler - Train:\n", X_train_custom)
print("Sklearn Scaler - Train:\n", X_train_sklearn)
print("\nCustom Scaler - Test:\n", X_test_custom)
print("Sklearn Scaler - Test:\n", X_test_sklearn)


print("\nTrain arrays equal:", np.allclose(X_train_custom, X_train_sklearn))
print("Test arrays equal:", np.allclose(X_test_custom, X_test_sklearn))


Custom Scaler - Train:
 [[0.  0. ]
 [0.5 0.5]
 [1.  1. ]]
Sklearn Scaler - Train:
 [[0.  0. ]
 [0.5 0.5]
 [1.  1. ]]

Custom Scaler - Test:
 [[0.25 0.25]
 [0.75 0.75]]
Sklearn Scaler - Test:
 [[0.25 0.25]
 [0.75 0.75]]

Train arrays equal: True
Test arrays equal: True


# Testing MaxAbsScaler

In [12]:
import numpy as np
from sklearn.preprocessing import MaxAbsScaler as SklearnMaxAbsScaler


# Sample data
X_train = np.array([[1, -2, 3],
                    [-4, 5, -6],
                    [7, -8, 9]], dtype=float)

X_test = np.array([[2, -3, 4],
                   [-5, 6, -7]], dtype=float)

# Custom scaler
custom_scaler = MaxAbsScaler()
X_train_custom = custom_scaler.fit_transform(X_train)
X_test_custom = custom_scaler.transform(X_test)

# Sklearn scaler
sklearn_scaler = SklearnMaxAbsScaler()
X_train_sklearn = sklearn_scaler.fit_transform(X_train)
X_test_sklearn = sklearn_scaler.transform(X_test)

# Compare results
print("Custom Scaler - Train:\n", X_train_custom)
print("Sklearn Scaler - Train:\n", X_train_sklearn)
print("\nCustom Scaler - Test:\n", X_test_custom)
print("Sklearn Scaler - Test:\n", X_test_sklearn)

# Check if arrays are almost equal
print("\nTrain arrays equal:", np.allclose(X_train_custom, X_train_sklearn))
print("Test arrays equal:", np.allclose(X_test_custom, X_test_sklearn))


Custom Scaler - Train:
 [[ 0.14285714 -0.25        0.33333333]
 [-0.57142857  0.625      -0.66666667]
 [ 1.         -1.          1.        ]]
Sklearn Scaler - Train:
 [[ 0.14285714 -0.25        0.33333333]
 [-0.57142857  0.625      -0.66666667]
 [ 1.         -1.          1.        ]]

Custom Scaler - Test:
 [[ 0.28571429 -0.375       0.44444444]
 [-0.71428571  0.75       -0.77777778]]
Sklearn Scaler - Test:
 [[ 0.28571429 -0.375       0.44444444]
 [-0.71428571  0.75       -0.77777778]]

Train arrays equal: True
Test arrays equal: True
