In [1]:
import pandas as pd
import numpy as np

In [5]:
# Load the housing CSV into a DataFrame.
# NOTE(review): this is an absolute Windows path on one user's machine, so the
# cell will not run elsewhere without editing it.
df = pd.read_csv("C:\\Users\\GM\\Desktop\\Data Science\\Machine Learning Projects\\Datasets\\housing\\housing.csv")

In [6]:
# Preview the first rows of the loaded frame to check that the columns parsed as expected.
df.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity
0,-122.23,37.88,41.0,880.0,129.0,322.0,126.0,8.3252,452600.0,NEAR BAY
1,-122.22,37.86,21.0,7099.0,1106.0,2401.0,1138.0,8.3014,358500.0,NEAR BAY
2,-122.24,37.85,52.0,1467.0,190.0,496.0,177.0,7.2574,352100.0,NEAR BAY
3,-122.25,37.85,52.0,1274.0,235.0,558.0,219.0,5.6431,341300.0,NEAR BAY
4,-122.25,37.85,52.0,1627.0,280.0,565.0,259.0,3.8462,342200.0,NEAR BAY


Ordinal Encoder / Label Encoder

In [14]:
class OrdinalEncoder:
    """Map each distinct category to an integer code and back again.

    Codes run from 0 to k-1 and follow the sorted order of the distinct
    values seen by ``fit``; the order in which values appear in the input
    does not matter.
    """

    def __init__(self):
        # category -> integer code; empty until fit() has been called
        self.mapping = {}
        # integer code -> category; the exact inverse of ``mapping``
        self.inverse_mapping = {}

    def fit(self, data):
        """Learn the category/code tables from ``data``.

        Args:
            data: Iterable of hashable values that can be sorted together.

        Returns:
            The encoder itself, so calls can be chained, e.g.
            ``OrdinalEncoder().fit(values).transform(values)``.
        """
        unique_values = sorted(set(data))
        self.mapping = {value: index for index, value in enumerate(unique_values)}
        self.inverse_mapping = dict(enumerate(unique_values))
        return self

    def transform(self, data):
        """Replace every value in ``data`` with its integer code.

        Args:
            data: Iterable of categories that were present when fitting.

        Returns:
            A list of integer codes, one per input value.

        Raises:
            KeyError: If a value was not seen by ``fit`` (this includes
                calling ``transform`` before ``fit``).
        """
        try:
            return [self.mapping[value] for value in data]
        except KeyError as err:
            # Same exception type as a plain dict lookup, but say what went wrong.
            raise KeyError(
                f"unknown category {err.args[0]!r}: it was not present in the data passed to fit()"
            ) from None

    def inverse_transform(self, data):
        """Replace every integer code in ``data`` with its original category.

        Args:
            data: Iterable of codes produced by ``transform``.

        Returns:
            A list of categories, one per input code.

        Raises:
            KeyError: If a code does not correspond to any fitted category.
        """
        try:
            return [self.inverse_mapping[value] for value in data]
        except KeyError as err:
            raise KeyError(
                f"unknown code {err.args[0]!r}: it does not correspond to any fitted category"
            ) from None
        

In [16]:
# Round-trip demo: fit on the colour names, encode them, then decode again.
data = ['Lime', 'Cyan', 'Orange', 'Blue', 'Green', 'Lightblue']
encoder = OrdinalEncoder()
encoder.fit(data)
# Codes follow the sorted order of the categories, not their position in `data`.
encoded_data = encoder.transform(data)
print(encoded_data)
# Decoding the codes should reproduce the original list.
decoded_data = encoder.inverse_transform(encoded_data)
print(decoded_data)

[4, 1, 5, 0, 2, 3]
['Lime', 'Cyan', 'Orange', 'Blue', 'Green', 'Lightblue']


One Hot Encoder

In [17]:
class Onehotencoder:
    """Encode categories as one-hot rows and decode them back.

    Each fitted category owns one column; columns follow the sorted order of
    the distinct values seen by ``fit``. A category that was not seen when
    fitting is encoded as an all-zero row and decoded back as ``None``.
    """

    def __init__(self):
        # sorted distinct categories; the position in this list is the column index
        self.categories = []
        # category -> column index (a dict, matching what fit() stores)
        self.mapping = {}

    def fit(self, data):
        """Learn the category columns from ``data``.

        Args:
            data: Iterable of hashable values that can be sorted together.

        Returns:
            The encoder itself, so calls can be chained.
        """
        self.categories = sorted(set(data))
        self.mapping = {category: index for index, category in enumerate(self.categories)}
        return self

    def transform(self, data):
        """Turn every value in ``data`` into a one-hot row.

        Args:
            data: Iterable of categories.

        Returns:
            A list of lists of 0/1 integers, one row per input value and one
            column per fitted category. Unknown categories produce a row of
            zeros.
        """
        width = len(self.categories)
        encoded_data = []
        for value in data:
            encoding = [0] * width
            index = self.mapping.get(value)
            if index is not None:
                encoding[index] = 1
            encoded_data.append(encoding)
        return encoded_data

    def inverse_transform(self, encoded_data):
        """Turn one-hot rows back into categories.

        Args:
            encoded_data: Iterable of rows (any iterable of 0/1 values) as
                produced by ``transform``.

        Returns:
            A list with one entry per row: the category of the first column
            holding a 1, or ``None`` for a row without any 1 (which is how
            ``transform`` encodes an unknown category).
        """
        data = []
        for encoding in encoded_data:
            # list() lets rows be tuples or other iterables without an .index method
            row = list(encoding)
            # An all-zero row is what transform() emits for an unseen category;
            # decode it as None instead of failing on row.index(1).
            data.append(self.categories[row.index(1)] if 1 in row else None)
        return data

In [25]:
# Round-trip demo: fit on the colour names, one-hot encode them, then decode again.
data = ['Lime', 'Cyan', 'Orange', 'Blue', 'Green', 'Lightblue']
encoder = Onehotencoder()
encoder.fit(data)
# Each row has one column per distinct category, with columns in sorted category order.
encoded_data = encoder.transform(data)
print(encoded_data)
# Decoding the rows should reproduce the original list.
decoded_data = encoder.inverse_transform(encoded_data)
print(decoded_data)


[[0, 0, 0, 0, 1, 0], [0, 1, 0, 0, 0, 0], [0, 0, 0, 0, 0, 1], [1, 0, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0]]
['Lime', 'Cyan', 'Orange', 'Blue', 'Green', 'Lightblue']


Min Max Scaling

In [34]:
def Minmaxscaling(data):
    """Rescale ``data`` linearly so its minimum maps to 0 and its maximum to 1.

    The minimum and maximum are taken over the whole array (no axis is
    given), so a multi-dimensional input is scaled with a single global range.

    Args:
        data: Array-like of numbers.

    Returns:
        A NumPy array with the same shape as ``data`` holding
        ``(x - min) / (max - min)`` for every element. If every value is the
        same, the range is zero and all elements are returned as 0 instead of
        NaN. An empty input gives back an empty float array.
    """
    data = np.array(data)
    if data.size == 0:
        # np.min / np.max raise on empty arrays; there is nothing to scale.
        return data.astype(float)

    min_value = np.min(data)
    max_value = np.max(data)

    data_range = max_value - min_value
    if data_range == 0:
        # Constant input: dividing by 1 avoids 0/0 and yields all zeros.
        data_range = 1

    scaled_data = (data - min_value) / data_range

    return scaled_data

In [35]:
# Sample with a negative value: the minimum (-80) maps to 0 and the maximum (90) maps to 1.
data = [20, -80, 30, 40, 90]
scaled_data = Minmaxscaling(data)
print(scaled_data)

[0.58823529 0.         0.64705882 0.70588235 1.        ]


Standard Scaling

In [36]:
def Standardscaling(data):
    """Standardise ``data`` to zero mean and unit standard deviation.

    The mean and standard deviation are computed over the whole array with
    NumPy's defaults (population standard deviation, ``ddof=0``).

    Args:
        data: Array-like of numbers.

    Returns:
        A NumPy array with the same shape as ``data`` holding
        ``(x - mean) / std`` for every element. If the standard deviation is
        exactly zero (all values equal), the elements are only centred, so
        the result is all zeros instead of NaN. An empty input gives back an
        empty float array.
    """
    data = np.array(data)
    if data.size == 0:
        # The mean/std of nothing is NaN plus warnings; there is nothing to scale.
        return data.astype(float)

    mean = np.mean(data)
    std = np.std(data)
    if std == 0:
        # Constant input: dividing by 1 avoids 0/0 and yields all zeros.
        std = 1

    scaled_data = (data - mean) / std

    return scaled_data

In [37]:
# Same sample values as the min-max example; the value equal to the mean (20) maps to 0.
data = [20, -80, 30, 40, 90]
scaled_data = Standardscaling(data)
print(scaled_data)

[ 0.         -1.80187493  0.18018749  0.36037499  1.26131245]
