In [1]:
class narray:
    """Small list-backed array offering a NumPy-flavoured subset of operations.

    ``data`` is a scalar or an arbitrarily nested list and is stored by
    reference (it is not copied). ``shape`` is inferred once, at construction
    time, by following the first element of every nesting level, so ragged
    input is not detected.

    Element-wise arithmetic, scalar arithmetic, ``**`` and ``mean`` work for
    any nesting depth; ``dot``, ``T``, ``c_``, ``ones`` and ``zeros`` are
    strictly 2-dimensional.
    """

    def __init__(self, data):
        """Wrap ``data`` and record its inferred shape."""
        self.data = data
        self.shape = self._shape(data)

    def _shape(self, data):
        """Return the shape of ``data`` as a tuple of per-level lengths.

        Only the first element of each level is inspected; an empty list ends
        the descent with a trailing ``0``.
        """
        shape = []
        while isinstance(data, list):
            shape.append(len(data))
            data = data[0] if len(data) > 0 else None
        return tuple(shape)

    def _format_array(self, data):
        """Render a nested list (or a scalar) as a single-line string."""
        if isinstance(data, list):
            return '[' + ', '.join(self._format_array(item) for item in data) + ']'
        else:
            return str(data)

    def __str__(self):
        """Render the array with one top-level item per line."""
        return '['+',\n '.join(self._format_array(row) for row in self.data)+']'

    def __repr__(self):
        """Return a debugging representation showing the data and the shape."""
        return f"Array(data={self.data}, shape={self.shape})"

    def __getitem__(self, index):
        """Return an element (tuple index) or a top-level item (integer index).

        Args:
            index: An ``int`` selecting ``self.data[index]``, or a tuple with
                exactly one integer per dimension selecting a single element.

        Raises:
            IndexError: If the tuple has the wrong number of indices or any
                index is out of range.
            TypeError: If ``index`` is neither an ``int`` nor a tuple.
        """
        if isinstance(index, tuple):
            ndim = len(self.shape)
            if len(index) > ndim:
                raise IndexError(f"Too many indices for array of shape {self.shape}.")
            if len(index) < ndim:
                # Partial indexing is not supported; say so accurately instead
                # of reporting "too many" indices.
                raise IndexError(
                    f"Too few indices for array of shape {self.shape}: "
                    f"expected {ndim}, got {len(index)}."
                )
            item = self.data
            try:
                for i in index:
                    item = item[i]
            except IndexError:
                raise IndexError("Index out of range.")
            return item
        elif isinstance(index, int):
            if index >= len(self.data):
                raise IndexError("Index out of range.")
            return self.data[index]
        else:
            raise TypeError("Index must be an integer or a tuple of integers.")

    # ------------------------------------------------------------------
    # Private helpers that walk the nested lists recursively
    # ------------------------------------------------------------------
    @staticmethod
    def _map(data, fn):
        """Apply ``fn`` to every scalar in ``data``, preserving the nesting."""
        if isinstance(data, list):
            return [narray._map(item, fn) for item in data]
        return fn(data)

    @staticmethod
    def _zip_map(left, right, op):
        """Apply ``op`` to matching scalars of two equally nested structures."""
        if isinstance(left, list):
            return [narray._zip_map(a, b, op) for a, b in zip(left, right)]
        return op(left, right)

    @staticmethod
    def _total(data):
        """Sum every scalar in ``data``, innermost lists first.

        For 2-D data this is ``sum(sum(row) for row in data)``, so the
        floating-point summation order matches a row-by-row sum.
        """
        if isinstance(data, list):
            return sum(narray._total(item) for item in data)
        return data

    @staticmethod
    def _filled(tp, value):
        """Build a ``tp[0]`` x ``tp[1]`` array with every entry set to ``value``."""
        if not isinstance(tp, (tuple, list)) or len(tp) != 2:
            raise ValueError("Input must be a tuple or list of two elements (rows, cols)")
        return narray([[value for _ in range(tp[1])] for _ in range(tp[0])])

    # ------------------------------------------------------------------
    # Linear algebra (2-D only)
    # ------------------------------------------------------------------
    def dot(self, other):
        """Return the matrix product ``self @ other`` as a new array.

        Raises:
            TypeError: If ``other`` is not an ``narray``.
            ValueError: If either operand is not 2-dimensional or the inner
                dimensions differ.
        """
        if not isinstance(other, narray):
            raise TypeError("The argument must be an instance of narray.")

        if len(self.shape) != 2 or len(other.shape) != 2:
            raise ValueError("Both arrays must be 2-dimensional.")

        if self.shape[1] != other.shape[0]:
            raise ValueError("The number of columns in the first array must be equal to the number of rows in the second array.")

        inner = self.shape[1]
        result_data = [[sum(self.data[i][k] * other.data[k][j] for k in range(inner))
                        for j in range(other.shape[1])]
                       for i in range(self.shape[0])]

        return narray(result_data)

    @property
    def T(self):
        """Return the transpose of a 2-dimensional array as a new array.

        Raises:
            ValueError: If the array is not 2-dimensional.
        """
        if len(self.shape) != 2:
            raise ValueError("Array must be 2-dimensional.")

        rows, cols = self.shape
        return narray([[self.data[i][j] for i in range(rows)] for j in range(cols)])

    @staticmethod
    def ones(tp: tuple):
        """Return a ``(rows, cols)`` array filled with the integer ``1``.

        Raises:
            ValueError: If ``tp`` is not a two-element tuple or list.
        """
        return narray._filled(tp, 1)

    @staticmethod
    def zeros(tp: tuple):
        """Return a ``(rows, cols)`` array filled with the integer ``0``.

        Raises:
            ValueError: If ``tp`` is not a two-element tuple or list.
        """
        return narray._filled(tp, 0)

    @staticmethod
    def c_(other, another):
        """Concatenate two 2-D arrays column-wise (``other`` on the left).

        Raises:
            TypeError: If either argument is not an ``narray``.
            ValueError: If either array is not 2-dimensional or the row counts
                differ.
        """
        if not isinstance(other, narray) or not isinstance(another, narray):
            raise TypeError("The arguments must be an instance of narray.")

        if len(other.shape) != 2 or len(another.shape) != 2:
            raise ValueError("Both arrays must be 2-dimensional.")

        if other.shape[0] != another.shape[0]:
            raise ValueError("Both arrays must have same no. of rows.")

        # Row lists are joined with list ``+``, which builds new row lists.
        result_data = [other[i] + another[i] for i in range(other.shape[0])]
        return narray(result_data)

    # ------------------------------------------------------------------
    # Arithmetic
    # ------------------------------------------------------------------
    def _elementwise_operation(self, other, op):
        """Combine two same-shaped arrays element by element with ``op``.

        Works for any nesting depth, not only 2-D.

        Raises:
            TypeError: If ``other`` is not an ``narray``.
            ValueError: If the shapes differ.
        """
        if not isinstance(other, narray):
            raise TypeError("The argument must be an instance of narray.")

        if self.shape != other.shape:
            raise ValueError("Both arrays must have the same shape.")

        return narray(self._zip_map(self.data, other.data, op))

    def _binary(self, other, op):
        """Dispatch ``self <op> other`` for an array or scalar right operand."""
        if isinstance(other, narray):
            return self._elementwise_operation(other, op)
        elif isinstance(other, (int, float)):
            return narray(self._map(self.data, lambda x: op(x, other)))
        else:
            raise TypeError("Operand must be an instance of narray or a scalar.")

    def _reflected(self, other, op):
        """Compute ``other <op> self`` for a scalar left operand."""
        if isinstance(other, (int, float)):
            return narray(self._map(self.data, lambda x: op(other, x)))
        else:
            raise TypeError("Operand must be an integer or float.")

    def __add__(self, other):
        """Return ``self + other`` (array or scalar), element-wise."""
        return self._binary(other, lambda x, y: x + y)

    def __sub__(self, other):
        """Return ``self - other`` (array or scalar), element-wise."""
        return self._binary(other, lambda x, y: x - y)

    def __mul__(self, other):
        """Return ``self * other`` (array or scalar), element-wise."""
        return self._binary(other, lambda x, y: x * y)

    def __truediv__(self, other):
        """Return ``self / other`` (array or scalar), element-wise."""
        return self._binary(other, lambda x, y: x / y)

    def __pow__(self, power):
        """Raise every element to the scalar ``power``.

        Raises:
            TypeError: If ``power`` is not an ``int`` or ``float``.
        """
        if isinstance(power, (int, float)):
            return narray(self._map(self.data, lambda x: x ** power))
        else:
            raise TypeError("Power must be an integer or float.")

    def __radd__(self, other):
        """Return ``other + self`` for a scalar ``other``."""
        if isinstance(other, (int, float)):
            return self.__add__(other)
        else:
            raise TypeError("Operand must be an integer or float.")

    def __rsub__(self, other):
        """Return ``other - self`` for a scalar ``other``."""
        return self._reflected(other, lambda x, y: x - y)

    def __rmul__(self, other):
        """Return ``other * self`` for a scalar ``other``."""
        if isinstance(other, (int, float)):
            return self.__mul__(other)
        else:
            raise TypeError("Operand must be an integer or float.")

    def __rtruediv__(self, other):
        """Return ``other / self`` for a scalar ``other``."""
        return self._reflected(other, lambda x, y: x / y)

    @staticmethod
    def mean(array):
        """Return the arithmetic mean over every element of ``array``.

        The element count is the product of ``array.shape``; an array with a
        zero-length dimension therefore raises ``ZeroDivisionError``.
        """
        total_elements = 1
        for extent in array.shape:
            total_elements *= extent
        return narray._total(array.data) / total_elements

In [7]:
class LinearRegression_manual:
    """Linear regression trained with full-batch gradient descent on ``narray`` data.

    Inputs are standardised with a single mean and standard deviation computed
    over *all* entries of the training matrix (not per column), and a leading
    column of ones is added so that ``theta[0]`` acts as the intercept.
    """

    def __init__(self, learning_rate, epochs):
        """Store the gradient-descent step size and the number of iterations."""
        self.learning_rate = learning_rate
        self.epochs = epochs

    def _design_matrix(self, x):
        """Standardise ``x`` with the fitted statistics and add the bias column."""
        scaled = (x - self.x_mean) / self.x_std
        return narray.c_(narray.ones((x.shape[0], 1)), scaled)

    def fit(self, x, y, log_every=1000):
        """Estimate ``theta`` by minimising the mean squared error.

        Args:
            x: 2-D ``narray`` of shape ``(m, n)`` holding the features.
            y: ``narray`` of targets; it is subtracted from the ``(m, 1)``
                predictions, so it must have that shape.
            log_every: Print the training MSE every ``log_every`` epochs
                (starting at epoch 0). Pass ``0`` or ``None`` to disable
                logging. Defaults to 1000.
        """
        self.m, self.n = x.shape
        self.y = y
        self.x_mean = narray.mean(x)
        self.x_std = (narray.mean((x - self.x_mean) ** 2)) ** 0.5
        if self.x_std == 0:
            # Constant input: dividing by zero would crash; a unit scale leaves
            # the centred (all-zero) features unchanged.
            self.x_std = 1.0
        self.x = self._design_matrix(x)
        self.theta = narray.zeros((self.n + 1, 1))
        x_transpose = self.x.T  # Cache the transpose

        for epoch in range(self.epochs):
            # One forward pass per epoch, shared by logging and the gradient.
            predictions = self.x.dot(self.theta)
            if log_every and epoch % log_every == 0:
                mse = self.mean_squared_error(self.y, predictions)
                print(f"Epoch {epoch}: MSE = {mse}")
            gradients = x_transpose.dot(predictions - self.y) / self.m
            self.theta -= self.learning_rate * gradients

    def predict(self, x):
        """Return the ``(rows, 1)`` predictions for the 2-D feature array ``x``.

        ``fit`` must have been called first, because the stored mean, standard
        deviation and ``theta`` are used.
        """
        return self._design_matrix(x).dot(self.theta)

    def mean_squared_error(self, y_true, y_pred):
        """Return the mean of the squared element-wise differences."""
        return narray.mean((y_true - y_pred) ** 2)

In [3]:
class LoadDataset:
    """Load a numeric CSV file into feature rows and single-value target rows.

    The first line is treated as a header and skipped. For every remaining
    non-blank line with at least two comma-separated fields, all fields but
    the last become a feature row and the last becomes a one-element target
    row. Loading happens in the constructor and is best-effort: failures are
    printed rather than raised.
    """

    def __init__(self, file_path):
        """Remember ``file_path`` and load it immediately."""
        self.file_path = file_path
        self.x = None
        self.y = None
        self._load_data()

    def _load_data(self):
        """Populate ``self.x`` and ``self.y`` from ``self.file_path``.

        On any error a message is printed and the rows read before the failure
        are kept; ``self.x`` and ``self.y`` always have the same length.
        """
        self.x = []
        self.y = []
        try:
            # 'utf-8-sig' transparently drops a leading byte-order mark.
            with open(self.file_path, 'r', encoding='utf-8-sig') as file:
                if next(file, None) is None:
                    raise ValueError("file is empty (no header row)")
                # Data starts on line 2 because line 1 is the header.
                for line_number, line in enumerate(file, start=2):
                    line = line.strip()
                    if not line:
                        continue
                    parts = line.split(',')
                    if len(parts) <= 1:
                        continue
                    try:
                        # Convert the whole row before touching x or y so a bad
                        # field can never leave the two lists misaligned.
                        *features, target = [float(value) for value in parts]
                    except ValueError as exc:
                        raise ValueError(f"line {line_number}: {exc}") from exc
                    self.x.append(features)
                    self.y.append([target])
            print(f"Data loaded successfully from {self.file_path}.")
        except Exception as e:
            # Deliberately best-effort: report the problem instead of raising
            # from the constructor.
            print(f"Error loading data: {e}")

    def get_features(self):
        """Return the list of feature rows (each a list of floats)."""
        return self.x

    def get_target(self):
        """Return the list of target rows (each a one-element list of floats)."""
        return self.y

In [4]:
# Smoke test on a tiny hand-written dataset: fit the model on five
# single-feature samples and report the error on those same samples.
if __name__ == "__main__":
    # Five samples with one feature each, as a 5 x 1 array.
    X = narray([[1], [2], [4], [3], [5]])
    # Matching targets, also 5 x 1.
    y = narray([[1.5], [3.5], [6.0], [4.5], [9.0]])

    lr = LinearRegression_manual(learning_rate=0.001, epochs=10000)
    # fit() prints the training MSE every 1000 epochs.
    lr.fit(X, y)
    
    # This is a training error: the model is evaluated on the data it was fitted on.
    training_predictions = lr.predict(X)
    training_error = lr.mean_squared_error(y, training_predictions)
    print("Training Mean Squared Error:", training_error)

Epoch 0: MSE = 30.35
Epoch 1000: MSE = 4.289249751853665
Epoch 2000: MSE = 0.765838262501399
Epoch 3000: MSE = 0.2894732919962802
Epoch 4000: MSE = 0.22506878352200282
Epoch 5000: MSE = 0.21636129878101812
Epoch 6000: MSE = 0.21518404749363715
Epoch 7000: MSE = 0.21502488320740937
Epoch 8000: MSE = 0.21500336420778526
Epoch 9000: MSE = 0.2150004548406418
Training Mean Squared Error: 0.21500006149442102


In [5]:
# Read the CSV (header skipped; last column is the target, the rest are features).
# NOTE: `df` is a LoadDataset instance, not a pandas DataFrame. The path is
# relative to the notebook's working directory.
df=LoadDataset('1.01. Simple linear regression.csv')

Data loaded successfully from 1.01. Simple linear regression.csv.


In [6]:
# Wrap the loaded feature rows (`a`) and target rows (`b`) as narrays.
a,b=narray(df.get_features()),narray(df.get_target())
# NOTE: rebinds `lr`, replacing the model fitted on the toy data in the earlier cell.
lr = LinearRegression_manual(learning_rate=0.001, epochs=10000)
lr.fit(a, b)

# Training error again: predictions are made on the same rows used for fitting.
training_predictions = lr.predict(a)
training_error = lr.mean_squared_error(b, training_predictions)
print("Training Mean Squared Error:", training_error)

Epoch 0: MSE = 11.16338333333334
Epoch 1000: MSE = 1.5467351865697287
Epoch 2000: MSE = 0.2465650745532802
Epoch 3000: MSE = 0.07078217240459941
Epoch 4000: MSE = 0.04701633714794201
Epoch 5000: MSE = 0.04380319799423306
Epoch 6000: MSE = 0.04336878182035963
Epoch 7000: MSE = 0.04331004878606045
Epoch 8000: MSE = 0.043302108084204885
Epoch 9000: MSE = 0.04330103450190642
Training Mean Squared Error: 0.04330088935365975
