In [23]:
import numpy as np
from sklearn import datasets
from sklearn import preprocessing

def sigmoid(z):
    return 1.0 / (1 + np.exp(-z))

def sigmoid_derivative(z):
    return sigmoid(z) * (1.0 - sigmoid(z))

def train(X, y, n_hidden, learning_rate, n_iter):
    m, n_samples, n_input = X.shape
    W1 = np.random.randn(n_input, n_hidden)
    b1 = np.zeros((1, n_hidden))
    W2 = np.random.randn(n_hidden,)  # Reshape W2 to (n_hidden,)
    b2 = np.zeros((1,))
    
    Y_reshaped = y.reshape(-1, 1)  # Reshape y for broadcasting

    for i in range(1, n_iter+1):
        Z2 = np.matmul(X, W1) + b1
        A2 = sigmoid(Z2)
        Z3 = np.matmul(A2, W2) + b2
        A3 = Z3
        
        Y_reshaped = Y_reshaped.reshape(Y_reshaped.shape[0], 1, 1)
        Y_broadcasted = np.broadcast_to(Y_reshaped, (Y_reshaped.shape[0], n_samples, 1))
        dZ3 = A3 - Y_broadcasted

        db2 = np.sum(dZ3, axis=0, keepdims=True)
        
        dZ2 = np.matmul(dZ3, W2) * sigmoid_derivative(Z2)
        dW1 = np.matmul(X.transpose((0, 2, 1)), dZ2)
        db1 = np.sum(dZ2, axis=0)
        
        W2 = W2 - learning_rate * np.mean(dW1, axis=(0, 2), keepdims=True) / m
        b2 = b2 - learning_rate * db2 / m
        W1 = W1 - learning_rate * dW1 / m
        b1 = b1 - learning_rate * db1 / m
        
        if i % 100 == 0:
            cost = np.mean((y.reshape(-1, 1) - A3) ** 2)
            print('Iteration %i, training loss: %f' % (i, cost))
    
    model = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}
    return model




شبکه ساده

In [11]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import pandas as pd
import numpy as np

data_url = "http://lib.stat.cmu.edu/datasets/boston"
raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])

# Split data into features (X) and target (y)
X = data[:, :-1]
y = data[:, -1]

# Split data into training and testing datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Optionally, you can scale the features using StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
y_train = y_train.reshape(-1, 1)


In [12]:
import numpy as np

class NeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        # Initialize weights and biases for the hidden layer
        self.W1 = np.random.randn(self.input_size, self.hidden_size)
        self.b1 = np.zeros((1, self.hidden_size))

        # Initialize weights and biases for the output layer
        self.W2 = np.random.randn(self.hidden_size, self.output_size)
        self.b2 = np.zeros((1, self.output_size))

    def forward(self, X):
        # Compute the activation of the hidden layer
        self.z1 = np.dot(X, self.W1) + self.b1
        self.a1 = self.sigmoid(self.z1)

        # Compute the activation of the output layer
        self.z2 = np.dot(self.a1, self.W2) + self.b2
        self.a2 = self.sigmoid(self.z2)

        return self.a2

    def backward(self, X, y, learning_rate):
        m = X.shape[0]

        # Compute the gradients of the output layer
        dZ2 = self.a2 - y
        dW2 = np.dot(self.a1.T, dZ2) / m
        db2 = np.sum(dZ2, axis=0) / m

        # Compute the gradients of the hidden layer
        dZ1 = np.dot(dZ2, self.W2.T) * self.sigmoid_derivative(self.z1)
        dW1 = np.dot(X.T, dZ1) / m
        db1 = np.sum(dZ1, axis=0) / m
        

        # Update the weights and biases
        self.W2 -= learning_rate * dW2
        self.b2 -= learning_rate * db2
        self.W1 -= learning_rate * dW1
        self.b1 -= learning_rate * db1

    def train(self, X, y, num_iterations, learning_rate):
        for i in range(num_iterations):
            # Forward pass
            output = self.forward(X)

            # Backward pass
            self.backward(X, y, learning_rate)

            # Compute the loss (mean squared error)
            loss = np.mean((output - y) ** 2)

            # Print the loss every 100 iterations
            if (i + 1) % 100 == 0:
                print(f"Iteration: {i+1}, Loss: {loss}")

    def predict(self, X):
        return self.forward(X)

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def sigmoid_derivative(self, x):
        return self.sigmoid(x) * (1 - self.sigmoid(x))


In [13]:

input_size = X_train_scaled.shape[1]
hidden_size = 20
output_size = 1
nn = NeuralNetwork(input_size, hidden_size, output_size)


num_iterations = 2000
learning_rate = 0.01
nn.train(X_train_scaled, y_train, num_iterations, learning_rate)


predictions = nn.predict(X_test_scaled)


Iteration: 100, Loss: 181.70327896039603
Iteration: 200, Loss: 181.70327896039603
Iteration: 300, Loss: 181.70327896039603
Iteration: 400, Loss: 181.70327896039603
Iteration: 500, Loss: 181.70327896039603
Iteration: 600, Loss: 181.70327896039603
Iteration: 700, Loss: 181.70327896039603
Iteration: 800, Loss: 181.70327896039603
Iteration: 900, Loss: 181.70327896039603
Iteration: 1000, Loss: 181.70327896039603
Iteration: 1100, Loss: 181.70327896039603
Iteration: 1200, Loss: 181.70327896039603
Iteration: 1300, Loss: 181.70327896039603
Iteration: 1400, Loss: 181.70327896039603
Iteration: 1500, Loss: 181.70327896039603
Iteration: 1600, Loss: 181.70327896039603
Iteration: 1700, Loss: 181.70327896039603
Iteration: 1800, Loss: 181.70327896039603
Iteration: 1900, Loss: 181.70327896039603
Iteration: 2000, Loss: 181.70327896039603


In [14]:
def predict(x, model):
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
    A2 = sigmoid(np.matmul(x, W1) + b1)
    A3 = np.matmul(A2, W2) + b2
    return A3


In [15]:
predictions =nn.predict(X_test_scaled)
print(predictions)
print(y_test)


[[1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]]
[ 9.04  3.53 18.07  5.52 17.27 11.97 18.33 24.16 12.87 14.33 17.92 17.1
 36.98 12.34 11.74 11.66 17.58 30.62  2.97 18.13  6.59  7.19 22.6   9.67
 15.7  18.14  9.1  18.71 17.27 14.1  14.69  7.2  13.44 10.19 21.32 15.79
  4.03  9.8  14.66  7.54 21.14  7.53  2.88 13.09  8.81 17.11 18.35  6.72
 16.29  4.98 13.27  4.59 18.72  7.67  3.16 10.58 16.59  3.33  9.62 18.46
  9.5   4.81  3.76 11.6

In [16]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import pandas as pd

data_url = "http://lib.stat.cmu.edu/datasets/boston"
raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])


X = data[:, :-1]
y = data[:, -1]


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
y_train = y_train.reshape(-1, 1)


class NeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size


        self.W1 = np.random.randn(self.input_size, self.hidden_size) * np.sqrt(2 / self.input_size)
        self.b1 = np.zeros((1, self.hidden_size))

        self.W2 = np.random.randn(self.hidden_size, self.output_size) * np.sqrt(2 / self.hidden_size)
        self.b2 = np.zeros((1, self.output_size))

    def forward(self, X):

        self.z1 = np.dot(X, self.W1) + self.b1
        self.a1 = self.relu(self.z1)


        self.z2 = np.dot(self.a1, self.W2) + self.b2
        self.a2 = self.z2

        return self.a2

    def backward(self, X, y, learning_rate):
        m = X.shape[0]

        dZ2 = self.a2 - y
        dW2 = np.dot(self.a1.T, dZ2) / m
        db2 = np.sum(dZ2, axis=0) / m


        dZ1 = np.dot(dZ2, self.W2.T) * self.relu_derivative(self.z1)
        dW1 = np.dot(X.T, dZ1) / m
        db1 = np.sum(dZ1, axis=0) / m


        self.W2 -= learning_rate * dW2
        self.b2 -= learning_rate * db2
        self.W1 -= learning_rate * dW1
        self.b1 -= learning_rate * db1

    def train(self, X, y, num_iterations, learning_rate, print_loss=True):
        for i in range(num_iterations):
            # Forward pass
            output = self.forward(X)

            # Backward pass
            self.backward(X, y, learning_rate)

            # Compute the loss (mean squared error)
            loss = np.mean((output - y) ** 2)

            # Print the loss every 100 iterations
            if print_loss and (i + 1) % 100 == 0:
                print(f"Iteration: {i+1}, Loss: {loss}")

    def predict(self, X):
        return self.forward(X)

    def relu(self, x):
        return np.maximum(0, x)

    def relu_derivative(self, x):
        return np.where(x > 0, 1, 0)


input_size = X_train_scaled.shape[1]
hidden_size = 20
output_size = 1
nn = NeuralNetwork(input_size, hidden_size, output_size)

num_iterations = 2000
learning_rate = 0.01
nn.train(X_train_scaled, y_train, num_iterations, learning_rate)


predictions = nn.predict(X_test_scaled)


Iteration: 100, Loss: 15.211037728114418
Iteration: 200, Loss: 14.062381624329538
Iteration: 300, Loss: 13.363452068615628
Iteration: 400, Loss: 12.879076781652618
Iteration: 500, Loss: 12.462048419537995
Iteration: 600, Loss: 12.093595215671106
Iteration: 700, Loss: 11.759182541603238
Iteration: 800, Loss: 11.437996288224205
Iteration: 900, Loss: 11.137668595457017
Iteration: 1000, Loss: 10.8217759907913
Iteration: 1100, Loss: 10.484417048679699
Iteration: 1200, Loss: 10.187900224396142
Iteration: 1300, Loss: 9.912695388573876
Iteration: 1400, Loss: 9.647090473410262
Iteration: 1500, Loss: 9.385938640481887
Iteration: 1600, Loss: 9.157585683836515
Iteration: 1700, Loss: 8.954543598879262
Iteration: 1800, Loss: 8.772357708568581
Iteration: 1900, Loss: 8.610979161719218
Iteration: 2000, Loss: 8.45509568952375


In [17]:
def predict(x, model):
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
    A2 = sigmoid(np.matmul(x, W1) + b1)
    A3 = np.matmul(A2, W2) + b2
    return A3

In [18]:
predictions =nn.predict(X_test_scaled)
print(predictions)
print(y_test)

[[ 9.99771142]
 [ 5.63973921]
 [16.84433669]
 [11.71203502]
 [15.50672045]
 [10.06495186]
 [14.30685415]
 [15.67089815]
 [11.52816692]
 [11.71129966]
 [17.41718516]
 [18.91612963]
 [22.23399525]
 [ 9.01298428]
 [13.77876572]
 [11.95962322]
 [19.83989688]
 [26.58978447]
 [ 6.26964545]
 [18.48605776]
 [ 8.99750728]
 [ 7.71945222]
 [18.43276347]
 [ 6.48828881]
 [18.48458117]
 [15.08838506]
 [13.01788542]
 [23.76247401]
 [18.46454602]
 [12.40915076]
 [16.35949781]
 [ 6.34077463]
 [18.88258389]
 [19.76454092]
 [13.06383519]
 [18.13976655]
 [ 3.38875715]
 [10.3015607 ]
 [13.98311244]
 [ 9.91023056]
 [17.46255457]
 [ 8.51931774]
 [ 5.27284141]
 [11.6650258 ]
 [ 7.82410983]
 [19.38586338]
 [18.69237129]
 [ 8.68044642]
 [12.8379146 ]
 [ 8.2014658 ]
 [17.94355898]
 [ 5.53415397]
 [12.94930446]
 [ 6.09845312]
 [ 4.00953753]
 [10.88344506]
 [17.15147754]
 [ 4.70538831]
 [ 7.06700938]
 [15.21554954]
 [ 4.2400568 ]
 [ 2.84867228]
 [ 6.92823375]
 [13.74595659]
 [ 7.19314161]
 [24.09619986]
 [18.29864