In [2]:
import numpy as np

In [3]:
import torch
import time

# Set the matrix size
matrix_size = 1000

# Generate two random matrices
A = torch.randn(matrix_size, matrix_size)
B = torch.randn(matrix_size, matrix_size)

# Function to perform matrix multiplication on CPU
def cpu_matrix_multiplication(A, B):
    # Ensure tensors are on the CPU
    A = A.to('cpu')
    B = B.to('cpu')
    
    start_time = time.time()
    result = torch.mm(A, B)
    end_time = time.time()
    
    return end_time - start_time, result

# Function to perform matrix multiplication on GPU
def gpu_matrix_multiplication(A, B):
    # Ensure tensors are on the GPU
    A = A.to('mps')  # Use 'mps' for Metal Performance Shaders on M1/M2
    B = B.to('mps')
    
    # Warm-up to ensure accurate timing
    torch.mm(A, B)
    
    # Perform matrix multiplication
    start_time = time.time()
    result = torch.mm(A, B)
    end_time = time.time()
    
    return end_time - start_time, result

# Perform the matrix multiplication on CPU
cpu_time, res_cpu = cpu_matrix_multiplication(A, B)
print(f"CPU Time: {cpu_time:.6f} seconds")

# Perform the matrix multiplication on GPU
gpu_time, res_gpu = gpu_matrix_multiplication(A, B)
print(f"GPU Time (using MPS): {gpu_time:.6f} seconds")

# Compare the results
speedup = cpu_time / gpu_time
print(f"Speedup using GPU (MPS): {speedup:.2f}x")



CPU Time: 0.017284 seconds
GPU Time (using MPS): 0.001121 seconds
Speedup using GPU (MPS): 15.42x


# forward

In [4]:
X1=np.array([[1,2,3],[4,5,6]])
W1=np.array([[6,7],[3,3],[2,2]])
Y1=X1@W1
f=lambda x: x+1
df=lambda x: np.ones(x.shape)
y_out=f(Y1)
L=np.sum(y_out,axis=1)

In [5]:
X1

array([[1, 2, 3],
       [4, 5, 6]])

In [6]:
W1

array([[6, 7],
       [3, 3],
       [2, 2]])

In [7]:
Y1

array([[18, 19],
       [51, 55]])

In [8]:
y_out

array([[19, 20],
       [52, 56]])

In [9]:
L

array([ 39, 108])

# backward

In [10]:
dL_dyout=np.ones((2,2))
dyout_Y1=df(Y1)
Y1_W1=X1.T


In [11]:
dL_dyout

array([[1., 1.],
       [1., 1.]])

In [12]:
dyout_Y1

array([[1., 1.],
       [1., 1.]])

In [24]:
dyout_Y1

array([[1., 1.],
       [1., 1.]])

In [13]:
dyout_Y1.shape[1]

2

In [14]:
func=torch.optim.Adam.__init__
adam_params = func.__code__.co_varnames[:func.__code__.co_argcount]
adam_params

('self', 'params', 'lr', 'betas', 'eps', 'weight_decay', 'amsgrad')

In [15]:
hparams={"lr": 1, "a":2}
parsed_arguments={ key: value for key,value in hparams.items() if key in adam_params}
parsed_arguments


{'lr': 1}

In [16]:
from itertools import product

grid_search_spaces={"a":[1,3], "b":[20,25]}
configs = []


    # More general implementation using itertools
for instance in product(*grid_search_spaces.values()):
    configs.append(dict(zip(grid_search_spaces.keys(), instance)))

configs

[{'a': 1, 'b': 20}, {'a': 1, 'b': 25}, {'a': 3, 'b': 20}, {'a': 3, 'b': 25}]

In [17]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class ShortcutModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(ShortcutModel, self).__init__()
        # Linear layer
        self.fc1 = nn.Linear(input_size, hidden_size)
        
        # Batch normalization layer
        self.bn = nn.BatchNorm1d(hidden_size)
        
        # Output layer
        self.fc2 = nn.Linear(hidden_size, output_size)
        
        ## combinde
        self.ffs=nn.Sequential(self.fc1,self.bn,nn.ReLU())

        # Shortcut connection (identity mapping)
        if input_size != hidden_size:
            # If input and output dimensions are different, use a linear layer to match dimensions
            self.shortcut = nn.Linear(input_size, hidden_size)
        else:
            # Identity mapping if dimensions match
            self.shortcut = nn.Identity()

    def forward(self, x):
        # First linear transformation
        #out = self.fc1(x)
        
        # Batch normalization and activation
        #out = self.bn(out)
        #out = F.relu(out)
        
        # Shortcut connection (adding the input to the transformed output)
        shortcut = self.shortcut(x)
        out =self.ffs(x)
        out += shortcut
        
        # Pass through output layer
        out = self.fc2(out)
        
        return out

# Example usage
input_size = 10
hidden_size = 20
output_size = 1
model = ShortcutModel(input_size, hidden_size, output_size)

# Example input tensor
x = torch.randn(5, input_size)
output = model(x)

print(output)
print(sum(p.numel() for p in model.parameters() if p.requires_grad))

tensor([[-0.5324],
        [-0.6000],
        [-0.0990],
        [-0.3604],
        [ 0.2153]], grad_fn=<AddmmBackward0>)
501


In [None]:
model

In [19]:
hparams["optimizer"]="Adam"
getattr(torch.optim, hparams["optimizer"].capitalize())

torch.optim.adam.Adam

In [23]:
getattr(torch.nn, "CrossEntropyLoss")

torch.nn.modules.loss.CrossEntropyLoss

In [2]:
import numpy as np

In [3]:
x=np.ones((1,31))

In [4]:
np.squeeze(x)

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [5]:
x=np.arange(10).reshape((2,5))

In [15]:
sample_mean = np.mean(x, axis=0)

x_minus_mean = x - sample_mean[np.newaxis,:]

In [16]:
x_minus_mean

array([[-2.5, -2.5, -2.5, -2.5, -2.5],
       [ 2.5,  2.5,  2.5,  2.5,  2.5]])