In [1]:
import numpy as np
import torch.nn as nn
import torch
import matplotlib.pyplot as plt
import torch.optim as optim

# Landmark coordinates for each city, five points per city.
# Every row is [latitude, longitude] in decimal degrees; the first row of each
# array is the city center.
paris_coords = np.array([
    [48.8575, 2.3514], #Center of Paris
    [48.8584, 2.2945], # Eiffel Tower
    [48.8530, 2.3499], #Notre Dame
    [48.8606,  2.3376], #Louvre
    [48.8606, 2.3522]  #Centre Pompidou
])

madrid_coords = np.array([
    [40.4167, -3.7033],   # Center of Madrid
    [40.4153, -3.6835],   # Retiro Park 
    [40.4180, -3.7143],   # Royal Palace 
    [40.4138, -3.6921],   # Prado Museum 
    [40.4169, -3.7033]   # Puerta del Sol 
])

berlin_coords = np.array([
    [52.5200, 13.4050], # Center of Berlin
    [52.5163, 13.3777],   # Brandenburg Gate 
    [52.5169, 13.4019],   # Museum Island 
    [52.5074, 13.3904],   # Checkpoint Charlie 
    [52.5251, 13.3694]   # Berlin Central Station 
])

barcelona_coords = np.array([
    [41.3874, 2.1686],    # Center of Barcelona
    [41.4036, 2.1744],    # Sagrada Familia
    [41.3819, 2.1773],    # Gothic Quarter 
    [41.4145, 2.1527],    # Park Güell 
    [41.3809, 2.1228],    # Camp Nou
])


In [2]:
# Reference [latitude, longitude] subtracted from every coordinate before it is
# passed to the model. Despite the name this is not the data mean: it is the
# Paris city-center point, so that point maps to the origin.
mean_to_subtract=np.array([48.8575, 2.3514]) #Center of Paris

In [3]:
# Set random seed for reproducibility.
# Both libraries are seeded because both are used as randomness sources below:
# torch for the model's parameter initialization, numpy for shuffling the data.
random_seed=25
torch.manual_seed(random_seed)
np.random.seed(random_seed)

In [4]:
# Combine data into one matrix X_raw (rows are [latitude, longitude]) and labels y.
# Row order before shuffling: 0-4 Madrid, 5-9 Paris, 10-14 Berlin, 15-19 Barcelona.
X_raw = np.vstack([madrid_coords, paris_coords, berlin_coords, barcelona_coords])
y = np.array([0, 0, 0, 0, 0,  #  Madrid labels (0)
              1, 1, 1, 1, 1,  #  Paris labels (1)
              2, 2, 2, 2, 2, # Berlin labels (2)
              3, 3, 3, 3, 3]) # Barcelona Labels (3)

# Center the inputs on the reference point. Training on the raw coordinates did
# not converge for this 2-D problem, so the model always sees centered values.
X = X_raw - mean_to_subtract

# Shuffle the sample order, then pin two samples to fixed positions so the
# sequence matches the example shown earlier in the chapter: original row 5
# (Paris center) goes first and original row 13 goes to position 11.
# The pinning is done by SWAPPING entries. Plainly overwriting rI[pos] would
# duplicate one sample and silently drop another whenever the shuffle had not
# already put the wanted value there; swapping keeps rI a true permutation and
# is a no-op when the value is already in place.
rI=np.arange(len(y))
np.random.shuffle(rI)
for position, sample in ((0, 5), (11, 13)):
    current = int(np.flatnonzero(rI == sample)[0])
    rI[[position, current]] = rI[[current, position]]
assert np.array_equal(np.sort(rI), np.arange(len(y))), "rI must be a permutation of the samples"

# Apply the same ordering to the centered inputs, the labels and the raw inputs.
X=X[rI,:]
y=y[rI]
X_raw=X_raw[rI,:]

print(f"Shape of X: {X.shape}")
print(f"Shape of y: {y.shape}")

Shape of X: (20, 2)
Shape of y: (20,)


In [5]:
# Display the final sample order (after the shuffle and the manual pinning) that
# was used to index X, y and X_raw.
rI

array([ 5, 11,  2, 10,  6, 18, 16,  0,  3,  7,  1, 13, 14,  9, 19, 17,  8,
       12, 15,  4])

In [6]:
class TinyGPSModel(nn.Module):
    """Minimal classifier: a single linear layer from coordinates to city logits.

    Args:
        input_size: number of input features (2 by default).
        output_size: number of logits produced, one per class (4 by default).
    """

    def __init__(self, input_size=2, output_size=4):
        super().__init__()
        # The model's only layer; other cells read `self.output.weight` and
        # `self.output.bias` directly, so the attribute name must stay `output`.
        self.output = nn.Linear(in_features=input_size, out_features=output_size)

    def forward(self, x):
        """Return the raw logits for `x`; no softmax is applied here."""
        return self.output(x)

In [7]:
# First instantiation of the model, loss and optimizer (plain SGD, lr=0.1).
# NOTE: `model`, `criterion` and `optimizer` are all re-created in the training
# cell further down (there with Adam), so this optimizer is never stepped in the
# cells shown; the untrained `model` from here is what the first heatmap pass uses
# when the notebook is run top to bottom.
model = TinyGPSModel()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

In [8]:
# Where to save training heatmaps.
# The default is the author's local Dropbox folder. Set the HEATMAP_SAVE_DIR
# environment variable to write the PNGs elsewhere without editing the notebook
# (the hard-coded path only exists on the author's machine).
import os
# Previous location:
# save_dir='/Users/stephen/Stephencwelch Dropbox/Stephen Welch/welch_labs/backprop2/graphics/to_manim/jun_6_2'
save_dir=os.environ.get(
    'HEATMAP_SAVE_DIR',
    '/Users/stephen/Stephencwelch Dropbox/welch_labs/ai_book/3_backprop_2/graphics/training_heatmaps_2',
)
os.makedirs(save_dir, exist_ok=True)

In [9]:
# Hex color palette. BLUE, RED, GREEN and PURPLE are turned into the per-class
# heatmap colormaps below; CHILL_BROWN is not referenced in the cells shown here.
BLUE='#21409a'
RED='#ed1c24'
GREEN='#00a14b'
CHILL_BROWN='#948979'
PURPLE='#7f3f98'

In [10]:
# Geographic window covered by the heatmaps, in degrees.
# Swaggin - need to sync with Sam on exact numbers.
# Earlier, wider window that was tried: long [-6.0, 17.0], lat [36.0, 56.0].
min_long=-6.0
max_long=16.5
min_lat=38.6
max_lat=53.5  
num_steps=256  # grid resolution along each axis
heatmap_viz_logit_multiplier=8 #Makes things more winner take all for cleaner logit viz

# heatmaps[0:4] hold the raw logit of each of the 4 classes,
# heatmaps[4:8] hold the (sharpened) softmax probability of each class.
heatmaps=[np.zeros((num_steps, num_steps)) for _ in range(8)]

# Rows run from max_lat down to min_lat so that north ends up at the top of the image.
for i, lat in enumerate(np.linspace(max_lat, min_lat, num_steps)):
    for j, long in enumerate(np.linspace(min_long, max_long, num_steps)):
        with torch.no_grad():
            # The model works on centered coordinates, so the grid point must be
            # shifted exactly like the training inputs.
            coords_norm=np.array([lat, long])-mean_to_subtract
            logits=model(torch.tensor(coords_norm, dtype=torch.float)).detach()
            yhat=torch.nn.Softmax(0)(heatmap_viz_logit_multiplier*logits).numpy()

        logits_np=logits.numpy()
        for k in range(4):
            heatmaps[k][i,j]=logits_np[k]
            # Offset is 4 (the number of classes): an offset of 3 would overwrite
            # slot 3 and leave slot 7 empty.
            heatmaps[k+4][i,j]=yhat[k]

In [11]:
import matplotlib.colors as mcolors
def create_transparent_colormap(color='cyan', name='transparent_to_color'):
    """Build a colormap that fades from fully transparent to fully opaque `color`.

    Args:
        color: any color specification accepted by `mcolors.to_rgba`.
        name: name given to the resulting colormap.

    Returns:
        A 256-level `LinearSegmentedColormap` whose RGB is constant and whose
        alpha ramps from 0 to 1.
    """
    # Only the RGB part of the input is kept; alpha is replaced by the 0 -> 1 ramp.
    red, green, blue, _ = mcolors.to_rgba(color)
    endpoints = [
        (red, green, blue, 0),  # low end: invisible
        (red, green, blue, 1),  # high end: solid color
    ]
    return mcolors.LinearSegmentedColormap.from_list(name, endpoints, N=256)

# One transparent-to-solid colormap per class.
# NOTE: the variable names are historical and do not match the hues actually
# used: "cyan" is BLUE, "yellow" is RED and "magenta" is PURPLE.
(transparent_cyan_cmap,
 transparent_yellow_cmap,
 transparent_green_cmap,
 transparent_magenta_cmap) = [create_transparent_colormap(base_color)
                              for base_color in (BLUE, RED, GREEN, PURPLE)]

# Eight entries, index-aligned with the eight heatmaps: the four class colormaps
# for the logit maps, then the same four again for the probability maps.
cmaps=[transparent_cyan_cmap, transparent_yellow_cmap,
       transparent_green_cmap, transparent_magenta_cmap] * 2

# File-name suffixes, index-aligned with `cmaps`.
save_names=(['_logits_1.png', '_logits_2.png', '_logits_3.png', '_logits_4.png']
            + ['_yhat_1.png', '_yhat_2.png', '_yhat_3.png', '_yhat_4.png'])


In [12]:
# Initialize model, loss, and optimizer. These are fresh instances and replace
# the SGD setup created earlier in the notebook.
model = TinyGPSModel()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.03)

num_training_steps = 500  # one training example per step
log_every = 10            # print loss/accuracy every this many steps

# Per-step history; converted to arrays after the loop.
weights=[]   # flattened weights (8) then biases (4), recorded BEFORE that step's update
grads=[]     # gradients, same layout as `weights`
xs=[]        # raw (un-centered) input used at the step
ys=[]        # label used at the step
logitss=[]   # model output for that input
yhats=[]     # softmax of that output

# The heatmap grid is identical at every step, so build it once. Rows run from
# max_lat down to min_lat (north at the top), columns from min_long to max_long.
# With indexing='ij', flat index r*num_steps + c is grid cell (row r, column c).
lat_grid, long_grid = np.meshgrid(np.linspace(max_lat, min_lat, num_steps),
                                  np.linspace(min_long, max_long, num_steps),
                                  indexing='ij')
grid_inputs = torch.tensor(
    np.stack([lat_grid.ravel(), long_grid.ravel()], axis=1) - mean_to_subtract,
    dtype=torch.float)

# Training loop
for i in range(num_training_steps):
    sample_index = i % len(y)  # cycle through the dataset in its stored order
    xs.append(X_raw[sample_index])
    ys.append(y[sample_index])
    # np.concatenate copies, so later in-place parameter updates cannot alter the record.
    weights.append(np.concatenate([model.output.weight.detach().numpy().ravel(),
                                   model.output.bias.detach().numpy().ravel()]))

    optimizer.zero_grad()

    #Stochastic - i think this is a better starting point pedagogically.
    outputs = model(torch.tensor(X[sample_index]).float())
    loss = criterion(outputs, torch.tensor(y[sample_index]))

    logitss.append(outputs.detach().numpy())
    yhats.append(torch.nn.Softmax(0)(outputs.detach()).numpy())

    # Heatmaps of the model as it stands BEFORE this step's update: one batched
    # forward pass over the whole grid instead of one pass per pixel.
    with torch.no_grad():
        grid_logits = model(grid_inputs)                                         # (num_steps**2, 4)
        grid_yhat = torch.softmax(heatmap_viz_logit_multiplier * grid_logits, dim=1)
    logit_maps = grid_logits.numpy().T.reshape(-1, num_steps, num_steps)
    yhat_maps = grid_yhat.numpy().T.reshape(-1, num_steps, num_steps)
    # First the four logit maps, then the four probability maps (matches cmaps/save_names).
    heatmaps = [m.astype(np.float64) for m in (*logit_maps, *yhat_maps)]

    for heatmap, cmap, save_name in zip(heatmaps, cmaps, save_names):
        # Work on an explicit figure: calling plt.clf() first would create an
        # extra empty figure that is never closed.
        fig = plt.figure(frameon=False)
        ax = fig.add_axes([0., 0., 1., 1.])  # axes fill the whole canvas
        ax.set_axis_off()
        ax.imshow(heatmap, cmap=cmap)
        fig.savefig(f"{save_dir}/{i}{save_name}", bbox_inches='tight', pad_inches=0, dpi=300)
        plt.close(fig)

    loss.backward()  # backpropagation
    grads.append(np.concatenate([model.output.weight.grad.detach().numpy().ravel(),
                                 model.output.bias.grad.detach().numpy().ravel()]))
    optimizer.step()

    if (i + 1) % log_every == 0:
        # Loss is the single-sample loss of this step; accuracy is over the full dataset.
        with torch.no_grad():
            logits=model(torch.tensor(X, dtype=torch.float))
            accuracy=(torch.argmax(logits, dim=1)==torch.tensor(y)).sum().item()/len(y)
        print(f"Step {i+1}/{num_training_steps}, Loss: {loss.item():.4f}, Accuracy: {accuracy:.4f}")

weights=np.array(weights)
grads=np.array(grads)
xs=np.array(xs)
ys=np.array(ys)
logitss=np.array(logitss)
yhats=np.array(yhats)

Step 10/9, Loss: 1.7696, 'Accuracy: 0.0000
Step 20/19, Loss: 0.0932, 'Accuracy: 0.2500
Step 30/29, Loss: 1.9138, 'Accuracy: 0.5000
Step 40/39, Loss: 0.2676, 'Accuracy: 0.5000
Step 50/49, Loss: 1.8297, 'Accuracy: 0.5000
Step 60/59, Loss: 0.4453, 'Accuracy: 0.7500
Step 70/69, Loss: 1.6298, 'Accuracy: 0.7500
Step 80/79, Loss: 0.1628, 'Accuracy: 0.7500
Step 90/89, Loss: 1.3998, 'Accuracy: 0.7500
Step 100/99, Loss: 0.1237, 'Accuracy: 0.7500
Step 110/109, Loss: 1.1775, 'Accuracy: 0.7500
Step 120/119, Loss: 0.0903, 'Accuracy: 1.0000
Step 130/129, Loss: 0.9768, 'Accuracy: 1.0000
Step 140/139, Loss: 0.0681, 'Accuracy: 1.0000
Step 150/149, Loss: 0.8058, 'Accuracy: 1.0000
Step 160/159, Loss: 0.0556, 'Accuracy: 1.0000
Step 170/169, Loss: 0.6652, 'Accuracy: 1.0000
Step 180/179, Loss: 0.0467, 'Accuracy: 1.0000
Step 190/189, Loss: 0.5522, 'Accuracy: 1.0000
Step 200/199, Loss: 0.0399, 'Accuracy: 1.0000
Step 210/209, Loss: 0.4623, 'Accuracy: 1.0000
Step 220/219, Loss: 0.0346, 'Accuracy: 1.0000
Step 230

<Figure size 640x480 with 0 Axes>

In [13]:
# One row per training step; 12 columns = 8 weight entries (4x2 layer) + 4 biases.
weights.shape

(500, 12)

In [14]:
# Parameters recorded at step 0, before any optimizer update (the initial values).
weights[0]

array([-0.20930496,  0.64028716, -0.66689724,  0.4933215 , -0.14890456,
        0.0255919 ,  0.6683024 ,  0.53918844, -0.6991516 , -0.27485383,
        0.62002057,  0.41699004], dtype=float32)

Hmm, I need to pass in that Paris point first, since I want it to give me my initial h and ys... OK, I think training should start with this point now.

In [15]:
# Logits at step 0. The first input is X[0] == [0, 0], so these equal the
# initial biases (the last four entries of weights[0]).
logitss[0]

array([-0.6991516 , -0.27485383,  0.62002057,  0.41699004], dtype=float32)

In [16]:
# Softmax of logitss[0]: the predicted class probabilities at step 0.
yhats[0]

array([0.10727436, 0.16397065, 0.40124083, 0.32751414], dtype=float32)

In [17]:
# Pack the per-step history into a single 2-D array, one row per training step.
# Column layout (35 columns in total):
#   0-1   raw [latitude, longitude] of the sample used at the step (xs)
#   2     its label (ys)
#   3-14  parameters before the update: 8 weights then 4 biases (weights)
#   15-26 gradients in the same layout (grads)
#   27-30 logits (logitss)
#   31-34 softmax probabilities (yhats)
all_training_data=np.concatenate(
    [xs, ys[:, np.newaxis], weights, grads, logitss, yhats],
    axis=1,
)
np.save('/Users/stephen/Stephencwelch Dropbox/Stephen Welch/welch_labs/backprop2/hackin/cities_2d_book_1', all_training_data)

In [18]:
# First centered training input: the Paris center, which maps to the origin.
X[0]

array([0., 0.])

In [20]:
# Second centered training input (offset from the Paris center, in degrees).
X[1]

array([ 3.6588, 11.0263])

In [23]:
# Label used at training step 251 (dataset position 251 % 20 == 11).
ys[251]

2

In [27]:
# Centered input used at training step 251; the loop indexes samples with i % len(y).
X[251%len(y)]

array([ 3.6499, 11.039 ])

In [32]:
# Logits the model produced for that input at step 251, rounded for display.
np.round(logitss[251],3)

array([-9.013,  5.52 , 10.276,  4.616], dtype=float32)

In [36]:
# Softmax probabilities at step 251, rounded for display.
np.round(yhats[251], 5)

array([0.     , 0.0085 , 0.98806, 0.00344], dtype=float32)

In [37]:
# Parameters in effect at step 251 (8 weights then 4 biases), recorded before that step's update.
np.round(weights[251], 3)

array([-0.196, -0.622,  0.229,  0.297,  0.677,  0.758, -0.51 ,  0.609,
       -1.43 ,  1.406, -0.56 , -0.25 ], dtype=float32)

In [43]:
# Re-run the step-251 input through the model as it currently stands. This uses
# whatever parameters the model holds when the cell is executed, not the step-251
# snapshot, so the result need not match logitss[251].
with torch.no_grad():
    out=model(torch.tensor(X[251%len(y)]).float())

In [44]:
# Logits from the model's current parameters for the step-251 input.
out

tensor([-12.3860,   5.6140,  11.5058,   4.9243])

In [38]:
# Current weight matrix of the linear layer: one row per output class, one column per input feature.
model.output.weight

Parameter containing:
tensor([[-0.1769, -0.9019],
        [ 0.4118,  0.1624],
        [ 0.7940,  0.8812],
        [-0.6729,  0.7363]], requires_grad=True)

In [39]:
# The same weights flattened row by row: the layout used for the first 8 columns of `weights`.
model.output.weight.detach().numpy().ravel()

array([-0.17690872, -0.90194637,  0.41175136,  0.16236812,  0.79398596,
        0.8811785 , -0.67285424,  0.73627746], dtype=float32)