In [None]:
#preliminary setup
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
%pip install -r requirements.txt

#Fetching the data
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import normalize

# set seeds for reproducibility
random_seed = 302
torch.manual_seed(302); np.random.seed(302)

X, y = fetch_california_housing(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=random_seed)
X_train, X_validation, y_train, y_validation = train_test_split(X_train, y_train, test_size=0.1, random_state=random_seed, )

In [None]:
#a.) Investigating the dataset

all_sets = [X_train, y_train, X_validation, y_validation, X_test, y_test]
all_sets_names = ["Training set", "Training targets", "Validation set", "Validation targets", "Test set", "Test targets"]
features = ["MedInc", "HouseAge", "AveRooms", "AveBdrms", "Population", "AveOccup", "Latitude", "Longitude"]

# Normalizing data
# Every feature column is scaled to unit L2 norm. The norms are computed on the
# TRAINING split only and then applied to all three splits: a column's L2 norm
# grows with the number of rows, so normalizing each split by its own norm
# would put train/validation/test features on different scales.
feature_norms = np.linalg.norm(X_train, axis=0)
feature_norms[feature_norms == 0] = 1.0  # leave all-zero columns untouched
for split in all_sets[0::2]:
    # In-place division: X_train / X_validation / X_test themselves are rescaled.
    split /= feature_norms
# Answer to the earlier TODO ("why do the distributions not look different?"):
# dividing a column by a constant only rescales the x-axis, so the histogram
# shapes are expected to be identical before and after this normalization.

# Checking the data, to see what would be good to normalize
for split, name in zip(all_sets[0::2], all_sets_names[0::2]):

    print('\033[4m'+'\033[1m'+f"{name}"+'\033[0m'+'\033[0m')
    print(f"size: {str(split.shape) : >10}")

    # Per-feature summary statistics (one value per column).
    stats = {
        "mean": np.mean(split, axis=0).tolist(),
        "std": np.std(split, axis=0).tolist(),
        "min": np.min(split, axis=0).tolist(),
        "max": np.max(split, axis=0).tolist(),
    }

    # One histogram per feature, laid out on a 2-row grid.
    fig, axs = plt.subplots(2, len(features)//2, figsize=(8, 4), layout="tight")
    for ax, feature, feature_data in zip(axs.flat, features, split.T):
        ax.hist(feature_data, bins=50)
        ax.set_title(feature)
    fig.suptitle(name)

    # Stats table: the column width is derived from the feature names so header
    # and rows stay aligned; 3 significant digits keep the small post-scaling
    # values readable (rounding to 2 decimals printed most of them as 0.0).
    col_width = max(len(feature) for feature in features) + 3
    header = f"{'':>4}" + "".join(f"{feature:>{col_width}}" for feature in features)
    print('\033[1m' + header + '\033[0m')
    for stat, vals in stats.items():
        print(f"{stat:>4}" + "".join(f"{val:>{col_width}.3g}" for val in vals))

    plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt
# Ten evenly spaced points on [10, 20]; `* 1 - 9` multiplies by one and then
# subtracts nine, so c runs from 1 to 11.
# NOTE(review): `*1-9` may be a typo for `*1e-9` -- confirm the intended values.
c = np.linspace(10, 20, 10) * 1 - 9

# presumably 2*pi*f for f = 5.4 GHz -- confirm
two_pi_f = 2 * np.pi * 5.4e9

# Complex quantity with a constant real part of 37.5 and an imaginary part
# of -1 / (two_pi_f * c).
Z_inner = -1j / (two_pi_f * c) + 37.5

# Final value: 50 * 50 divided element-wise by the quantity above.
Z = 50 * 50 / Z_inner

# Scatter the result in the complex plane (real part on x, imaginary on y).
plt.figure()
plt.plot(Z.real, Z.imag, 'or')


## Networks

In [2]:
from models import NeuralNet_deep, NeuralNet_wide, NeuralNet_default, NeuralNet_deep_wider, NeuralNet_deeper_wide

def _align_target(predict, target):
    """Return ``target`` reshaped to ``predict``'s shape when only the layout differs.

    Guards against silent broadcasting in the MSE loss: a ``(B, 1)`` prediction
    against a ``(B,)`` target would otherwise be expanded to a ``(B, B)`` error
    matrix. Targets whose element count differs are returned unchanged so the
    loss function reports the mismatch itself.
    """
    if predict.shape != target.shape and predict.numel() == target.numel():
        return target.reshape(predict.shape)
    return target


def train_model(model, optimizer, train_loader, val_loader, device, num_epochs=5):
    """Train ``model`` with an MSE objective and evaluate it after every epoch.

    Args:
        model: Module called as ``model(data)``; switched between train and
            eval mode each epoch.
        optimizer: Optimizer stepped once per training batch.
        train_loader: Iterable of ``(data, target)`` training batches.
        val_loader: Iterable of ``(data, target)`` validation batches.
        device: Device every batch is moved to before the forward pass.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        Tuple ``(train_losses, val_losses, val_accuracies)``:
        ``train_losses`` holds one loss value per training batch (all epochs
        concatenated), ``val_losses`` one mean squared error per epoch, and
        ``val_accuracies`` is always empty -- accuracy is not computed for this
        regression objective; the list is kept so existing callers that unpack
        three values keep working.
    """
    loss_fn = nn.MSELoss()
    train_losses = []
    val_losses = []
    val_accuracies = []
    for epoch in range(num_epochs):
        print('-'*20, f'Epoch {epoch}', '-'*20)

        # Train one epoch
        model.train()
        epoch_losses = []
        for data, target in train_loader:
            data, target = data.to(device), target.to(device)

            optimizer.zero_grad()
            predict = model(data)
            loss = loss_fn(predict, _align_target(predict, target))
            loss.backward()
            optimizer.step()

            epoch_losses.append(loss.item())
        train_losses.extend(epoch_losses)

        # Average over this epoch's batches only; an empty loader yields nan
        # instead of averaging the whole loss history.
        avg_train_loss = np.mean(epoch_losses) if epoch_losses else float('nan')
        print(f'Train Epoch {epoch} | Average Training Loss {avg_train_loss}')

        # Evaluate on validation set
        model.eval()
        squared_error_sum = 0.0
        n_values = 0
        with torch.no_grad():
            for data, target in val_loader:
                data, target = data.to(device), target.to(device)
                predict = model(data)
                # Sum up the batch loss; the division happens once at the end.
                squared_error_sum += F.mse_loss(
                    predict, _align_target(predict, target), reduction='sum'
                ).item()
                n_values += predict.numel()

        # Mean over every predicted value actually seen, which matches the
        # 'mean' reduction of the training loss; nan for an empty validation set.
        val_loss = squared_error_sum / n_values if n_values else float('nan')
        val_losses.append(val_loss)

        print(f'Validation set: Average loss: {val_loss:.4f}\n')

    return train_losses, val_losses, val_accuracies
