# Today, our goal is to write automated code that helps us with parameter tuning and model selection.
## We also aim to write reproducible code that can be reused in future work.

### To get started, we load the relevant packages and perform the standard data preprocessing operations.

In [3]:
import pandas as pd
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import seaborn as sns
import gstools as gs
from sklearn.metrics import mean_squared_error

# Specify the file path
# NOTE(review): absolute, machine-specific path -- consider a configurable
# data directory so the notebook runs on other machines.
file_path = "/Users/cui/Library/CloudStorage/OneDrive-YaleUniversity/0 High-Dim Spatial/hubmap/ann/B009A_22_03_03_Skywalker_reg001_compensated_ann.csv"

# Tunable constants for the split. The seed makes the shuffle (and therefore
# the train/test split) identical on every Restart & Run All.
SEED = 0
N_TRAIN = 2000
N_TEST = 1000

# Load the CSV file into a DataFrame
df = pd.read_csv(file_path)

# shuffle the rows (seeded, so the split below is reproducible)
df = df.sample(frac=1, random_state=SEED).reset_index(drop=True)

# pick a protein and drop the rows with missing values
protein = 'Synapto'

df = df.dropna(subset=[protein])

# head()/tail() would silently share rows (train/test leakage) if fewer than
# N_TRAIN + N_TEST rows remain, so fail loudly instead.
if len(df) < N_TRAIN + N_TEST:
    raise ValueError(
        f"Need at least {N_TRAIN + N_TEST} rows with a non-missing '{protein}' "
        f"value for disjoint train/test sets, but only {len(df)} are available."
    )

# we normalise x and y so that the grid is approximately 1 by 1
# (each coordinate is divided by its own maximum over the retained rows)
df = df.assign(
    x=df['x'] / df['x'].max(),
    y=df['y'] / df['y'].max(),
)

# first N_TRAIN shuffled rows for training, last N_TEST for testing
df_train = df.head(N_TRAIN)
df_test = df.tail(N_TEST)
x_train = df_train['x']
y_train = df_train['y']
val_train = df_train[protein]

x_test = df_test['x']
y_test = df_test['y']
val_test = df_test[protein]

# grid definition for output field: steps of 0.05 starting at 0.0,
# with the stop value 1.05 chosen so that the grid reaches 1.0
gridx = np.arange(0.0, 1.05, 0.05)
gridy = np.arange(0.0, 1.05, 0.05)


### Next, we study an example of gradient descent generated by ChatGPT.

In [4]:
import numpy as np

def gradient_descent(x, y, learning_rate=0.01, iterations=1000):
    """Fit the line ``y ~ beta0 + beta1 * x`` by batch gradient descent.

    The loss minimised is the mean squared error
    ``(1/n) * sum((y - beta0 - beta1 * x) ** 2)``; both parameters start at
    zero and are updated simultaneously from the full data set each step.

    Parameters
    ----------
    x, y : array-like of numbers
        Predictor and response values. Lists, tuples and numpy arrays are
        all accepted; they are converted to float arrays internally and must
        have the same shape.
    learning_rate : float, default 0.01
        Step size applied to each gradient.
    iterations : int, default 1000
        Number of gradient steps to take.

    Returns
    -------
    (beta0, beta1)
        The fitted intercept and slope.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` differ in shape or are empty.
    """
    # Coerce up front: with plain lists, ``0 * x`` would be list repetition
    # rather than element-wise multiplication.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    if x.shape != y.shape:
        raise ValueError(
            f"x and y must have the same shape, got {x.shape} and {y.shape}"
        )
    if y.size == 0:
        # The gradients average over n observations; n == 0 is undefined.
        raise ValueError("x and y must contain at least one observation")

    beta0 = 0.0
    beta1 = 0.0

    for _ in range(iterations):
        error = y - (beta0 + beta1 * x)

        # Gradients of the MSE: d/d_beta0 = -2 * mean(error),
        # d/d_beta1 = -2 * mean(error * x). Vectorised means replace the
        # element-by-element built-in sum().
        d_beta0 = -2.0 * error.mean()
        d_beta1 = -2.0 * (error * x).mean()

        # Both gradients are computed before either parameter moves,
        # so the update is simultaneous.
        beta0 = beta0 - learning_rate * d_beta0
        beta1 = beta1 - learning_rate * d_beta1

    return beta0, beta1

# Example usage: five points that lie exactly on the line y = 2x, so the
# fit should approach an intercept of 0 and a slope of 2.
x = np.array([1, 2, 3, 4, 5])
y = 2 * x  # element-wise doubling: 2, 4, 6, 8, 10

beta0, beta1 = gradient_descent(x, y)
print(f"Optimal parameters: beta0 = {beta0}, beta1 = {beta1}")

Optimal parameters: beta0 = 0.017400463340610635, beta1 = 1.9951803506719779


### While this example is instructive, it requires us to write down the derivative of the loss with respect to the parameters by hand. How can this be done if we do not yet know the explicit form of the model?

In [7]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

# Build the FashionMNIST training split (train=True) and test split
# (train=False), in that order. Both use the "data" root directory, request a
# download, and convert each image with ToTensor().
training_data, test_data = (
    datasets.FashionMNIST(
        root="data",
        train=is_train,
        download=True,
        transform=ToTensor(),
    )
    for is_train in (True, False)
)

# Wrap each split in a DataLoader that yields batches of 64 samples.
train_dataloader, test_dataloader = (
    DataLoader(split, batch_size=64)
    for split in (training_data, test_data)
)

class NeuralNetwork(nn.Module):
    """Fully connected classifier: 28*28 inputs -> 512 -> 512 -> 10 outputs.

    The input is flattened first, then passed through three linear layers
    with a ReLU after each of the first two. The final layer's raw outputs
    (logits) are returned without any activation.
    """

    def __init__(self):
        super().__init__()
        # Collapses each sample to a vector before the linear layers.
        self.flatten = nn.Flatten()
        # Layers are created in forward order so that parameter names
        # (linear_relu_stack.0, .2, .4) follow their position in the stack.
        layers = [
            nn.Linear(28 * 28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        ]
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Return the 10 logits for each sample in ``x``."""
        return self.linear_relu_stack(self.flatten(x))

# Instantiate the network defined above; no custom weight initialisation is applied here.
model = NeuralNetwork()

ModuleNotFoundError: No module named 'torchvision'