<a href="https://colab.research.google.com/github/ShreyaSinha14468/context-dependent-valuation/blob/main/Context_dependent_valuation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm
import ast
import csv

#### Connection to Drive

In [None]:
# Load the Drive helper and mount
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


#### Connection to Git

In [None]:
!apt-get install git
!git config --global user.name "ShreyaSinha14468"
!git config --global user.email "ss14468@nyu.edu"

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
git is already the newest version (1:2.34.1-1ubuntu1.10).
0 upgraded, 0 newly installed, 0 to remove and 45 not upgraded.


In [16]:
!git clone https://github.com/ShreyaSinha14468/context-dependent-valuation.git

fatal: destination path 'context-dependent-valuation' already exists and is not an empty directory.


In [17]:
import os
# df.to_csv('your-repo/example.csv', index=False)
os.chdir('context-dependent-valuation')
!git status
# !git add example.csv
# !git commit -m "Add example dataset"
# !git push

On branch main
Your branch is up to date with 'origin/main'.

nothing to commit, working tree clean


In [18]:
!pwd

/content/context-dependent-valuation


## Dataset Generation

In [None]:
class NoisySelectionDataset(Dataset):
    def __init__(self, num_samples, covariance_matrix, num_values=3, mean_cov=[0,0,0], noise_sd=1/3, noise_mean = 0):
        self.num_samples = num_samples
        self.num_values = num_values
        self.cov_matrix = covariance_matrix
        self.mean_cov = mean_cov
        self.noise_sd = noise_sd
        self.noise_mean= noise_mean
        self.dataset = self.generate_dataset()

    def generate_dataset(self):
        samples = []
        for _ in range(self.num_samples):
            true_values = np.random.multivariate_normal(self.mean_cov, self.cov_matrix)
            noisy_values = true_values + np.random.normal(self.noise_mean, self.noise_sd, self.num_values)
            best_index = np.argmax(true_values)
            samples.append({'input': noisy_values, 'output': best_index, 'true_values': true_values})
        return samples

    def __len__(self):
        return self.num_samples

    def __getitem__(self, idx):
        sample = self.dataset[idx]
        return {
            'input': torch.tensor(sample['input'], dtype=torch.float32),
            'output': torch.tensor(sample['output'], dtype=torch.long),
            'true_values': torch.tensor(sample['true_values'], dtype=torch.float32)
        }

In [None]:
def calculate_accuracy(predictions, targets):
    _, predicted = torch.max(predictions, 1)
    correct = (predicted == targets).sum().item()
    total = targets.size(0)
    accuracy = correct / total
    return accuracy

In [None]:
# Training Loss Plot
def plot_train_loss(train_losses):
    plt.plot(range(len(train_losses)), train_losses, label='Training Loss')
    plt.title("Training Loss")
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

In [None]:
# Validation Accuracy Plot
def plot_validation_accuracy(val_accuracies):
    plt.plot(range(len(val_accuracies)), val_accuracies, label='Validation Accuracy')
    plt.title("Validation Accuracy")
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.show()

## Custom Loss Function:

In [None]:
def custom_loss_reward_maximization(predictions, inputs, targets):
    probs = torch.nn.functional.softmax(predictions, dim=1)
    rewards = torch.sum(probs * inputs, dim=1)
    # actual_rewards = torch.gather(inputs, 1, targets.view(-1, 1))
    # diff = abs(actual_rewards - rewards)
    # return torch.mean(diff)
    return -torch.mean(rewards)

## Neural Network Model:

In [None]:
num_values = 3

#### Single Layer; 3 neuron units:

In [None]:
class Simple3NN(nn.Module):
    def __init__(self):
        super(Simple3NN, self).__init__()
        self.fc1 = nn.Linear(num_values, 3)
        self.fc2 = nn.Linear(3, num_values)
        self.tanh = nn.Tanh()

    def forward(self, x):
        x = self.tanh(self.fc1(x))
        x = self.fc2(x)
        return x


In [None]:
# Training Specifications
batch_size = 32
model = Simple3NN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)
epochs = 50
train_losses = []
val_losses = []

In [None]:
# Data Specifications
num_train_samples = 10000
num_val_samples = 1000
num_values = 3
covariance_matrix = np.array([[1, 0.1, 0.1], [0.1, 1, 0.1], [0.1, 0.1, 1]])
mean_cov=[0,0,0]
noise_sd=1/3
noise_mean = 0

dataset = NoisySelectionDataset(
    num_samples=num_train_samples,
    num_values=num_values,
    covariance_matrix=covariance_matrix,
    mean_cov=mean_cov,
    noise_sd=noise_sd,
    noise_mean=noise_mean
    )

# Sample Covariance Test
inputs = np.vstack([sample['true_values'] for sample in dataset])
covariance_matrix = np.cov(inputs, rowvar=False)
print("Covariance Matrix:\n")
print(covariance_matrix)

In [None]:
for epoch in range(epochs):

    train_dataset = NoisySelectionDataset(
        num_samples=num_train_samples,
        num_values=num_values,
        covariance_matrix=covariance_matrix,
        mean_cov=mean_cov,
        noise_sd=noise_sd,
        noise_mean=noise_mean
        )

    dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    total_train_samples = 0
    correct_train_samples = 0

    for batch in dataloader:
        inputs = batch['input']
        outputs = batch['output']

        predictions = model(inputs)

        loss = criterion(predictions, outputs)
        # loss = custom_loss_reward_maximization(predictions, inputs, outputs)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_train_samples += outputs.size(0)
        correct_train_samples += (torch.argmax(predictions, dim=1) == outputs).sum().item()

    train_losses.append(loss.item())
    train_accuracy = correct_train_samples / total_train_samples
    print(f'Epoch: {epoch + 1}/{epochs}, Train Loss: {loss.item()}, Train Accuracy: {train_accuracy}')

In [None]:
plot_train_loss(train_losses)

In [None]:
model.eval()
val_dataset = NoisySelectionDataset(
        num_samples=num_val_samples,
        num_values=num_values,
        covariance_matrix=covariance_matrix,
        mean_cov=mean_cov,
        noise_sd=noise_sd,
        noise_mean=noise_mean
        )

validation_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)
with torch.no_grad():
        val_losses = []
        val_accuracies = []
        for val_batch in validation_dataloader:
            val_inputs = val_batch['input']
            val_outputs = val_batch['output']
            val_predictions = model(val_inputs)
            # val_loss = custom_loss_reward_maximization(val_predictions, val_inputs, val_outputs)
            val_loss = criterion(val_predictions, val_outputs)
            val_losses.append(val_loss.item())

            val_accuracy = calculate_accuracy(val_predictions, val_outputs)
            val_accuracies.append(val_accuracy)

        avg_val_loss = sum(val_losses) / len(val_losses)
        avg_val_accuracy = sum(val_accuracies) / len(val_accuracies)

        print(f'Epoch {epoch + 1}/{epochs}, Validation Loss: {avg_val_loss}, Validation Accuracy: {avg_val_accuracy}')

In [None]:
plot_validation_accuracy(val_accuracies)



```
# This is formatted as code
```

### Regression and RVI Analysis:

In [None]:
coefficients_df = pd.DataFrame(columns=['# Neurons', 'V1', 'V2', 'V3', 'Const', 'RVI'])

In [None]:
model.eval()
train_dataset = NoisySelectionDataset(num_samples=num_train_samples)
dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
with torch.no_grad():
    input_ = torch.cat([batch['input'] for batch in dataloader])
    hidden_activations1 = model.fc1(input_).numpy()
    output_activations = model.fc2(model.tanh(model.fc1(input_))).numpy()

In [None]:
num_neurons = len(hidden_activations1[0])
for i in range(num_neurons):
    X = sm.add_constant(input_.numpy())
    reg_model = sm.OLS(hidden_activations1[:, i], X)
    reg_model_results = reg_model.fit()

    params = reg_model_results.params

    coeff_list = np.array([params[1], params[2], params[3]])
    max_coeff_idx = np.argmax(coeff_list)
    mask = np.ones(coeff_list.size, dtype=bool)
    mask[max_coeff_idx] = False
    sum_others = np.sum(coeff_list[mask])
    RVI = -sum_others/coeff_list[max_coeff_idx]

    list_row = [num_neurons, params[1], params[2], params[3], params[0], RVI]
    coefficients_df.loc[len(coefficients_df)] = list_row

    coefficients_df.to_csv(f'Low_Cover_One_Hidden_Layer_Regression_Results.csv', index=False)


NameError: name 'hidden_activations1' is not defined

#### Single Layer; 6 neuron units:

In [None]:
class Simple6NN(nn.Module):
    def __init__(self):
        super(Simple6NN, self).__init__()
        self.fc1 = nn.Linear(num_values, 6)
        self.tanh = nn.Tanh()
        self.fc2 = nn.Linear(6, num_values)

    def forward(self, x):
        x = self.tanh(self.fc1(x))
        x = self.fc2(x)
        return x