<a href="https://colab.research.google.com/github/Hadrien-Cornier/cool-nn-stuff/blob/main/nn_uncertainty_estimate_boolean_value.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [22]:
pip install jax jaxlib tensorflow-datasets numpy -q

In [8]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
import tensorflow_datasets as tfds
import jax.numpy as jnp
from jax import random, grad, jit, vmap
import jax.scipy.special as jsp
import seaborn as sns
import pandas as pd

# Load the Titanic dataset
titanic = sns.load_dataset('titanic')

# Display the first few rows
print(titanic.head())

   survived  pclass     sex   age  sibsp  parch     fare embarked  class  \
0         0       3    male  22.0      1      0   7.2500        S  Third   
1         1       1  female  38.0      1      0  71.2833        C  First   
2         1       3  female  26.0      0      0   7.9250        S  Third   
3         1       1  female  35.0      1      0  53.1000        S  First   
4         0       3    male  35.0      0      0   8.0500        S  Third   

     who  adult_male deck  embark_town alive  alone  
0    man        True  NaN  Southampton    no  False  
1  woman       False    C    Cherbourg   yes  False  
2  woman       False  NaN  Southampton   yes   True  
3  woman       False    C  Southampton   yes  False  
4    man        True  NaN  Southampton    no   True  


In [9]:
# Display feature names and data types
print(titanic.dtypes)

survived          int64
pclass            int64
sex              object
age             float64
sibsp             int64
parch             int64
fare            float64
embarked         object
class          category
who              object
adult_male         bool
deck           category
embark_town      object
alive            object
alone              bool
dtype: object


In [10]:
# Define the features and target
features = ['sex', 'pclass', 'embarked', 'who', 'alone']
target = 'survived'

# Drop rows with missing target values
titanic = titanic.dropna(subset=[target])

# Separate features and target
X = titanic[features]
y = titanic[target]

# Preprocess categorical features
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), features)
    ])

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Apply the preprocessor to the training data
X_train = preprocessor.fit_transform(X_train)
X_test = preprocessor.transform(X_test)

print("Preprocessed feature names:", preprocessor.get_feature_names_out())

Preprocessed feature names: ['cat__sex_female' 'cat__sex_male' 'cat__pclass_1' 'cat__pclass_2'
 'cat__pclass_3' 'cat__embarked_C' 'cat__embarked_Q' 'cat__embarked_S'
 'cat__embarked_nan' 'cat__who_child' 'cat__who_man' 'cat__who_woman'
 'cat__alone_False' 'cat__alone_True']


In [13]:
X_train.shape

(712, 14)

In [16]:
y_train.shape

(712,)

In [14]:
X_test.shape

(179, 14)

In [17]:
y_test.shape

(179,)

In [18]:
# Initialize neural network parameters
def random_layer_params(m, n, key, scale=1e-2):
    w_key, b_key = random.split(key)
    return scale * random.normal(w_key, (n, m)), scale * random.normal(b_key, (n,))

def init_network_params(sizes, key):
    keys = random.split(key, len(sizes))
    return [random_layer_params(m, n, k) for m, n, k in zip(sizes[:-1], sizes[1:], keys)]

layer_sizes = [X_train.shape[1], 64, 64, 2]  # Example architecture
params = init_network_params(layer_sizes, random.PRNGKey(0))

In [21]:
[p[0].shape for p in params]

[(64, 14), (64, 64), (2, 64)]

In [11]:
# Define the neural network and loss function
def relu(x):
    return jnp.maximum(0, x)

def predict(params, x):
    activations = x
    for w, b in params[:-1]: # everything except the last layer
        outputs = jnp.dot(w, activations) + b
        activations = relu(outputs)
    final_w, final_b = params[-1]
    logits = jnp.dot(final_w, activations) + final_b
    alpha, beta = jnp.exp(logits)  # Ensure alpha and beta are positive
    return alpha, beta

def negative_binomial_log_likelihood(params, x, y):
    alpha, beta = predict(params, x)
    log_likelihood = jsp.gammaln(alpha + y) - jsp.gammaln(y + 1) - jsp.gammaln(alpha) + alpha * jnp.log(beta) + y * jnp.log(1 - beta)
    return -jnp.mean(log_likelihood)

loss_grad_fn = jit(grad(negative_binomial_log_likelihood))

# Training loop
learning_rate = 0.01

for epoch in range(10):  # Number of epochs
    grads = loss_grad_fn(params, X_train, y_train)
    params = [(w - learning_rate * dw, b - learning_rate * db) for (w, b), (dw, db) in zip(params, grads)]
    print(f"Epoch {epoch} complete")

# Prediction function
def predict_alpha_beta(params, x):
    alpha, beta = predict(params, x)
    return alpha, beta

# Example usage
example_features = X_test[0]
alpha, beta = predict_alpha_beta(params, example_features)
print(f"Predicted alpha: {alpha}, Predicted beta: {beta}")

TypeError: dot_general requires contracting dimensions to have the same shape, got (14,) and (712,).