In [91]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
import sklearn.metrics
import scipy.stats
import pandas as pd
import numpy as np
import util.Data
# from autosklearn.classification import AutoSklearnClassifier

In [203]:
# Imports
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical

In [14]:
#dataset
csv_filename = r'../repr/data/final_coordinates-conversion-v2.csv'
df_data = util.Data.import_data(csv_filename)

In [15]:
df_data.shape

(656, 26)

In [16]:
df_data.dtypes

importance        object
author            object
title             object
table_name        object
keywords          object
Left/Right        object
Name              object
Broadman Area     object
t                float64
X(R)               int64
Y(A)               int64
Z(S)               int64
z-score          float64
NV               float64
p value          float64
cluster size     float64
index_col          int64
MNIX               int64
MNIY               int64
MNIZ               int64
BA                object
TALX               int64
TALY               int64
TALZ               int64
BA_name           object
BA_no            float64
dtype: object

## Dataframe valid

In [17]:
mask_outside_all = df_data['BA_name'].str.contains('Outside defined BAs')
df_data_valid = df_data[~mask_outside_all]

In [18]:
df_data_valid.shape

(483, 26)

In [19]:
set_keywords_selection = set(
    ["reward magnitude", "anticipation phase", "outcome phase", "decision phase", "working memory", "viewing",
     "monetary reward", "high gain vs high loss", "taste reward", "social reward", "monetary and taste reward", "food ",
     "high calorie", "low calorie", "positive reward", "negative reward", "context-dependent reward", "risk choice",
     "safe choice"])

In [20]:
df = df_data_valid
counter = {}
for val in set_keywords_selection:
    counter[val] = df['keywords'][df['keywords'].str.contains(val)].count()
counter = {k: v for k, v in sorted(counter.items(), key=lambda item: item[1])}
counter

{'working memory': 0,
 'taste reward': 0,
 'safe choice': 0,
 'monetary and taste reward': 0,
 'high calorie': 5,
 'risk choice': 14,
 'high gain vs high loss': 14,
 'food ': 18,
 'decision phase': 18,
 'low calorie': 18,
 'negative reward': 23,
 'viewing': 24,
 'context-dependent reward': 27,
 'anticipation phase': 76,
 'social reward': 84,
 'positive reward': 97,
 'reward magnitude': 146,
 'monetary reward': 277,
 'outcome phase': 378}

In [126]:
# refine keywords
min_val = 20
set_keywords = [key for key in set_keywords_selection if counter[key] >= min_val]
set_keywords

['viewing',
 'negative reward',
 'positive reward',
 'context-dependent reward',
 'outcome phase',
 'monetary reward',
 'anticipation phase',
 'social reward',
 'reward magnitude']

In [127]:
# significance
def significance(row, df=10):
    if not (pd.isna(row['p value'])):
        return abs(row['p value'])
    elif not (pd.isna(row['z-score'])):
        return scipy.stats.norm.sf(row['z-score'])
    elif not (pd.isna(row['t'])):
        return scipy.stats.t.sf(row['t'], df)
    return 0

In [128]:
df_data_valid['significance'] = df_data_valid.apply(significance, axis=1)
df_data_valid

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,importance,author,title,table_name,keywords,Left/Right,Name,Broadman Area,t,X(R),...,MNIX,MNIY,MNIZ,BA,TALX,TALY,TALZ,BA_name,BA_no,significance
2,++,Cohen et al,Individual differences in extraversion and dop...,Table 1 Study 2 reward - no reward,fMRI; MNI; extraversion; dopamine; money rewar...,L.,hippocampus,,5.40,-28,...,-28,-24,-14,Left-Hippocampus,-26,-25,-8,Left-Hippocampus,,0.000151
3,++,Cohen et al,Individual differences in extraversion and dop...,Table 1 Study 2 reward - no reward,fMRI; MNI; extraversion; dopamine; money rewar...,L.,orbitofrontal cortex,11,5.31,-21,...,-21,21,-25,Left-ParsOrbitalis (47),-21,16,-20,Left-ParsOrbitalis,47.0,0.000171
4,++,Cohen et al,Individual differences in extraversion and dop...,Table 1 Study 2 reward - no reward,fMRI; MNI; extraversion; dopamine; money rewar...,R.,precuneus,23,5.21,4,...,4,-56,35,Right-DorsalPCC (31),4,-53,33,Right-DorsalPCC,31.0,0.000198
5,++,Cohen et al,Individual differences in extraversion and dop...,Table 1 Study 2 reward - no reward,fMRI; MNI; extraversion; dopamine; money rewar...,R.,orbitofrontal cortex,11,5.11,25,...,25,25,-21,Right-ParsOrbitalis (47),24,20,-16,Right-ParsOrbitalis,47.0,0.000229
6,++,Cohen et al,Individual differences in extraversion and dop...,Table 1 Study 2 reward - no reward,fMRI; MNI; extraversion; dopamine; money rewar...,R.,posterior insula,,4.79,39,...,39,-21,14,Right-PrimAuditory (41),37,-21,16,Right-PrimAuditory,41.0,0.000367
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
650,+,Rademacher et al,Dissociation of neural networks for anticipati...,Table 2-2 - Brain regions activation during re...,outcome phase; social reward; SID; fMRI; rewar...,L,superior temporal gyrus,38,,-36,...,-38,10,-27,Left-Temporalpole (38),-36,5,-20,Left-Temporalpole,38.0,0.000009
651,+,Rademacher et al,Dissociation of neural networks for anticipati...,Table 2-2 - Brain regions activation during re...,outcome phase; social reward; SID; fMRI; rewar...,R,cingulate gyrus/posterior cingulate,31,,3,...,3,-53,25,Right-VentPostCing (23),3,-51,25,Right-VentPostCing,23.0,0.000011
652,+,Rademacher et al,Dissociation of neural networks for anticipati...,Table 2-2 - Brain regions activation during re...,outcome phase; social reward; SID; fMRI; rewar...,L,middle temporal gyrus,21,,-56,...,-59,-3,-18,Left-MedTempGyrus (21),-56,-6,-12,Left-MedTempGyrus,21.0,0.000050
653,+,Rademacher et al,Dissociation of neural networks for anticipati...,Table 2-2 - Brain regions activation during re...,outcome phase; social reward; SID; fMRI; rewar...,R,middle frontal gyrus,11,,36,...,38,39,-17,Right-ParsOrbitalis (47),36,34,-12,Right-ParsOrbitalis,47.0,0.000057


In [148]:
# Create y
y = []
x = []
for _, row in df_data_valid.iterrows():
    x.append(np.array([row['MNIX'], row['MNIY'], row['MNIZ']]))
    y.append(
        [round(1 * (1 - row['significance']), 4) if key in row['keywords'] else 0 for key in set_keywords])
        # np.array([1 if key in row['keywords'] else 0 for key in set_keywords]))
y = np.array(y)
x = np.array(x)

In [149]:
# Split into train and test sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.15, random_state=23)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train.shape,y_train.shape

((410, 3), (410, 9))

In [150]:
#normalize
#check for outliers

# Model

In [232]:
# get the model
def get_model(n_inputs, n_outputs):
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Dense(20, input_dim=n_inputs, kernel_initializer='he_uniform', activation='relu'))
    model.add(tf.keras.layers.Dense(40, kernel_initializer='he_uniform', activation='relu'))
    model.add(tf.keras.layers.Dropout(0.4))
    model.add(tf.keras.layers.Dense(n_outputs, kernel_initializer='he_uniform',activation='sigmoid'))
    model.compile(loss='mse', optimizer='adam')
    return model

In [319]:
# get the model
def get_model(n_inputs, n_outputs):
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Dense(30, input_dim=n_inputs, kernel_initializer='he_uniform', activation='relu'))
    # model.add(tf.keras.layers.Dense(40, kernel_initializer='he_uniform', activation='relu'))
    # model.add(tf.keras.layers.Dropout(0.4))
    model.add(tf.keras.layers.Dense(n_outputs, kernel_initializer='he_uniform'))
    model.compile(loss='mae', optimizer='adam')
    return model

In [320]:
n_inputs, n_outputs = x_train.shape[1], y.shape[1]

In [321]:
# get model
model = get_model(n_inputs, n_outputs)
# fit the model on all data
model.fit(x_train, y_train, verbose=1, epochs=1000)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

<tensorflow.python.keras.callbacks.History at 0x1b0c3181e88>

In [322]:
x_test[0,:]

array([-18.,  44., -22.], dtype=float32)

In [323]:
# evaluate model
# evaluate model on test set
mae = model.evaluate(x_test, y_test, verbose=0)
# summarize performance
print('MAE: %.3f' % (mae))

MAE: 0.209


In [324]:
yhat_all = model.predict(x_test)
print("mse:", sklearn.metrics.mean_absolute_error(y_test, yhat_all))

mse: 0.20914281120250963


In [302]:
yhat = model.predict(x_test[:5,:])

In [307]:
yhat[1]

array([ 0.02148505,  0.05069062,  0.02778497, -0.05018403,  0.92462796,
        0.29057324,  0.00675656,  0.08355724,  0.03858418], dtype=float32)

In [308]:
y_test[:5]

array([[0.    , 0.    , 0.    , 0.    , 0.9998, 0.9998, 0.    , 0.    ,
        0.    ],
       [0.    , 0.    , 0.    , 0.    , 1.    , 0.    , 0.    , 1.    ,
        1.    ],
       [0.995 , 0.    , 0.    , 0.    , 0.995 , 0.    , 0.    , 0.    ,
        0.    ],
       [0.    , 0.    , 0.    , 0.    , 1.    , 0.    , 0.    , 1.    ,
        0.    ],
       [0.    , 0.    , 0.    , 0.    , 0.9995, 0.    , 0.9995, 0.    ,
        0.9995]])

In [309]:
y_t1=[1,1,1,1,1,1]
y_t2=[1,0,0,0,0,1]

In [310]:
print("mse:", sklearn.metrics.mean_squared_error(y_t1, y_t2))

mse: 0.6666666666666666


In [311]:
i = 1
y_predict_1el = model.predict(np.expand_dims(x_test[i,:], axis=0))
y_test_1el =np.expand_dims(y_test[i,:], axis=0)
print("mse:", sklearn.metrics.mean_squared_error(y_test_1el, y_predict_1el))

mse: 0.20674102238676584


In [313]:
x_test[0]

array([-18.,  44., -22.], dtype=float32)

In [314]:
y_test[0]

array([0.    , 0.    , 0.    , 0.    , 0.9998, 0.9998, 0.    , 0.    ,
       0.    ])

In [315]:
x_tst2=[
    [-18,44,-22],
    [-17,44,-22],
    [-19,44,-22],
    [-18,43,-22],
    [-18,45,-22],
    [-18,44,-21],
    [-18,44,-23],
    [-17,43,-21],
    [-18,45,-23],
]

In [317]:
y_pred2 = model.predict(x_tst2)

In [318]:
y_pred2

array([[ 0.06153174,  0.11945936,  0.02218643, -0.04173257,  0.84683245,
         0.5282737 , -0.02734254,  0.07087146,  0.13504738],
       [ 0.06164237,  0.11909673,  0.02400413, -0.04223611,  0.84635276,
         0.5331298 , -0.02734302,  0.07339298,  0.13809532],
       [ 0.06141921,  0.11981937,  0.020363  , -0.04122903,  0.84731406,
         0.5234214 , -0.02733729,  0.06834614,  0.13200134],
       [ 0.0597026 ,  0.11745617,  0.02114502, -0.0407293 ,  0.84880275,
         0.5252944 , -0.02866576,  0.07006466,  0.13011116],
       [ 0.06336089,  0.12146088,  0.0232164 , -0.04274156,  0.844865  ,
         0.5312606 , -0.02601264,  0.07166874,  0.13999313],
       [ 0.06153365,  0.11765119,  0.02170005, -0.04127099,  0.8482029 ,
         0.53578675, -0.02328656,  0.06655895,  0.13842529],
       [ 0.06153365,  0.12127253,  0.02267089, -0.04219415,  0.8454649 ,
         0.52076256, -0.03140042,  0.07518017,  0.13167518],
       [ 0.05981322,  0.11528656,  0.02247825, -0.04076554,  0

In [None]:
print("mse:", sklearn.metrics.mean_squared_error(y_tst2, y_pred2))

In [None]:
print("mse:", sklearn.metrics.mean_squared_error(y_tst2, y_pred2))

In [109]:
print("mse:", sklearn.metrics.mean_squared_error(y_test[0], y_predict))

ValueError: Found input variables with inconsistent numbers of samples: [6, 1]

In [101]:
print("mse:", sklearn.metrics.mean_squared_error(y_test, y_predict))

mse: 0.1855189306554633


array([[0, 1, 1, 0, 0, 0],
       [0, 1, 0, 0, 1, 1],
       [0, 1, 0, 0, 0, 0],
       [0, 1, 0, 0, 1, 0],
       [0, 1, 0, 1, 0, 1]])

## Dataframe valid

In [None]:
mask_outside_all = df_data['BA_name'].str.contains('Outside defined BAs')
df_data_valid = df_data[~mask_outside_all]

In [None]:
df_data_valid.shape

In [None]:
set_keywords_selection = set(
    ["reward magnitude", "anticipation phase", "outcome phase", "decision phase", "working memory", "viewing",
     "monetary reward", "high gain vs high loss", "taste reward", "social reward", "monetary and taste reward", "food ",
     "high calorie", "low calorie", "positive reward", "negative reward", "context-dependent reward", "risk choice",
     "safe choice"])

In [None]:
df = df_data_valid
counter = {}
for val in set_keywords_selection:
    counter[val] = df['keywords'][df['keywords'].str.contains(val)].count()
counter = {k: v for k, v in sorted(counter.items(), key=lambda item: item[1])}
counter

In [None]:
# refine keywords
min_val = 20
set_keywords = [key for key in set_keywords_selection if counter[key] >= min_val]
set_keywords

In [None]:
# significance
def significance(row, df=10):
    if not (pd.isna(row['p value'])):
        return abs(row['p value'])
    elif not (pd.isna(row['z-score'])):
        return scipy.stats.norm.sf(row['z-score'])
    elif not (pd.isna(row['t'])):
        return scipy.stats.t.sf(row['t'], df)
    return 0

In [None]:
df_data_valid['significance'] = df_data_valid.apply(significance, axis=1)

In [None]:
# Create y
y = []
x = []
for _, row in df_data_valid.iterrows():
    x.append(np.array([row['MNIX'], row['MNIY'], row['MNIZ']]))
    y.append(
        # [round(1 * (1 - row['significance']), 4) if key in row['keywords'] else 0 for key in set_keywords_selection])
        np.array([1 if key in row['keywords'] else 0 for key in set_keywords]))
y = np.array(y)
x = np.array(x)

In [None]:
# Split into train and test sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.15, random_state=23)




In [None]:
# Configuration options
feature_vector_length = 784
num_classes = 10

In [None]:
# Configuration options
feature_vector_length = 784
num_classes = 10

In [2]:
# Configuration options
feature_vector_length = 784
num_classes = 10

# Load the data
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()

# Reshape the data - MLPs do not understand such things as '2D'.
# Reshape to 28 x 28 pixels = 784 features
X_train = X_train.reshape(X_train.shape[0], feature_vector_length)
X_test = X_test.reshape(X_test.shape[0], feature_vector_length)

# Convert into greyscale
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255

# Convert target classes to categorical ones
Y_train = to_categorical(Y_train, num_classes)
Y_test = to_categorical(Y_test, num_classes)

# Set the input shape
input_shape = (feature_vector_length,)
print(f'Feature shape: {input_shape}')

# Create the model
model = Sequential()
model.add(Dense(350, input_shape=input_shape, activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))

# Configure the model and start training
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, Y_train, epochs=10, batch_size=250, verbose=1, validation_split=0.2)

# Test the model after training
test_results = model.evaluate(X_test, Y_test, verbose=1)
print(f'Test results - Loss: {test_results[0]} - Accuracy: {test_results[1]}%')

Feature shape: (784,)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test results - Loss: 0.0757666677236557 - Accuracy: 0.9782999753952026%


In [None]:
# Configuration options
feature_vector_length = 784
num_classes = 10

# Load the data
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()

# Reshape the data - MLPs do not understand such things as '2D'.
# Reshape to 28 x 28 pixels = 784 features
X_train = X_train.reshape(X_train.shape[0], feature_vector_length)
X_test = X_test.reshape(X_test.shape[0], feature_vector_length)

# Convert into greyscale
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255

# Convert target classes to categorical ones
Y_train = to_categorical(Y_train, num_classes)
Y_test = to_categorical(Y_test, num_classes)

# Set the input shape
input_shape = (feature_vector_length,)
print(f'Feature shape: {input_shape}')

# Create the model
model = Sequential()
model.add(Dense(350, input_shape=input_shape, activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))

# Configure the model and start training
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, Y_train, epochs=10, batch_size=250, verbose=1, validation_split=0.2)

# Test the model after training
test_results = model.evaluate(X_test, Y_test, verbose=1)
print(f'Test results - Loss: {test_results[0]} - Accuracy: {test_results[1]}%')