In [11]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from scipy import ndimage
from sklearn.metrics import confusion_matrix, roc_auc_score, roc_curve
from sklearn.model_selection import cross_val_score

from proglearn.deciders import SimpleArgmaxAverage  # 投票决策（均值取最大）
from proglearn.progressive_learner import ProgressiveLearner
from proglearn.transformers import (
    NeuralClassificationTransformer,    # 基于神经网络 -- SillyN
    TreeClassificationTransformer,  # 基于决策树 -- SiilyF
)
from proglearn.voters import TreeClassificationVoter, KNNClassificationVoter



# Load data

In [12]:
df = pd.read_excel('Human.parcellated_thickness.xlsx')
df.head()

df_sex = pd.read_excel('subjects_age_sex_data_MRI.xlsx')
df_sex.head()

X1_human = []
X2_human = []
y_human = []
IDs = set(df['sid'])
ref_IDs = set(df_sex['ID'])

# match sex and feature
for subject in tqdm(IDs):
    if subject in ref_IDs:
        features = np.array(df[df['sid'] == subject]).reshape(-1)[2:]
        gender = list(df_sex[df_sex['ID'] == subject]['Sex'])
        sex = int(gender[0] == 'FEMALE')

        X1_human.append(list(features[:182]))  # feature1(Markov)
        X2_human.append(list(features[182:]))  # feature2(Scheafer)
        y_human.append(sex)

# switch to numpy array (easy in ML)
X1_human = np.array(X1_human)
X2_human = np.array(X2_human)
y_human = np.array(y_human)

df = pd.read_excel('Macaque.parcellated_thickness.xlsx')
df.head()
df_sex = pd.read_csv('uwmadison.csv')
df_sex.head()
X1_macaque = []
X2_macaque = []
y_macaque = []
IDs = set(df['participant_id'])
ref_IDs = set(df_sex['participant_id'])

for subject in tqdm(IDs):
    if subject in ref_IDs:
        features = np.array(df[df['participant_id'] == subject]).reshape(-1)[4:]
        gender = list(df_sex[df_sex['participant_id'] == subject]['sex'])
        sex = int(gender[0] == 'F')

        X1_macaque.append(list(features[:182]))
        X2_macaque.append(list(features[182:]))
        y_macaque.append(sex)

X1_macaque = np.array(X1_macaque)
X2_macaque = np.array(X2_macaque)
y_macaque = np.array(y_macaque)

valid_indices = ~np.isnan(X1_human).any(axis=1) & ~np.isnan(X2_human).any(axis=1)
X1_human = X1_human[valid_indices]
X2_human = X2_human[valid_indices]
y_human= np.array(y_human)[valid_indices]

valid_indices = ~np.isnan(X1_macaque).any(axis=1) & ~np.isnan(X2_macaque).any(axis=1)
X1_macaque = X1_macaque[valid_indices]
X2_macaque = X2_macaque[valid_indices]
y_macaque= np.array(y_macaque)[valid_indices]

100%|███████████████████████████████████████████████████████████████████████████| 14465/14465 [00:44<00:00, 326.33it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 592/592 [00:00<00:00, 1005.58it/s]


# Naive RF
## Directly

In [3]:
# Markove
reps = 5
accuracy = 0.0

for ii in tqdm(range(reps)):
    x_train, x_test, y_train, y_test = train_test_split(
                    X1_human, y_human, train_size=0.8, random_state=ii, stratify=y_human)
    clf = RandomForestClassifier(n_estimators=1000, n_jobs=-1)
    clf.fit(x_train,y_train)
    accuracy += np.mean(clf.predict(X1_macaque)==y_macaque)

print('Accuracy is (Naive Markov) ',accuracy/reps)

100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:58<00:00, 11.79s/it]

Accuracy is (Naive Markov)  0.4989510489510489





In [4]:
# Schaefer
reps = 5
accuracy = 0.0

for ii in tqdm(range(reps)):
    x_train, x_test, y_train, y_test = train_test_split(
                    X2_human, y_human, train_size=0.8, random_state=ii, stratify=y_human)
    clf = RandomForestClassifier(n_estimators=1000, n_jobs=-1)
    clf.fit(x_train,y_train)
    accuracy += np.mean(clf.predict(X2_macaque)==y_macaque)

print('Accuracy is (Naive Schaefer)',accuracy/reps)

100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [01:03<00:00, 12.79s/it]

Accuracy is (Naive Schaefer) 0.4951048951048951





## Using macaque tuned

In [14]:
# Markov
reps = 5
accuracy = 0.0

for ii in tqdm(range(reps)):
    x_train, x_test, y_train, y_test = train_test_split(
                    X1_macaque, y_macaque, train_size=0.8, random_state=ii, stratify=y_macaque)
    clf = RandomForestClassifier(n_estimators=1000, n_jobs=-1)
    clf.fit(X1_human,y_human)
    clf.fit(x_train, y_train)
    accuracy += np.mean(clf.predict(x_test)==y_test)

print('Accuracy is (Naive with tuning Markov) ',accuracy/reps)

100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [03:01<00:00, 36.36s/it]

Accuracy is (Naive with tuning Markov)  0.6434782608695653





In [15]:
# Schaefer
reps = 5
accuracy = 0.0

for ii in tqdm(range(reps)):
    x_train, x_test, y_train, y_test = train_test_split(
                    X2_macaque, y_macaque, train_size=0.8, random_state=ii, stratify=y_macaque)
    clf = RandomForestClassifier(n_estimators=1000, n_jobs=-1)
    clf.fit(X2_human,y_human)
    clf.fit(x_train, y_train)
    accuracy += np.mean(clf.predict(x_test)==y_test)

print('Accuracy is (Naive with tuning Schaefer) ',accuracy/reps)

100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [03:10<00:00, 38.19s/it]

Accuracy is (Naive with tuning Schaefer)  0.6747826086956522





# Silly-F

In [19]:
# Markov
default_transformer_class = TreeClassificationTransformer
default_transformer_kwargs = {
    "kwargs": {"max_depth": 30, "max_features": "sqrt"}
}

default_voter_class = TreeClassificationVoter
default_voter_kwargs = {}

default_decider_class = SimpleArgmaxAverage

reps = 5
accuracy1 = 0.0
accuracy2 = 0.0

for ii in tqdm(range(reps)):
    x_train, x_test, y_train, y_test = train_test_split(
                    X1_macaque, y_macaque, train_size=0.8, random_state=0, stratify=y_macaque)
    
    progressive_learner = ProgressiveLearner(
    default_transformer_class=default_transformer_class,
    default_transformer_kwargs=default_transformer_kwargs,
    default_voter_class=default_voter_class,
    default_voter_kwargs=default_voter_kwargs,
    default_decider_class=default_decider_class,
    )
    
    progressive_learner.add_task(
                X=X1_human,
                y=y_human,
                task_id=0,
                num_transformers=1000,
                transformer_voter_decider_split=[0.67, 0.33, 0],
                decider_kwargs={
                    "classes": np.unique(
                        y_human
                    )
                },
            )

    progressive_learner.add_task(
                X=x_train,
                y=y_train,
                task_id=1,
                num_transformers=1000,
                transformer_voter_decider_split=[0.67, 0.33, 0],
                decider_kwargs={
                    "classes": np.unique(
                        y_train
                    )
                },
            )


    multitask_label = progressive_learner.predict(x_test, task_id=1)
    singletask_label = progressive_learner.predict(x_test, task_id=1, transformer_ids=[1])
    accuracy1 += np.mean(multitask_label==y_test)
    accuracy2 += np.mean(singletask_label==y_test)

print('Human and monkey accuracy (Silly-F Markov)', accuracy1/reps) # 基于人和猴
print('Only money accuracy (Silly-F Markov)', accuracy2/reps) # 只基于猴训练


100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [35:00<00:00, 420.10s/it]

Human and monkey accuracy (Silly-F Markov) 0.6173913043478261
Only money accuracy (Silly-F Markov) 0.671304347826087





In [20]:
# Schaefer
default_transformer_class = TreeClassificationTransformer
default_transformer_kwargs = {
    "kwargs": {"max_depth": 30, "max_features": "sqrt"}
}

default_voter_class = TreeClassificationVoter
default_voter_kwargs = {}

default_decider_class = SimpleArgmaxAverage

reps = 5
accuracy1 = 0.0
accuracy2 = 0.0

for ii in tqdm(range(reps)):
    x_train, x_test, y_train, y_test = train_test_split(
                    X2_macaque, y_macaque, train_size=0.8, random_state=0, stratify=y_macaque)

    progressive_learner = ProgressiveLearner(
    default_transformer_class=default_transformer_class,
    default_transformer_kwargs=default_transformer_kwargs,
    default_voter_class=default_voter_class,
    default_voter_kwargs=default_voter_kwargs,
    default_decider_class=default_decider_class,
    )

    
    progressive_learner.add_task(
                X=X2_human,
                y=y_human,
                task_id=0,
                num_transformers=1000,
                transformer_voter_decider_split=[0.67, 0.33, 0],
                decider_kwargs={
                    "classes": np.unique(
                        y_human
                    )
                },
            )

    progressive_learner.add_task(
                X=x_train,
                y=y_train,
                task_id=1,
                num_transformers=1000,
                transformer_voter_decider_split=[0.67, 0.33, 0],
                decider_kwargs={
                    "classes": np.unique(
                        y_train
                    )
                },
            )


    multitask_label = progressive_learner.predict(x_test, task_id=1)
    singletask_label = progressive_learner.predict(x_test, task_id=1, transformer_ids=[1])
    accuracy1 += np.mean(multitask_label==y_test)
    accuracy2 += np.mean(singletask_label==y_test)


print('Human and monkey accuracy (Silly-F Schaefer)', accuracy1/reps) 
print('Only money accuracy (Silly-F Schaefer)', accuracy2/reps) 

100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [35:04<00:00, 420.91s/it]

Human and monkey accuracy (Silly-F Schaefer) 0.6173913043478261
Only money accuracy (Silly-F Schaefer) 0.6156521739130435





In [25]:
# both
default_transformer_class = TreeClassificationTransformer
default_transformer_kwargs = {
    "kwargs": {"max_depth": 30, "max_features": "sqrt"}
}

default_voter_class = TreeClassificationVoter
default_voter_kwargs = {}

default_decider_class = SimpleArgmaxAverage

reps = 5
accuracy1 = 0.0
accuracy2 = 0.0

for ii in tqdm(range(reps)):
    x_train, x_test, y_train, y_test = train_test_split(
                    np.hstack((X1_macaque, X2_macaque)), y_macaque, train_size=0.8, random_state=0, stratify=y_macaque)

    progressive_learner = ProgressiveLearner(
    default_transformer_class=default_transformer_class,
    default_transformer_kwargs=default_transformer_kwargs,
    default_voter_class=default_voter_class,
    default_voter_kwargs=default_voter_kwargs,
    default_decider_class=default_decider_class,
    )

    
    progressive_learner.add_task(
                X=np.hstack((X1_human,X2_human)),
                y=y_human,
                task_id=0,
                num_transformers=1000,
                transformer_voter_decider_split=[0.67, 0.33, 0],
                decider_kwargs={
                    "classes": np.unique(
                        y_human
                    )
                },
            )

    progressive_learner.add_task(
                X=x_train,
                y=y_train,
                task_id=1,
                num_transformers=1000,
                transformer_voter_decider_split=[0.67, 0.33, 0],
                decider_kwargs={
                    "classes": np.unique(
                        y_train
                    )
                },
            )


    multitask_label = progressive_learner.predict(x_test, task_id=1)
    singletask_label = progressive_learner.predict(x_test, task_id=1, transformer_ids=[1])
    accuracy1 += np.mean(multitask_label==y_test)
    accuracy2 += np.mean(singletask_label==y_test)


print('Human and monkey accuracy (Silly-F both)', accuracy1/reps) 
print('Only money accuracy (Silly-F both)', accuracy2/reps) 

100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [44:32<00:00, 534.45s/it]

Human and monkey accuracy (Silly-F both) 0.6191304347826087
Only money accuracy (Silly-F both) 0.6313043478260869





# Silly-N

In [None]:
from proglearn import LifelongClassificationNetwork
# proglearn.LifelongClassificationNetwork(network, loss='categorical_crossentropy', 
# optimizer=<keras.optimizer_v2.adam.Adam object>, epochs=100, batch_size=32, verbose=False, 
# default_network_construction_proportion=0.67)
