In [594]:
import os
import random
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import roc_auc_score, accuracy_score, matthews_corrcoef, f1_score, recall_score

seed = 42
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)

## 1. Prepare data
### 1.1. Read data

In [595]:
level = "1"
data_path = f"/data/lujd/TCRdata/benchmarks/level{level}/"
train_df = pd.read_csv(os.path.join(data_path,"train_data_fold{}.csv".format(0)))
valid_df = pd.read_csv(os.path.join(data_path,"valid_data_fold{}.csv".format(0)))
test_df = pd.read_csv(os.path.join(data_path,"test_data_fold{}.csv".format(0)))
tcr2candidates = np.load(os.path.join(data_path, "tcr2candidates_pools.npy"), allow_pickle=True).tolist()

# rename beta'/'ab' -> 'pos_tcr'
if 'beta' in train_df.columns:
    train_df.rename(columns={'beta':'pos_tcr'}, inplace=True)
    valid_df.rename(columns={'beta':'pos_tcr'}, inplace=True)
    test_df.rename(columns={'beta':'pos_tcr'}, inplace=True)
elif 'ab' in train_df.columns:
    train_df.rename(columns={'ab':'pos_tcr'}, inplace=True)
    valid_df.rename(columns={'ab':'pos_tcr'}, inplace=True)
    test_df.rename(columns={'ab':'pos_tcr'}, inplace=True)
elif 'tcr' in train_df.columns:
    train_df.rename(columns={'tcr':'pos_tcr'}, inplace=True)
    valid_df.rename(columns={'tcr':'pos_tcr'}, inplace=True)
    test_df.rename(columns={'tcr':'pos_tcr'}, inplace=True)
else:
    raise TypeError("Wrong data format!")

train_df.head(3)

Unnamed: 0,pep,pos_tcr,label
0,YPDKVFRSS,CASSLEVTSQETQYF,1
1,AFLLFLVLI,CASSFTGTLNYGYTF,1
2,FYLCFLAFL,CASSYVGNEQFF,1


### 1.2. Select the same number of negative samples for each dataset

In [596]:
set_seed(seed)

def add_random_neg_samples(df, neg_tcr_candidates):
    df = df[df['label'] == 1]
    true_pep2tcr_mapping = df.groupby('pep')['pos_tcr'].unique().to_dict()

    new_rows = []
    for index, row in df.iterrows():
        pep = row['pep']
        features = row.drop('label')

        # sample negative tcr
        binding_tcrs = set(true_pep2tcr_mapping.get(pep))
        neg_tcr = random.choice(neg_tcr_candidates)
        while neg_tcr in binding_tcrs:
            neg_tcr = random.choice(neg_tcr_candidates)
        # print(neg_tcr)

        new_row = {'neg_tcr': neg_tcr}
        new_row.update(features)
        new_rows.append(new_row)

    return pd.DataFrame(new_rows)

train_df = add_random_neg_samples(train_df, tcr2candidates)
valid_df = add_random_neg_samples(valid_df, tcr2candidates)

test_dfs = []
for _ in range(5):
    # test_df = add_random_neg_samples(test_df, tcr2candidates)
    test_dfs.append(add_random_neg_samples(test_df, tcr2candidates))

train_df.head(3)

Unnamed: 0,neg_tcr,pep,pos_tcr
0,CASSVGASGGRYNEQFF,YPDKVFRSS,CASSLEVTSQETQYF
1,ASSFGGIGNEQF,AFLLFLVLI,CASSFTGTLNYGYTF
2,ASSLEGEYAQPQH,FYLCFLAFL,CASSYVGNEQFF


In [597]:
test_dfs[0].head(3)

Unnamed: 0,neg_tcr,pep,pos_tcr
0,CASSQDPWASGNEQYF,KLGGALQAK,SAWGIQDTQY
1,ASVVVGNEQF,YLCFLAFLL,CSADGLAGYQETQYF
2,ASSPGLGGASTDTQY,FYLCFLAFLL,CASSEEGGVPNYGYTF


In [598]:
test_dfs[1].head(3)

Unnamed: 0,neg_tcr,pep,pos_tcr
0,CASSSSGLTYEQYF,KLGGALQAK,SAWGIQDTQY
1,CASSHPQGVTSRSGNTIYF,YLCFLAFLL,CSADGLAGYQETQYF
2,ASSGGITGEETQY,FYLCFLAFLL,CASSEEGGVPNYGYTF


## 2. Data processing

### 2.1. pad then cat

- Uniform input length

In [599]:
if level in ['1']:
    pep_max_len = 15
    tcr_max_len_single = 19
elif level in ['2a', '2a_basic', '2b', '2b_basic', '3', '3_basic']:
    pep_max_len = 10
    tcr_max_len_single = 19
elif level in ['4', '4_basic']:
    pep_max_len = 10
    tcr_max_len_single = 121
print(f'level-{level}: pep_max_len={pep_max_len}, tcr_max_len(single chain)={tcr_max_len_single}')

train_df['pep'] = train_df['pep'].apply(lambda pep: pep.ljust(pep_max_len, 'X'))
train_df['pos_tcr'] = train_df['pos_tcr'].apply(
        lambda tcr: ''.join([s.ljust(tcr_max_len_single, 'X') for s in tcr.split('/')]))
train_df['neg_tcr'] = train_df['neg_tcr'].apply(
        lambda tcr: ''.join([s.ljust(tcr_max_len_single, 'X') for s in tcr.split('/')]))
print(train_df['pep'].str.len().unique(),
      train_df['pos_tcr'].str.len().unique(), 
      train_df['neg_tcr'].str.len().unique())

valid_df['pep'] = valid_df['pep'].apply(lambda pep: pep.ljust(pep_max_len, 'X'))
valid_df['pos_tcr'] = valid_df['pos_tcr'].apply(
        lambda tcr: ''.join([s.ljust(tcr_max_len_single, 'X') for s in tcr.split('/')]))
valid_df['neg_tcr'] = valid_df['neg_tcr'].apply(
        lambda tcr: ''.join([s.ljust(tcr_max_len_single, 'X') for s in tcr.split('/')]))
print(valid_df['pep'].str.len().unique(),
      valid_df['pos_tcr'].str.len().unique(), 
      valid_df['neg_tcr'].str.len().unique())

for df in test_dfs: 
    df['pep'] = df['pep'].apply(lambda pep: pep.ljust(pep_max_len, 'X'))
    df['pos_tcr'] = df['pos_tcr'].apply(
        lambda tcr: ''.join([s.ljust(tcr_max_len_single, 'X') for s in tcr.split('/')]))
    df['neg_tcr'] = df['neg_tcr'].apply(
        lambda tcr: ''.join([s.ljust(tcr_max_len_single, 'X') for s in tcr.split('/')]))
    print(df['pep'].str.len().unique(),
        df['pos_tcr'].str.len().unique(), 
        df['neg_tcr'].str.len().unique())

level-1: pep_max_len=15, tcr_max_len(single chain)=19
[15] [19] [19]
[15] [19] [19]
[15] [19] [19]
[15] [19] [19]
[15] [19] [19]
[15] [19] [19]
[15] [19] [19]


In [600]:
test_dfs[1].head(3)

Unnamed: 0,neg_tcr,pep,pos_tcr
0,CASSSSGLTYEQYFXXXXX,KLGGALQAKXXXXXX,SAWGIQDTQYXXXXXXXXX
1,CASSHPQGVTSRSGNTIYF,YLCFLAFLLXXXXXX,CSADGLAGYQETQYFXXXX
2,ASSGGITGEETQYXXXXXX,FYLCFLAFLLXXXXX,CASSEEGGVPNYGYTFXXX


In [601]:
def build_full_df(df):
    if 'hla' in df.columns:
        pos_df = df[['pep', 'hla', 'pos_tcr']]
        neg_df = df[['pep', 'hla', 'neg_tcr']]
    else:
        pos_df = df[['pep', 'pos_tcr']]
        neg_df = df[['pep', 'neg_tcr']]
    pos_df.rename(columns={'pos_tcr':'tcr'}, inplace=True)
    print(len(pos_df))
    pos_df['label'] = 1
    neg_df.rename(columns={'neg_tcr':'tcr'}, inplace=True)
    print(len(neg_df))
    neg_df.loc[:, 'label'] = 0
    return pd.concat((pos_df, neg_df), axis=0).reset_index(drop=True)

train_full_df = build_full_df(train_df)
valid_full_df = build_full_df(valid_df)

test_full_dfs = []
for df in test_dfs: 
    test_full_dfs.append(build_full_df(df))

548026
548026
68503
68503
68504
68504
68504
68504


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pos_df.rename(columns={'pos_tcr':'tcr'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  neg_df.rename(columns={'neg_tcr':'tcr'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pos_df.rename(columns={'pos_tcr':'tcr'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  neg

68504
68504
68504
68504
68504
68504


In [602]:
if 'hla' in train_full_df.columns:
    train_full_df['cat_seq'] = train_full_df['tcr'] + train_full_df['pep'] + train_full_df['hla']
    valid_full_df['cat_seq'] = valid_full_df['tcr'] + valid_full_df['pep'] + valid_full_df['hla']
    for df in test_full_dfs: 
        df['cat_seq'] = df['tcr'] + df['pep'] + df['hla']
else:
    train_full_df['cat_seq'] = train_full_df['tcr'] + train_full_df['pep']
    valid_full_df['cat_seq'] = valid_full_df['tcr'] + valid_full_df['pep']
    for df in test_full_dfs: 
        df['cat_seq'] = df['tcr'] + df['pep']

train_full_df.head(3)

Unnamed: 0,pep,tcr,label,cat_seq
0,YPDKVFRSSXXXXXX,CASSLEVTSQETQYFXXXX,1,CASSLEVTSQETQYFXXXXYPDKVFRSSXXXXXX
1,AFLLFLVLIXXXXXX,CASSFTGTLNYGYTFXXXX,1,CASSFTGTLNYGYTFXXXXAFLLFLVLIXXXXXX
2,FYLCFLAFLXXXXXX,CASSYVGNEQFFXXXXXXX,1,CASSYVGNEQFFXXXXXXXFYLCFLAFLXXXXXX


### 2.2. Shuffle Training data (Optional)

In [603]:
SHUFFLE_TRAIN = True
if SHUFFLE_TRAIN:
    train_full_df = train_full_df.sample(frac=1, random_state=seed).reset_index(drop=True)

train_full_df.head(3)

Unnamed: 0,pep,tcr,label,cat_seq
0,LIDFYLCFLXXXXXX,ASSELGEAFXXXXXXXXXX,0,ASSELGEAFXXXXXXXXXXLIDFYLCFLXXXXXX
1,SLIDFYLCFLXXXXX,CASSLPLSYEQYFXXXXXX,1,CASSLPLSYEQYFXXXXXXSLIDFYLCFLXXXXX
2,MIELSLIDFYXXXXX,CASSSYQGAYEQYFXXXXX,0,CASSSYQGAYEQYFXXXXXMIELSLIDFYXXXXX


## 3. Feature extraction

### 3.1. Encoding an amino acid to a numeric vetcor

- Onehot encoding

In [604]:
encoder = OneHotEncoder(categories='auto', sparse_output=False, handle_unknown='ignore')

# train
train_features = train_full_df['cat_seq'].values
train_labels = train_full_df['label'].values

train_features = [[s for s in seq] for seq in train_features]
train_features = np.array(train_features).reshape(-1, 1)            # [len(df)*seq_len, 1]

train_features = encoder.fit_transform(train_features)
train_features = train_features.reshape((len(train_full_df),-1))    # [len(df), seq_len]
print(train_features.shape, train_labels.shape)

print(encoder.categories_ )        # O

(1096052, 748) (1096052,)
[array(['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'O',
       'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'X', 'Y'], dtype='<U1')]


In [605]:
# valid
valid_features = valid_full_df['cat_seq'].values
valid_labels = valid_full_df['label'].values

valid_features = [[s for s in seq] for seq in valid_features]
valid_features = np.array(valid_features).reshape(-1, 1)            # [len(df)*seq_len, 1]

valid_features = encoder.transform(valid_features)
valid_features = valid_features.reshape((len(valid_full_df),-1))    # [len(df), seq_len]
print(valid_features.shape, valid_labels.shape)

(137006, 748) (137006,)


In [606]:
# test
test_features_list = []
test_labels_list = []
for test_full_df in test_full_dfs:
    test_features = test_full_df['cat_seq'].values
    test_labels = test_full_df['label'].values

    test_features = [[s for s in seq] for seq in test_features]
    test_features = np.array(test_features).reshape(-1, 1)            # [len(df)*seq_len, 1]

    test_features = encoder.transform(test_features)
    test_features = test_features.reshape((len(test_full_df),-1))    # [len(df), seq_len]
    print(test_features.shape, test_labels.shape)

    test_features_list.append(test_features)
    test_labels_list.append(test_labels)

(137008, 748) (137008,)
(137008, 748) (137008,)
(137008, 748) (137008,)
(137008, 748) (137008,)
(137008, 748) (137008,)


### 3.2. Scaler (Optional)

In [607]:
SCALE = False   # no need
if SCALE:
    scaler = StandardScaler()
    train_features = scaler.fit_transform(train_features)
    valid_features = scaler.transform(valid_features)
    for i in range(len(test_features_list)):
        test_features_list[i] = scaler.transform(test_features_list[i])

## 4. Model training
### 4.1. Prepare metrics

In [608]:
def cal_metrics(labels, y_pred, y_prob=[]):     # y_pred: label, y_prob: probability
    if len(y_prob) == 0:
        auc = roc_auc_score(labels, y_pred)     # same as acc
    else:
        assert len(labels) == len(y_prob)
        auc = roc_auc_score(labels, y_prob)
    acc = accuracy_score(labels, y_pred)
    mcc = matthews_corrcoef(labels, y_pred)
    f1 = f1_score(labels, y_pred)
    recall = recall_score(labels, y_pred)

    print("Validation Performance:")
    print(f"- AUC: {auc:.6f} | ACC: {acc:.6f} | MCC: {mcc:.6f} | F1:  {f1:.6f} | Recall:  {recall:.6f}")

    return (auc, acc, mcc, f1), recall

### 4.2. RandomForest

#### 4.2.1. Train RandomForest
<!-- - GridSeach using train data -->
- Use GridSeach for important hyperparameters

In [611]:
rf_model = RandomForestClassifier(n_estimators=300,     # 100, 200, 300
                                  # max_features=20, 
                                  # max_depth=10,       # None, 10, 20
                                  random_state=seed, 
                                  n_jobs=-1,
                                  bootstrap=True)

print(">>> Training starts.")
rf_model.fit(train_features, train_labels)
print(">>> Training ends.")

print(">>> Validating starts.")
y_valid_pred = rf_model.predict(valid_features)
y_valid_prob = rf_model.predict_proba(valid_features)[:, 1]
metrics, recall = cal_metrics(valid_labels, y_valid_pred, y_valid_prob)
print(f"- Average (AUC, ACC, MCC, F1): {sum(metrics)/len(metrics):6f}")
print(">>> Validating ends.")

>>> Training starts.
>>> Training ends.
>>> Validating starts.
Validation Performance:
- AUC: 0.819835 | ACC: 0.729596 | MCC: 0.465638 | F1:  0.750298 | Recall:  0.812505
- Average (AUC, ACC, MCC, F1): 0.691342
>>> Validating ends.


||1|2a|2a_basic|2b|2b_basic|3|3_basic|4|4_basic|
|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
|n_estimators|300|100|100|100|100|300|300|100|100|
|max_depth|None|20|20|10|10|10|10|10|10|

- Empty means default

- use max_depth: 0.68->0.72

- max_features: no use

#### 4.2.2. Test RandomForest

In [612]:
aucs, accs, mccs, f1s, recalls = [], [], [], [], []
for test_features, test_labels in zip(test_features_list, test_labels_list):
    y_test_pred = rf_model.predict(test_features)               # threshold=0.5 (>0.5: 1)
    y_test_prob = rf_model.predict_proba(test_features)[:, 1]

    test_metrics, test_recall = cal_metrics(test_labels, y_test_pred, y_test_prob)
    print(f"- Average (AUC, ACC, MCC, F1): {sum(test_metrics)/len(test_metrics):6f}")
    
    aucs.append(test_metrics[0])
    accs.append(test_metrics[1])
    mccs.append(test_metrics[2])
    f1s.append(test_metrics[3])
    recalls.append(test_recall)

print("Average on 5 test sets: ")
print(f"- AUC: {sum(aucs)/len(aucs):.6f} | ACC: {sum(accs)/len(accs):.6f} | MCC: {sum(mccs)/len(mccs):.6f} | F1:  {sum(f1s)/len(f1s):.6f} | Recall:  {sum(recalls)/len(recalls):.6f}")

Validation Performance:
- AUC: 0.820690 | ACC: 0.731103 | MCC: 0.468523 | F1:  0.751442 | Recall:  0.812931
- Average (AUC, ACC, MCC, F1): 0.692940
Validation Performance:
- AUC: 0.819538 | ACC: 0.730081 | MCC: 0.466613 | F1:  0.750733 | Recall:  0.812931
- Average (AUC, ACC, MCC, F1): 0.691742
Validation Performance:
- AUC: 0.821225 | ACC: 0.731249 | MCC: 0.468796 | F1:  0.751544 | Recall:  0.812931
- Average (AUC, ACC, MCC, F1): 0.693203
Validation Performance:
- AUC: 0.819985 | ACC: 0.731125 | MCC: 0.468564 | F1:  0.751457 | Recall:  0.812931
- Average (AUC, ACC, MCC, F1): 0.692783
Validation Performance:
- AUC: 0.819155 | ACC: 0.728804 | MCC: 0.464226 | F1:  0.749849 | Recall:  0.812931
- Average (AUC, ACC, MCC, F1): 0.690509
Average on 5 test sets: 
- AUC: 0.820119 | ACC: 0.730473 | MCC: 0.467345 | F1:  0.751005 | Recall:  0.812931


### 4.3. (Optional) XGBoost

#### 4.3.1. Train XGBoost

In [613]:
import xgboost as xgb

parameters = {'objective': 'binary:logistic', 
              'eval_metric': 'auc', 
              'eta': 0.3,       # learning_rate
              'seed': seed}
num_round = 2000

dtrain = xgb.DMatrix(train_features, label=train_labels)
dvalid = xgb.DMatrix(valid_features, label=valid_labels)
xgb_model = xgb.train(parameters, 
               dtrain, 
               num_boost_round=num_round,
               evals=[(dtrain,'train'), (dvalid,'val')],
               early_stopping_rounds=20,        # If you don't stop early, yo u will overfit. 20 is a more appropriate value when eta = 0.3.
               verbose_eval=True
               )

[0]	train-auc:0.67385	val-auc:0.66914
[1]	train-auc:0.67645	val-auc:0.67251
[2]	train-auc:0.68506	val-auc:0.67935
[3]	train-auc:0.68710	val-auc:0.68118
[4]	train-auc:0.69205	val-auc:0.68533
[5]	train-auc:0.69217	val-auc:0.68552
[6]	train-auc:0.69578	val-auc:0.68895
[7]	train-auc:0.69794	val-auc:0.69121
[8]	train-auc:0.69836	val-auc:0.69173
[9]	train-auc:0.69895	val-auc:0.69229
[10]	train-auc:0.70201	val-auc:0.69514
[11]	train-auc:0.70552	val-auc:0.69834
[12]	train-auc:0.70591	val-auc:0.69868
[13]	train-auc:0.70802	val-auc:0.70064
[14]	train-auc:0.70912	val-auc:0.70142
[15]	train-auc:0.71065	val-auc:0.70274
[16]	train-auc:0.71185	val-auc:0.70384
[17]	train-auc:0.71450	val-auc:0.70601
[18]	train-auc:0.71698	val-auc:0.70814
[19]	train-auc:0.71864	val-auc:0.70943
[20]	train-auc:0.71951	val-auc:0.70973
[21]	train-auc:0.72043	val-auc:0.71061
[22]	train-auc:0.72102	val-auc:0.71132
[23]	train-auc:0.72186	val-auc:0.71183
[24]	train-auc:0.72248	val-auc:0.71235
[25]	train-auc:0.72329	val-auc:0.71

||1|2a|2a_basic|2b|2b_basic|3|3_basic|4|4_basic|
|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
|eta|0.3|0.3|0.3|0.3|0.3|0.3|0.3|0.3|0.3|
|early stop|20|20|20|20|20|20|20|20|20|
|num_rounds|2000|500|500|500|500|500|500|500|500|

#### 4.3.2. Test XGBoost

In [614]:
aucs, accs, mccs, f1s, recalls = [], [], [], [], []
for test_features, test_labels in zip(test_features_list, test_labels_list):
    dtest = xgb.DMatrix(test_features)
    y_prob = xgb_model.predict(dtest)
    # y_prob = xgb_model.predict(dtest, iteration_range=(0, xgb_model.best_iteration+1))        # worse result

    threshold = 0.5                             # threshold
    y_pred = (y_prob > threshold).astype(int)   # same as >=

    metrics, recall = cal_metrics(test_labels, y_pred, y_prob)
    print(f"- Average (AUC, ACC, MCC, F1): {sum(metrics)/len(metrics):6f}")

    aucs.append(metrics[0])
    accs.append(metrics[1])
    mccs.append(metrics[2])
    f1s.append(metrics[3])
    recalls.append(recall)

print("Average on 5 test sets: ")
print(f"- AUC: {sum(aucs)/len(aucs):.6f} | ACC: {sum(accs)/len(accs):.6f} | MCC: {sum(mccs)/len(mccs):.6f} | F1:  {sum(f1s)/len(f1s):.6f} | Recall:  {sum(recalls)/len(recalls):.6f}")

Validation Performance:
- AUC: 0.791462 | ACC: 0.709426 | MCC: 0.426681 | F1:  0.734719 | Recall:  0.804771
- Average (AUC, ACC, MCC, F1): 0.665572
Validation Performance:
- AUC: 0.792309 | ACC: 0.710302 | MCC: 0.428318 | F1:  0.735307 | Recall:  0.804771
- Average (AUC, ACC, MCC, F1): 0.666559
Validation Performance:
- AUC: 0.791372 | ACC: 0.709462 | MCC: 0.426749 | F1:  0.734743 | Recall:  0.804771
- Average (AUC, ACC, MCC, F1): 0.665582
Validation Performance:
- AUC: 0.790133 | ACC: 0.708630 | MCC: 0.425194 | F1:  0.734186 | Recall:  0.804771
- Average (AUC, ACC, MCC, F1): 0.664536
Validation Performance:
- AUC: 0.790185 | ACC: 0.707265 | MCC: 0.422645 | F1:  0.733273 | Recall:  0.804771
- Average (AUC, ACC, MCC, F1): 0.663342
Average on 5 test sets: 
- AUC: 0.791092 | ACC: 0.709017 | MCC: 0.425917 | F1:  0.734445 | Recall:  0.804771


- (2b)if not early stop
> Validation Performance:
    - AUC: 0.735394
    - ACC: 0.659510
    - MCC: 0.321043
    - F1:  0.677575
    - Recall:  0.715539
    Average (AUC, ACC, MCC, F1): 0.598381

- (2b)if early stop, round=20
> Validation Performance:
    - AUC: 0.740468
    - ACC: 0.665794
    - MCC: 0.334877
    - F1:  0.687630
    - Recall:  0.735698
    Average (AUC, ACC, MCC, F1): 0.607192