# Example notebook: using the Clever Copy approach and chaining of PTM models

# Data preprocessing

In [None]:
from data_preprocessing import DataPreprocessing

In [None]:
# Instantiate DataPreprocessing for the citrullination dataset (R[UNIMOD:7]),
# restricted to HCD fragmentation and the FTMS mass analyzer, with
# train_ratio=0.8 and output directed to example/input_files.
# NOTE(review): the names of the files written to save_path are decided inside
# DataPreprocessing (not shown here); later cells read parquet files from
# example/input_files, so confirm the names match.
data_preprocessing = DataPreprocessing(
    file_path="/cmnfs/data/proteomics/Prosit_PTMs/TUM_mod_citrullination_2.parquet",
    save_path="example/input_files",
    train_ratio=0.8,
    modification="R[UNIMOD:7]",
    fragmentation="HCD",
    mass_analyzer="FTMS",
    create_smaller_trainset=[5,10]  # presumably sizes of additional reduced training sets; confirm units
)

## Clever Copy Approach

### Before Fine-tuning

In [None]:
from CleverCopyPredictorBeforeFinetuning import CleverCopyPredictorBeforeFineTuning

In [None]:
# Instantiate the before-fine-tuning Clever Copy predictor for modification
# "cR", loading the base Prosit weights and saving under "example".
# NOTE(review): the preprocessing cell uses modification="R[UNIMOD:7]", while
# the files referenced here are named with "cR"; confirm which naming the
# preprocessing step actually produces.
CC_predictor = CleverCopyPredictorBeforeFineTuning(
    modification="cR",
    test_path='example/input_files/test_HCD_cR_max.parquet',
    val_path='example/input_files/val_HCD_cR_max.parquet',
    case='normal',  # the last section of this notebook uses case='unseen' instead
    path_weights='example/base_prosit.weights.h5',
    save_path='example',
)

In [None]:
# Run the predictor; presumably this evaluates the test/validation files
# configured above (confirm against the class implementation).
CC_predictor.prediction()

In [None]:
# Clean up weights files; presumably removes intermediate weights created
# during prediction() (confirm which files are kept).
CC_predictor.cleanup_weights()

In [None]:
# Display the AA/SA dictionary (presumably amino acid -> spectral angle; confirm).
CC_predictor.get_AA_SA_dict()

In [None]:
# Display the SA value recorded for the single amino acid "P".
CC_predictor.get_SA_of_AA("P")

In [None]:
# Display the amino acid the predictor selects as best (selection criterion is
# defined in the class; presumably the highest SA, to be confirmed).
CC_predictor.get_best_AA()

In [None]:
# Display the weights path for the best amino acid; presumably this is the
# kind of path handed to the fine-tuning predictor as path_weights (confirm).
CC_predictor.get_weights_path_of_best_AA()

### Fine-tuning

In [None]:
from CleverCopyPredictorFinetuning import CleverCopyPredictorFineTuning

In [None]:
# Instantiate the fine-tuning Clever Copy predictor for modification "cR",
# starting from the "before" weights of amino acid M[UNIMOD:35].
# NOTE(review): trainsize=500 and the "_500_" weights file do not obviously
# match the "_50" suffix of the parquet files, nor the
# create_smaller_trainset=[5,10] used in the preprocessing cell; confirm the
# intended training-set size and file names.
CC_predictor_finetune = CleverCopyPredictorFineTuning(
    modification="cR",
    train_path='example/input_files/train_HCD_R[UNIMOD:7]_50.parquet',
    test_path='example/input_files/test_HCD_R[UNIMOD:7]_50.parquet',
    val_path='example/input_files/val_HCD_R[UNIMOD:7]_50.parquet',
    trainsize=500,
    path_weights='example/cR/before/weights/cR_M[UNIMOD:35]_500_before.weights.h5',
    save_path='example',
    amino_acid='M[UNIMOD:35]'  # the amino acid named in the path_weights file above
)

In [None]:
# Fine-tune the model; presumably trains on train_path with val_path for
# validation (confirm against the class implementation).
CC_predictor_finetune.train_model()

In [None]:
# Predict with the fine-tuned model and save the results; the output location
# is presumably derived from save_path (confirm).
CC_predictor_finetune.predict_and_save_results()

In [None]:
# Display the SA value obtained after fine-tuning.
CC_predictor_finetune.get_SA_after_ft()

In [None]:
# Display the path of the weights produced by fine-tuning.
CC_predictor_finetune.get_weights_path_after_ft()

# Create trainsets for chaining

In [None]:
from trainsets_chaining import Trainsets_chaining

In [None]:
# Create the training sets for the chain cR -> fK -> prK, reading from
# "example" and saving under "example/chaining".
# The instance is not bound to a name, so the work presumably happens in the
# constructor (confirm against Trainsets_chaining).
Trainsets_chaining(
    chain=["cR","fK","prK"],
    input_path_files="example",
    save_path="example/chaining",
    trainsize=5
)

# Chaining

In [None]:
from chaning_PTMs import Chaining_PTMs

In [None]:
# Modifications to chain; the same list is passed to Trainsets_chaining in the
# previous section.
chain=["cR","fK","prK"]

In [None]:
# Instantiate the chaining run over `chain`, reading data from "example" and
# saving under "example/chaining" with trainsize=5.
# NOTE(review): base_prosit_weights is given without a directory here, while
# the Clever Copy section loads 'example/base_prosit.weights.h5'; confirm
# whether Chaining_PTMs resolves this name relative to path_data.
chaining = Chaining_PTMs(
    chain=chain,
    path_data='example',
    save_path='example/chaining',
    base_prosit_weights='base_prosit.weights.h5',
    trainsize=5
)

In [None]:
# Run the chaining procedure; presumably handles the modifications in `chain`
# one after another (confirm against Chaining_PTMs).
chaining.chaining()

In [None]:
# Display the results collected by the chaining run.
chaining.get_results()

# Prediction of unseen PTMs

In [None]:
from CleverCopyPredictorBeforeFinetuning import CleverCopyPredictorBeforeFineTuning

In [None]:
# Evaluate the modification "nY" with case='unseen', loading the weights
# produced at the end of the cR -> fK -> prK chain.
# All paths are relative to the project root, like every other cell in this
# notebook; path_weights was already relative, so the notebook has to be run
# from that directory anyway. The previous user-specific absolute paths made
# this cell fail on any other machine.
CC_predictor = CleverCopyPredictorBeforeFineTuning(
    modification="nY",
    test_path='example/input_files/test_HCD_nY_max.parquet',
    val_path='example/input_files/val_HCD_nY_max.parquet',
    case='unseen',
    save_output_to_file=True,
    # The chained modifications; presumably they must be added to the alphabet
    # because the loaded weights were trained with them (confirm).
    mods_to_add_to_alphabet=["cR","fK","prK"],
    path_weights='example/chaining/prK/after/weights/prK_M_5_after.weights.h5',
    save_path='example/chaining/cR-fK-prK',
)

# Same sequence as in the "Before Fine-tuning" section, run in a single cell;
# results are printed explicitly because only a cell's last expression is
# displayed automatically.
CC_predictor.prediction()
CC_predictor.cleanup_weights()
print(CC_predictor.get_AA_SA_dict())
print(CC_predictor.get_best_AA())