In [None]:
import os
import sys
import datetime
from pathlib import Path
import logging
from main import SigmoidFitter, MeltomeAtlasHandler, setup_logging

import json
import numpy as np
import pandas as pd
import seaborn as sns

# Minute-resolution timestamp shared by the results directory name and the
# log file name below.
now = datetime.datetime.now()
timestamp_str = f"{now:%Y-%m-%d_%H-%M}"

# Relative to the kernel's working directory.
output_path = "./results_meltome_" + timestamp_str
setup_logging("./testing_" + timestamp_str + ".log", logging.DEBUG)

2025-12-10 17:29:16 - root - INFO - Logging to file: ./2025-12-10_17-29_testing.log


In [None]:
# `__file__` is not defined inside a notebook kernel, so the project root is
# derived from the working directory instead: two levels above it.
CWD = Path.cwd().resolve()
PROJECT_ROOT = CWD.parent.parent
print(PROJECT_ROOT)
# Dataset location below the project root.
file_path = PROJECT_ROOT.joinpath("Data", "FLIP", "splits", "meltome", "full_dataset.json")
print(file_path)

C:\Users\alexa\Documents\PROHITS
C:\Users\alexa\Documents\PROHITS\Data\FLIP\splits\meltome\full_dataset.json


In [21]:
# Sample subset of Meltome
# Build the handler from the dataset path and output directory defined in
# earlier cells, then print the shape of the table it loaded.
data_handler = MeltomeAtlasHandler(file_path, output_path, logging.DEBUG)
print(data_handler.data.shape)
# NOTE(review): in the recorded run select_subset(100) yielded 2500 rows, so
# 100 is presumably a per-group sample size rather than a total — confirm
# against main.py.
df = data_handler.select_subset(100)
# chunk0 is sliced BEFORE the reassignment below, so it holds the first 25
# rows of the originally loaded table, not of the subset (assuming
# select_subset does not modify data_handler.data in place — TODO confirm).
chunk0 = data_handler.data.iloc[:25]
# From here on the handler works on the subset only.
data_handler.data = df 
print(data_handler.data.shape)

2025-12-10 17:29:16 - MeltomeAtlasHandler - INFO - Meltome Handler initialization
2025-12-10 17:29:16 - MeltomeAtlasHandler - DEBUG - DEBUG Mode
2025-12-10 17:29:19 - MeltomeAtlasHandler - INFO - Meltome data loaded from C:\Users\alexa\Documents\PROHITS\Data\FLIP\splits\meltome\full_dataset.json
2025-12-10 17:29:19 - MeltomeAtlasHandler - INFO - Meltome header ['proteinId', 'uniprotAccession', 'runName', 'meltingPoint', 'meltingBehaviour', 'sequence', 'quantNormMeltingPoint']
2025-12-10 17:29:19 - MeltomeAtlasHandler - INFO - Output directory: c:\Users\alexa\Documents\PROHITS\Code\MeltingBehaviourCLI\results_meltome_2025-12-10_17-29
(221203, 7)
(2500, 7)


In [22]:
# Sanity check of np.array_split on a toy array: 20 elements cut into 10
# pieces; the piece sizes should add back up to 20.
n = np.arange(20)
s = np.array_split(n, 10)
print(
    len(data_handler.data),
    len(s),
    np.sum([piece.size for piece in s]),
)

2500 10 20


In [23]:
# Expand the meltingBehaviour entry of the first row of the working table
# into its own DataFrame.
test_data = pd.DataFrame(data_handler.data.iloc[0]["meltingBehaviour"])

In [24]:
# Display the table built from the first row's meltingBehaviour entry.
test_data

Unnamed: 0,temperature,channel,fold_change
0,25.0,TMT126,1.0
1,36.0,TMT127H,1.020938
2,30.0,TMT127L,0.988506
3,45.0,TMT128H,0.872294
4,39.0,TMT128L,0.949953
5,56.0,TMT129H,0.318029
6,50.0,TMT129L,0.679647
7,66.9,TMT130H,0.832007
8,59.6,TMT130L,0.627966
9,70.4,TMT131L,1.078205


In [25]:
# Fit one curve by hand: the fitter's statistics are printed before and after
# the fit so the effect of fit_curve is visible.
t = SigmoidFitter()
print(t.get_statistics())
r = t.fit_curve(
    test_data["temperature"].to_numpy(),
    test_data["fold_change"].to_numpy(),
)
print(t.get_statistics())

{'rmse': nan, 'r_squared': nan}
{'rmse': np.float64(0.1774227960743552), 'r_squared': np.float64(0.36193825549188574)}


In [26]:
# Melting temperature reported by the fitter `t`, queried after the
# fit_curve call on test_data.
x = t.get_melting_temp()
print(x)

55.07


  y = (1 - pl) / (1 + np.exp(b - (a / x))) + pl


In [27]:
# Run the handler's per-chunk processing on chunk0 alone (the 25-row slice
# taken before data_handler.data was replaced by the subset).
results0 = data_handler.process_chunk(chunk0)

2025-12-10 17:29:20 - MeltomeAtlasHandler - INFO - Chunk progress : 0 / 25, pid : A0A023PXQ4, specie : Saccharomyces cerevisiae lysate
2025-12-10 17:29:20 - SigmoidFitter - DEBUG - Initial parameters: [0.1, 1, 0.1]
2025-12-10 17:29:20 - SigmoidFitter - DEBUG - Fitting complete. R² = 0.9918, RMSE = 0.0280
2025-12-10 17:29:20 - SigmoidFitter - DEBUG - Fitted parameters: pl = -0.387047, a = 396.281525, b = 6.904978
2025-12-10 17:29:20 - MeltomeAtlasHandler - INFO - Chunk progress : 1 / 25, pid : A0A023T4K3, specie : Caenorhabditis elegans lysate
2025-12-10 17:29:20 - SigmoidFitter - DEBUG - Initial parameters: [0.1, 1, 0.1]
2025-12-10 17:29:20 - SigmoidFitter - DEBUG - Fitting complete. R² = 0.9867, RMSE = 0.0453
2025-12-10 17:29:20 - SigmoidFitter - DEBUG - Fitted parameters: pl = 0.052359, a = 719.635395, b = 19.066874
2025-12-10 17:29:20 - MeltomeAtlasHandler - INFO - Chunk progress : 2 / 25, pid : A0A023T778, specie : Mus musculus BMDC lysate
2025-12-10 17:29:20 - SigmoidFitter - DEBU

  y = (1 - pl) / (1 + np.exp(b - (a / x))) + pl


In [28]:
# Wrap the process_chunk output in a DataFrame for tabular inspection.
results0_df = pd.DataFrame(results0)

In [29]:
# Inspect the handler's current working table (the subset assigned to
# data_handler.data in the sampling cell).
data_handler.data

Unnamed: 0,proteinId,uniprotAccession,runName,meltingPoint,meltingBehaviour,sequence,quantNormMeltingPoint
37797,Q9ZUL3_IDD5,Q9ZUL3,Arabidopsis thaliana seedling lysate,,"[{'temperature': 25.0, 'channel': 'TMT126', 'f...",MAASSSSAASFFGVRQDDQSHLLPPNSSAAAPPPPPPHHQAPLPPL...,
17375,P93736_TWN2,P93736,Arabidopsis thaliana seedling lysate,41.318207,"[{'temperature': 25.0, 'channel': 'TMT126', 'f...",MSLLFLRRAKPLFVSCCSATHSRSSFLSPTLTNQLVRSFHGSRTMS...,
34236,Q9FJW4_FRO1,Q9FJW4,Arabidopsis thaliana seedling lysate,,"[{'temperature': 25.0, 'channel': 'TMT126', 'f...",MALCATTQRTIRIAATLRRVARPFATDAVVESDYKRGEIGKVSGIP...,
34859,Q9LP45_RPN6,Q9LP45,Arabidopsis thaliana seedling lysate,43.522499,"[{'temperature': 25.0, 'channel': 'TMT126', 'f...",MVSYRATTETISLALEANSSEAITILYQVLEDPSSSPEAIRIKEQA...,
2869,C0LGQ9_At4g20940,C0LGQ9,Arabidopsis thaliana seedling lysate,42.575046,"[{'temperature': 25.0, 'channel': 'TMT126', 'f...",MNLSRILLLSMFFLSAMGQLPSQDIMALLEFKKGIKHDPTGFVLNS...,
...,...,...,...,...,...,...,...
64783,CAND1,Q86VP6,pTcells,55.656613,"[{'temperature': 37.0, 'fold_change': 0.999985...",MASASYHISNLLEKMTSSDKDFRFMATNDLMTELQKDSIKLDDDSE...,56.869709
81901,DECR1,Q16698,pTcells,57.770579,"[{'temperature': 37.0, 'fold_change': 0.999985...",MKLPARVFFTLGSRLPCGLAPRRFFSYGTKILYQNTEALQSKFFSP...,58.688807
221084,ZYX,Q15942,pTcells,58.301234,"[{'temperature': 37.0, 'fold_change': 0.999985...",MAAPRPSPAISVSVSAPAFYAPQKKFGPVVAPKPKVNPFRPGDSEP...,59.699714
169838,RASSF2,P50749,pTcells,47.268902,"[{'temperature': 37.0, 'fold_change': 0.999985...",MDYSHQTSLVPCGQDKYISKNELLLHLKTYNLYYEGQNLQLRHREE...,46.627860


In [30]:
# Display the chunk0 results table built from process_chunk's output.
results0_df

Unnamed: 0,pid,runName,pl,a,b,rmse,r_squared,tm_pred,tm_flip
0,A0A023PXQ4,Saccharomyces cerevisiae lysate,-0.387047,396.281525,6.904978,0.028,0.9918,52.99,52.403034
1,A0A023T4K3,Caenorhabditis elegans lysate,0.052359,719.635395,19.066874,0.0453,0.9867,37.96,37.962947
2,A0A023T778,Mus musculus BMDC lysate,0.106919,1749.919673,32.061066,0.0564,0.9812,54.99,54.425342
3,A0A061ACF5,Caenorhabditis elegans lysate,0.278508,295.604929,6.790961,0.0246,0.9868,49.46,49.459216
4,A0A061ACH4,Caenorhabditis elegans lysate,0.023551,724.433779,17.056474,0.039,0.9914,42.59,42.593131
5,A0A061ACH8,Caenorhabditis elegans lysate,0.0528,559.097425,14.824896,0.0189,0.9977,38.0,37.999478
6,A0A061ACH9,Caenorhabditis elegans lysate,0.022312,602.364195,14.213085,0.0412,0.9878,42.52,42.517527
7,A0A061ACI3,Caenorhabditis elegans lysate,0.237867,660.532837,15.694724,0.0486,0.9727,43.89,43.892211
8,A0A061ACL3,Caenorhabditis elegans lysate,0.081819,934.223132,22.436277,0.0397,0.9909,41.97,41.973245
9,A0A061ACL6,Caenorhabditis elegans lysate,-1152.945257,224.780039,-3.236776,0.0587,0.9484,49.87,49.044775


In [31]:
# Run the handler's process() over its current data, requesting 10 chunks.
# NOTE(review): later cells use the return value as a DataFrame (.loc,
# .notna, .isna) — confirm the return type against main.py.
results = data_handler.process(num_chunks=10)

2025-12-10 17:29:27 - MeltomeAtlasHandler - INFO - Starting curve fitting process
2025-12-10 17:29:27 - MeltomeAtlasHandler - DEBUG - Data split into 10 chunks
2025-12-10 17:29:27 - MeltomeAtlasHandler - INFO - Processing chunk 1 / 10 (size : 250)
2025-12-10 17:29:27 - MeltomeAtlasHandler - INFO - Chunk progress : 0 / 250, pid : Q9ZUL3, specie : Arabidopsis thaliana seedling lysate
2025-12-10 17:29:27 - SigmoidFitter - DEBUG - Initial parameters: [0.1, 1, 0.1]
2025-12-10 17:29:27 - SigmoidFitter - DEBUG - Fitting complete. R² = 0.3619, RMSE = 0.1774
2025-12-10 17:29:27 - SigmoidFitter - DEBUG - Fitted parameters: pl = 0.707171, a = 8974.218952, b = 199.170123
2025-12-10 17:29:27 - MeltomeAtlasHandler - INFO - Chunk progress : 1 / 250, pid : P93736, specie : Arabidopsis thaliana seedling lysate
2025-12-10 17:29:27 - SigmoidFitter - DEBUG - Initial parameters: [0.1, 1, 0.1]
2025-12-10 17:29:27 - SigmoidFitter - DEBUG - Fitting complete. R² = 0.9984, RMSE = 0.0158
2025-12-10 17:29:27 - Si

  y = (1 - pl) / (1 + np.exp(b - (a / x))) + pl


2025-12-10 17:29:27 - SigmoidFitter - DEBUG - Fitting complete. R² = 0.9784, RMSE = 0.0594
2025-12-10 17:29:27 - SigmoidFitter - DEBUG - Fitted parameters: pl = -0.031404, a = 520.818219, b = 10.953354
2025-12-10 17:29:27 - MeltomeAtlasHandler - INFO - Chunk progress : 8 / 250, pid : O64733, specie : Arabidopsis thaliana seedling lysate
2025-12-10 17:29:27 - SigmoidFitter - DEBUG - Initial parameters: [0.1, 1, 0.1]
2025-12-10 17:29:27 - SigmoidFitter - DEBUG - Fitting complete. R² = 0.9255, RMSE = 0.0980
2025-12-10 17:29:27 - SigmoidFitter - DEBUG - Fitted parameters: pl = 0.217659, a = 527.927364, b = 11.850246
2025-12-10 17:29:27 - MeltomeAtlasHandler - INFO - Chunk progress : 9 / 250, pid : Q9LQK7, specie : Arabidopsis thaliana seedling lysate
2025-12-10 17:29:27 - SigmoidFitter - DEBUG - Initial parameters: [0.1, 1, 0.1]
2025-12-10 17:29:27 - SigmoidFitter - DEBUG - Fitting complete. R² = 0.9765, RMSE = 0.0529
2025-12-10 17:29:27 - SigmoidFitter - DEBUG - Fitted parameters: pl = 0.

  y = (1 - pl) / (1 + np.exp(b - (a / x))) + pl


2025-12-10 17:29:50 - SigmoidFitter - DEBUG - Fitting complete. R² = 0.9829, RMSE = 0.0341
2025-12-10 17:29:50 - SigmoidFitter - DEBUG - Fitted parameters: pl = 0.335451, a = 500.248736, b = 12.458715
2025-12-10 17:29:50 - MeltomeAtlasHandler - INFO - Chunk progress : 248 / 250, pid : G5EEG7, specie : Caenorhabditis elegans lysate
2025-12-10 17:29:50 - SigmoidFitter - DEBUG - Initial parameters: [0.1, 1, 0.1]
2025-12-10 17:29:50 - SigmoidFitter - DEBUG - Fitting complete. R² = 0.9992, RMSE = 0.0106
2025-12-10 17:29:50 - SigmoidFitter - DEBUG - Fitted parameters: pl = 0.076907, a = 549.056253, b = 14.692952
2025-12-10 17:29:50 - MeltomeAtlasHandler - INFO - Chunk progress : 249 / 250, pid : O01530, specie : Caenorhabditis elegans lysate
2025-12-10 17:29:50 - SigmoidFitter - DEBUG - Initial parameters: [0.1, 1, 0.1]
2025-12-10 17:29:50 - SigmoidFitter - DEBUG - Fitting complete. R² = 0.8581, RMSE = 0.0868
2025-12-10 17:29:50 - SigmoidFitter - DEBUG - Fitted parameters: pl = -2474.651184,

In [32]:
# Row counts of the input table and of the results, side by side.
print(data_handler.data.shape[0], results.shape[0])

2500 2500


In [33]:
# Take the identifiers of the result labelled 0 and show the input row(s)
# with the same accession and run name.
pid, runName = results.loc[0, ["pid", "runName"]]
print(pid, runName)
data_handler.data.loc[
    data_handler.data["uniprotAccession"].eq(pid)
    & data_handler.data["runName"].eq(runName)
]


Q9ZUL3 Arabidopsis thaliana seedling lysate


Unnamed: 0,proteinId,uniprotAccession,runName,meltingPoint,meltingBehaviour,sequence,quantNormMeltingPoint
37797,Q9ZUL3_IDD5,Q9ZUL3,Arabidopsis thaliana seedling lysate,,"[{'temperature': 25.0, 'channel': 'TMT126', 'f...",MAASSSSAASFFGVRQDDQSHLLPPNSSAAAPPPPPPHHQAPLPPL...,


In [34]:
# Rows of results with no missing value in any column.
results.dropna(axis="index", how="any")

Unnamed: 0,pid,runName,pl,a,b,rmse,r_squared,tm_pred,tm_flip
1,P93736,Arabidopsis thaliana seedling lysate,0.026090,444.277431,10.806174,0.0158,0.9984,41.32,41.318207
3,Q9LP45,Arabidopsis thaliana seedling lysate,0.006817,202.879815,4.675222,0.0503,0.9703,43.52,43.522499
4,C0LGQ9,Arabidopsis thaliana seedling lysate,0.016569,214.787844,5.078625,0.0913,0.9086,42.58,42.575046
5,Q39212,Arabidopsis thaliana seedling lysate,0.043045,201.932659,4.778648,0.0367,0.9838,43.07,43.068639
6,O80860,Arabidopsis thaliana seedling lysate,0.082383,332.412146,8.021328,0.0451,0.9831,42.39,42.392558
...,...,...,...,...,...,...,...,...,...
2495,Q86VP6,pTcells,-0.025314,1214.153390,21.746865,0.0139,0.9988,55.70,55.656613
2496,Q16698,pTcells,0.002042,1648.459320,28.538646,0.1453,0.8858,57.77,57.770579
2497,Q15942,pTcells,0.067867,1100.041890,19.014117,0.0877,0.9421,58.30,58.301234
2498,P50749,pTcells,0.040392,1355.763571,28.766170,0.0247,0.9964,47.27,47.268902


In [35]:
# Rows of results with at least one missing value, and how many there are.
has_missing = results.isna().any(axis="columns")
results_na = results.loc[has_missing]
print(results_na.shape)

(470, 9)


In [36]:
# Display the rows of results that contain at least one missing value.
results_na

Unnamed: 0,pid,runName,pl,a,b,rmse,r_squared,tm_pred,tm_flip
0,Q9ZUL3,Arabidopsis thaliana seedling lysate,0.707171,8974.218952,199.170123,0.1774,0.3619,55.07,
2,Q9FJW4,Arabidopsis thaliana seedling lysate,0.550341,7793.893529,172.927892,0.1630,0.6302,57.19,
12,Q9LQU7,Arabidopsis thaliana seedling lysate,0.681728,12915.297468,262.106018,0.1113,0.7170,57.20,
21,P92948,Arabidopsis thaliana seedling lysate,0.679588,1651.243090,36.771328,0.1562,0.3835,100.00,
22,Q9LXM8,Arabidopsis thaliana seedling lysate,,,,,,,
...,...,...,...,...,...,...,...,...,...
2299,,U937,0.025491,1621.839837,30.096259,0.0110,0.9993,53.98,53.982301
2397,Q86X27,colon_cancer_spheroids,,,,,,,66.453361
2422,,pTcells,0.024385,2061.754545,40.048797,0.0325,0.9950,51.55,51.545414
2433,Q9Y262,pTcells,,,,,,,55.634276
