In [1]:
import os
import sys
import datetime
from pathlib import Path
import logging
from main import SigmoidFitter, MeltomeAtlasHandler, setup_logging

import json
import numpy as np
import pandas as pd
import seaborn as sns
from joblib import Parallel, delayed

# --- Run configuration -------------------------------------------------------
# Verbosity used for the log file and passed on to the data handler later on.
LOG_LEVEL = logging.DEBUG

# A single minute-resolution timestamp tags both the log file and the results
# folder, so artefacts produced by the same kernel session share one suffix.
now = datetime.datetime.now()
timestamp_str = now.strftime("%Y-%m-%d_%H-%M")

LOG_FILE = f'./testing_{timestamp_str}.log'
output_path = f'./results_meltome_{timestamp_str}'

# Configure logging once, before any handler or fitter object is created.
setup_logging(LOG_FILE, LOG_LEVEL)


2025-12-18 14:20:40 - root - INFO - Logging to file: ./testing_2025-12-18_14-20.log


In [2]:
# `__file__` is not defined inside a notebook kernel, so the project root is
# derived from the current working directory instead: two levels above it.
CWD = Path.cwd().resolve()
PROJECT_ROOT = CWD.parent.parent
print(PROJECT_ROOT)

# Location of the full Meltome dataset inside the project tree.
file_path = PROJECT_ROOT.joinpath("Data", "FLIP", "splits", "meltome", "full_dataset.json")
print(file_path)

C:\Users\alexa\Documents\PROHITS
C:\Users\alexa\Documents\PROHITS\Data\FLIP\splits\meltome\full_dataset.json


In [12]:
# Load the full Meltome dataset through the handler and report its size.
data_handler = MeltomeAtlasHandler(file_path, output_path, LOG_LEVEL)
print(data_handler.data.shape)

# Working subset used for the rest of the notebook (2500 rows in the recorded
# run; presumably 100 per runName -- confirm against select_subset()).
df = data_handler.select_subset(100)

# Meltome example: 10 rows per runName with a fixed seed so the exported file
# is reproducible. reset_index(drop=True) discards the original row labels in
# one step instead of materialising an 'index' column and dropping it in place.
example = (
    data_handler.data
    .groupby(by='runName')
    .sample(n=10, random_state=42)
    .reset_index(drop=True)
)

# Make sure the target folder exists; to_json does not create missing
# directories and would otherwise fail on a fresh checkout.
example_path = Path("./example/meltome_example.json")
example_path.parent.mkdir(parents=True, exist_ok=True)

# NOTE(review): the former `index=False` was removed. With the default
# ('columns') orient the row labels are always written, so the flag did not
# change the file, and older pandas releases reject that combination with a
# ValueError.
example.to_json(example_path, indent=4)

# Keep the first 25 rows of the *full* dataset for the chunk-level test below,
# then point the handler at the reduced subset.
chunk0 = data_handler.data.iloc[:25]
data_handler.data = df
print(data_handler.data.shape)

2025-12-18 14:22:26 - MeltomeAtlasHandler - INFO - Meltome Handler initialization
2025-12-18 14:22:29 - MeltomeAtlasHandler - INFO - Meltome data loaded from C:\Users\alexa\Documents\PROHITS\Data\FLIP\splits\meltome\full_dataset.json
2025-12-18 14:22:29 - MeltomeAtlasHandler - INFO - Meltome header ['proteinId', 'uniprotAccession', 'runName', 'meltingPoint', 'meltingBehaviour', 'sequence', 'quantNormMeltingPoint']
2025-12-18 14:22:29 - MeltomeAtlasHandler - INFO - Output directory: c:\Users\alexa\Documents\PROHITS\Code\MeltingBehaviourCLI\results_meltome_2025-12-18_14-20
(221203, 7)
(2500, 7)


In [14]:
# Read the exported example back from disk to check that the JSON round-trips
# into a DataFrame.
test = pd.read_json("./example/meltome_example.json")

In [15]:
# Display the reloaded example frame.
test

Unnamed: 0,proteinId,uniprotAccession,runName,meltingPoint,meltingBehaviour,sequence,quantNormMeltingPoint
0,Q9M9Q6_SCPL50,Q9M9Q6,Arabidopsis thaliana seedling lysate,53.178022,"[{'temperature': 25.0, 'channel': 'TMT126', 'f...",MEQATTLFILLSTLLLAVSVESPQPPLFPDEALPTKSGYLPVKPAP...,
1,Q8W108_ARD3,Q8W108,Arabidopsis thaliana seedling lysate,49.626427,"[{'temperature': 25.0, 'channel': 'TMT126', 'f...",MGEVVKDGREEVIQAWYMDDSEEDQRLPHHKDPKEFLSLDKLAELG...,
2,Q9LR64_PSB27-1,Q9LR64,Arabidopsis thaliana seedling lysate,,"[{'temperature': 25.0, 'channel': 'TMT126', 'f...",MASASATATLLKPNLPPHKPTIIASSVSPPLPPPRRNHLLRRDFLS...,
3,O65902_CAP1,O65902,Arabidopsis thaliana seedling lysate,47.122153,"[{'temperature': 25.0, 'channel': 'TMT126', 'f...",MEEDLIKRLEAAVTRLEGISSNGGGVVSLSRGGDFSSAAGIDIASS...,
4,Q9SUC6_FAD-OXR,Q9SUC6,Arabidopsis thaliana seedling lysate,51.401742,"[{'temperature': 25.0, 'channel': 'TMT126', 'f...",MRELFMYLFLLFLVLCVKSVYSTPTREQFQNCLSTKQFNSTLKNPI...,
...,...,...,...,...,...,...,...
245,KIF21A,Q7Z4S6,pTcells,49.220662,"[{'temperature': 37.0, 'fold_change': 0.999985...",MLGAPDESSVRVAVRIRPQLAKEKIEGCHICTSVTPGEPQVFLGKD...,49.766154
246,GALE,Q14376,pTcells,51.633753,"[{'temperature': 37.0, 'fold_change': 0.999985...",MAEKVLVTGGAGYIGSHTVLELLEAGYLPVVIDNFHNAFRGGGSLP...,51.578408
247,MCCC1,Q96RQ3,pTcells,52.817112,"[{'temperature': 37.0, 'fold_change': 0.999985...",MAAASAVSVLLVAAERNRWHRLPSLLLPPRTWVWRQRTMKYTTATG...,53.552499
248,PFN2,P35080,pTcells,52.724204,"[{'temperature': 37.0, 'fold_change': 0.999985...",MAGWQSYVDNLMCDGCCQEAAIVGYCDAKYVWAATAGGVFQSITPI...,52.605961


In [16]:
# Point the handler at the small reloaded example instead of the subset chosen
# earlier. NOTE(review): this replaces handler state across cells, so every
# later cell that reads data_handler.data depends on this cell having run.
data_handler.data = test

In [None]:
# Run the curve-fitting process on the handler's current data; the recorded log
# reports "Data split into 10 chunks" for this call.
# NOTE(review): the recorded run ended with
#   FileExistsError: [Errno 17] File exists: 'curve_fit.csv'
# presumably because an earlier run had already written into the same output
# directory -- confirm how process() treats existing output before re-running.
r = data_handler.process(10)

2025-12-18 14:22:47 - MeltomeAtlasHandler - INFO - START - curve fitting process
2025-12-18 14:22:47 - MeltomeAtlasHandler - DEBUG - Data split into 10 chunks
2025-12-18 14:22:47 - MeltomeAtlasHandler - INFO - Processing chunk 1 / 10 (size : 25)
2025-12-18 14:22:47 - MeltomeAtlasHandler - INFO - Chunk progress : 1 / 25, pid : Q9M9Q6, specie : Arabidopsis thaliana seedling lysate
2025-12-18 14:22:47 - SigmoidFitter - DEBUG - Initial parameters: [0.1, 1, 0.1]
2025-12-18 14:22:47 - SigmoidFitter - DEBUG - Fitting complete. R² = 0.9770, RMSE = 0.0491
2025-12-18 14:22:47 - SigmoidFitter - DEBUG - Fitted parameters: pl = 0.271765, a = 903.028908, b = 17.765463
2025-12-18 14:22:47 - MeltomeAtlasHandler - INFO - Chunk progress : 2 / 25, pid : Q8W108, specie : Arabidopsis thaliana seedling lysate
2025-12-18 14:22:47 - SigmoidFitter - DEBUG - Initial parameters: [0.1, 1, 0.1]
2025-12-18 14:22:47 - SigmoidFitter - DEBUG - Fitting complete. R² = 0.9076, RMSE = 0.1230
2025-12-18 14:22:47 - SigmoidF

  y = (1 - pl) / (1 + np.exp(b - (a / x))) + pl


2025-12-18 14:22:47 - MeltomeAtlasHandler - INFO - Chunk progress : 10 / 25, pid : P11574, specie : Arabidopsis thaliana seedling lysate
2025-12-18 14:22:47 - SigmoidFitter - DEBUG - Initial parameters: [0.1, 1, 0.1]
2025-12-18 14:22:47 - SigmoidFitter - DEBUG - Fitting complete. R² = 0.9808, RMSE = 0.0632
2025-12-18 14:22:47 - SigmoidFitter - DEBUG - Fitted parameters: pl = 0.070485, a = 967.978015, b = 20.175008
2025-12-18 14:22:47 - MeltomeAtlasHandler - INFO - Chunk progress : 11 / 25, pid : P94424, specie : Bacillus subtilis_168_lysate_R1
2025-12-18 14:22:47 - SigmoidFitter - DEBUG - Initial parameters: [0.1, 1, 0.1]
2025-12-18 14:22:48 - SigmoidFitter - DEBUG - Fitting complete. R² = 0.8371, RMSE = 0.2148
2025-12-18 14:22:48 - SigmoidFitter - DEBUG - Fitted parameters: pl = 0.000000, a = 37485.309298, b = 879.541739
2025-12-18 14:22:48 - MeltomeAtlasHandler - INFO - Chunk progress : 12 / 25, pid : P40406, specie : Bacillus subtilis_168_lysate_R1
2025-12-18 14:22:48 - SigmoidFitte

  y = (1 - pl) / (1 + np.exp(b - (a / x))) + pl


2025-12-18 14:22:48 - SigmoidFitter - DEBUG - Fitting complete. R² = 0.9376, RMSE = 0.0703
2025-12-18 14:22:48 - SigmoidFitter - DEBUG - Fitted parameters: pl = -0.033443, a = 233.779024, b = 6.094173
2025-12-18 14:22:48 - MeltomeAtlasHandler - INFO - Chunk progress : 19 / 25, pid : P39751, specie : Bacillus subtilis_168_lysate_R1
2025-12-18 14:22:48 - SigmoidFitter - DEBUG - Initial parameters: [0.1, 1, 0.1]
2025-12-18 14:22:48 - SigmoidFitter - DEBUG - Fitting complete. R² = 0.9968, RMSE = 0.0186
2025-12-18 14:22:48 - SigmoidFitter - DEBUG - Fitted parameters: pl = 0.061000, a = 384.125722, b = 9.450396
2025-12-18 14:22:48 - MeltomeAtlasHandler - INFO - Chunk progress : 20 / 25, pid : P70945, specie : Bacillus subtilis_168_lysate_R1
2025-12-18 14:22:48 - SigmoidFitter - DEBUG - Initial parameters: [0.1, 1, 0.1]
2025-12-18 14:22:48 - SigmoidFitter - DEBUG - Fitting complete. R² = 0.9938, RMSE = 0.0280
2025-12-18 14:22:48 - SigmoidFitter - DEBUG - Fitted parameters: pl = -0.002228, a =

FileExistsError: [Errno 17] File exists: 'curve_fit.csv'

In [None]:
# Expand the melting-behaviour records of the first example row into a table.
pd.DataFrame(test.loc[0, "meltingBehaviour"])

In [None]:
# Parallel variant of the processing run; overwrites `r` from the serial run.
# NOTE(review): 10 is passed positionally -- presumably the chunk count, as in
# process(num_chunks=10) further down; verify against process_parallel().
r = data_handler.process_parallel(10)

In [None]:
# Melting-behaviour records of the handler's first row, as a DataFrame.
test_data = pd.DataFrame(data_handler.data.iloc[0]["meltingBehaviour"])

In [None]:
# Display the single-protein melting curve used for the manual fit below.
test_data

In [None]:
# Manual single-curve fit: print the fitter statistics before and after fitting
# so the effect of fit_curve() is visible.
t = SigmoidFitter()
print(t.get_statistics())

temperatures = test_data["temperature"].to_numpy()
fold_changes = test_data["fold_change"].to_numpy()
r = t.fit_curve(temperatures, fold_changes)

print(t.get_statistics())

In [None]:
# Melting temperature reported by the fitter created in the previous cell
# (depends on `t` having been fitted there).
x = t.get_melting_temp()
print(x)

In [None]:
# Process a single chunk directly. chunk0 holds the first 25 rows of the full
# dataset, captured before data_handler.data was replaced by the subset.
results0 = data_handler.process_chunk(chunk0)

In [None]:
# Wrap the chunk-level results in a DataFrame for inspection.
results0_df = pd.DataFrame(results0)

In [None]:
# Display the data currently attached to the handler (reassigned in earlier cells).
data_handler.data

In [None]:
# Display the fit results obtained for chunk0.
results0_df

In [None]:
# Run the full process on the handler's current data, split into 10 chunks.
# The cells below index `results` like a DataFrame (.loc, .notna(), .isna()).
results = data_handler.process(num_chunks=10)

In [None]:
# Sanity check: number of input rows versus number of result rows.
print(len(data_handler.data), len(results))

In [None]:
# Take the identifiers of the first result row ...
pid, runName = results.loc[0, ['pid', 'runName']]
print(pid, runName)

# ... and show the input row(s) with the same accession and run name.
same_accession = data_handler.data.uniprotAccession == pid
same_run = data_handler.data.runName == runName
data_handler.data[same_accession & same_run]


In [None]:
# Result rows in which every column is populated (no missing values at all).
results.dropna(axis=0, how="any")

In [None]:
# Result rows with at least one missing value, and how many there are.
has_missing = results.isna().any(axis=1)
results_na = results[has_missing]
print(results_na.shape)

In [None]:
# Display the result rows that contain missing values.
results_na