In [1]:
import os
import sys
import datetime
from pathlib import Path
import logging
from main import SigmoidFitter, MeltomeAtlasHandler, setup_logging

import json
import numpy as np
import pandas as pd
import seaborn as sns
from joblib import Parallel, delayed

# Minute-resolution timestamp shared by the log file name and the results path.
now = datetime.datetime.now()
timestamp_str = format(now, "%Y-%m-%d_%H-%M")

# Path handed to MeltomeAtlasHandler as its output location.
output_path = f'./results_meltome_{timestamp_str}'

# Logging configuration; setup_logging is imported from the project's `main` module.
LOG_LEVEL = logging.DEBUG
LOG_FILE = f'./testing_{timestamp_str}.log'
setup_logging(LOG_FILE, LOG_LEVEL)


2025-12-12 15:30:08 - root - INFO - Logging to file: ./testing_2025-12-12_15-30.log


In [2]:
# `__file__` is not defined in a notebook kernel, so paths are anchored on the
# working directory instead. This assumes the kernel runs two levels below the
# project root.
CWD = Path.cwd().resolve()
PROJECT_ROOT = CWD.parent.parent
print(PROJECT_ROOT)
file_path = PROJECT_ROOT.joinpath("Data", "FLIP", "splits", "meltome", "full_dataset.json")
print(file_path)

C:\Users\alexa\Documents\PROHITS
C:\Users\alexa\Documents\PROHITS\Data\FLIP\splits\meltome\full_dataset.json


In [3]:
# Load the Meltome dataset, then replace the handler's working data with a subset.
data_handler = MeltomeAtlasHandler(file_path, output_path, LOG_LEVEL)
print(data_handler.data.shape)
# NOTE(review): the recorded run printed 2500 rows for an argument of 100, so
# the argument is evidently not a row count — confirm its unit in select_subset.
df = data_handler.select_subset(100)
# chunk0 is sliced before data_handler.data is reassigned below, so it comes
# from whatever the handler holds at this point (presumably the full dataset,
# unless select_subset modifies it in place — TODO confirm this is intended).
chunk0 = data_handler.data.iloc[:25]
data_handler.data = df 
print(data_handler.data.shape)

2025-12-12 15:30:08 - MeltomeAtlasHandler - INFO - Meltome Handler initialization
2025-12-12 15:30:08 - MeltomeAtlasHandler - DEBUG - DEBUG Mode
2025-12-12 15:30:10 - MeltomeAtlasHandler - INFO - Meltome data loaded from C:\Users\alexa\Documents\PROHITS\Data\FLIP\splits\meltome\full_dataset.json
2025-12-12 15:30:10 - MeltomeAtlasHandler - INFO - Meltome header ['proteinId', 'uniprotAccession', 'runName', 'meltingPoint', 'meltingBehaviour', 'sequence', 'quantNormMeltingPoint']
2025-12-12 15:30:10 - MeltomeAtlasHandler - INFO - Output directory: c:\Users\alexa\Documents\PROHITS\Code\MeltingBehaviourCLI\results_meltome_2025-12-12_15-30
(221203, 7)
(2500, 7)


In [4]:
# Run the parallel curve-fitting pass; the recorded log reports the data being
# split into 10 chunks, matching the argument passed here.
r = data_handler.process_parallel(10)

2025-12-12 15:30:10 - MeltomeAtlasHandler - INFO - START - curve fitting process parallel
2025-12-12 15:30:10 - MeltomeAtlasHandler - DEBUG - Data split into 10 chunks
2025-12-12 15:30:10 - MeltomeAtlasHandler - INFO - NOTE : Parallel processing of chunks does not log row progress
2025-12-12 15:30:10 - MeltomeAtlasHandler - INFO - Processing chunk 1 / 10 (size : 250)
2025-12-12 15:30:31 - MeltomeAtlasHandler - DEBUG - Writing results to curve_fit.csv in mode x
2025-12-12 15:30:31 - MeltomeAtlasHandler - INFO - Processing chunk 2 / 10 (size : 250)
2025-12-12 15:30:59 - MeltomeAtlasHandler - DEBUG - Writing results to curve_fit.csv in mode a
2025-12-12 15:30:59 - MeltomeAtlasHandler - INFO - Processing chunk 3 / 10 (size : 250)
2025-12-12 15:31:09 - MeltomeAtlasHandler - DEBUG - Writing results to curve_fit.csv in mode a
2025-12-12 15:31:09 - MeltomeAtlasHandler - INFO - Processing chunk 4 / 10 (size : 250)
2025-12-12 15:31:12 - MeltomeAtlasHandler - DEBUG - Writing results to curve_fit.

In [None]:
# Rows of the parallel-run results whose status is 'FAILURE'.
r.loc[r["status"] == 'FAILURE']

In [None]:
# Turn the first row's meltingBehaviour entry into its own frame for a manual fit.
test_data = pd.DataFrame(data_handler.data.iloc[0]["meltingBehaviour"])

In [None]:
# Inspect the frame built from the first row's meltingBehaviour.
test_data

In [None]:
# Fit one curve by hand, printing the fitter's statistics before and after the
# fit so the effect of fit_curve is visible.
t = SigmoidFitter()
print(t.get_statistics())
# Stored under its own name: `r` already holds the process_parallel results
# that the FAILURE filter cell reads, so it must not be overwritten here.
fit_result = t.fit_curve(test_data.temperature.to_numpy(), test_data.fold_change.to_numpy())
print(t.get_statistics())

In [None]:
# Query the fitter's get_melting_temp() after the manual fit and print the value.
x = t.get_melting_temp()
print(x)

In [None]:
# Process the `chunk0` slice (first 25 rows, taken before data_handler.data was
# reassigned to the subset).
results0 = data_handler.process_chunk(chunk0)

In [None]:
# Wrap the process_chunk output in a DataFrame for inspection.
results0_df = pd.DataFrame(results0)

In [None]:
# Show the data the handler currently holds (the subset assigned earlier).
data_handler.data

In [None]:
# Show the chunk0 results as a frame.
results0_df

In [None]:
# Run data_handler.process with 10 chunks; the cells below index `results`
# with .loc and filter it by missing values.
results = data_handler.process(num_chunks=10)

In [None]:
# Compare the number of input rows with the number of result rows.
print(data_handler.data.shape[0], results.shape[0])

In [None]:
# Take the identifying pair from result row 0 with a single .loc lookup.
pid, runName = results.loc[0, ['pid', 'runName']]
print(pid, runName)
# Input rows whose accession and run name match that result row.
data_handler.data.loc[
    (data_handler.data['uniprotAccession'] == pid)
    & (data_handler.data['runName'] == runName)
]


In [None]:
# Result rows with no missing value in any column.
results.loc[~results.isna().any(axis="columns")]

In [None]:
# Result rows with at least one missing value, and how many there are.
results_na = results.loc[results.isna().any(axis="columns")]
print(results_na.shape)

In [None]:
# Inspect the result rows that contain missing values.
results_na