# Input File Requirements

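The input file should be a spreadsheet-style table (for example, one exported from Excel; the conversion example below reads a `.csv` file). The table consists of three parts: the `Symbol` column, the data columns, and the `label` column.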

## 1. Column Structure

###  **Symbol Column**
- **Column Name**: `Symbol`
- **Content**: Gene identifiers.
- **Position**: First column.

###  **Data Columns**
- **Column Naming Rule**: `time_dup`
  - `time`: The time point of the sample.
  - `dup`: The replication number of the sample.
- **Sorting Rules**:
  - Columns with the same `dup` value are grouped together.
  - Within each group, columns are sorted by `time` in ascending order.
  - Groups are sorted by `dup` in ascending order.
- **Example Column Names**: `0_0`, `4_0`, `8_0`, `0_1`, `4_1`, `8_1`, etc.
- **Content**: Expression values of the corresponding gene at specific times and replications.

###  **Label Column**
- **Column Name**: `label`
- **Content**: A label indicating whether the gene oscillates, with values `0` or `1`.
  - `0`: The gene does not oscillate.
  - `1`: The gene oscillates.
- **Position**: Last column.
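- **Note**: If the true labels are unknown, `label` can be set to `1` for all entries. In that case, metrics computed against these placeholder labels (accuracy, AUROC, etc.) are not meaningful; only the predictions and scores are.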

## 2. Example Table Structure

| Symbol | 0_0  | 4_0  | 8_0  | ... | label |
|--------|------|------|------|-----|-------|
| GENE1  | 0.36314 | 0.838363 | 0.850397 | ... | 0     |
| GENE2  | 3.49872 | 3.780274 | 3.770533 | ... | 0     |
| ...    | ...  | ...  | ...  | ... | ...   |

## 3. Data Filling Instructions
- **Symbol Column**: Fill in the unique identifiers for the genes.
- **Data Columns**: Fill in the expression values of the corresponding gene at specific times and replications.
- **Label Column**: Fill in `0` or `1` based on whether the gene oscillates. If uncertain, fill in `1` for all entries.

In [None]:
from utils.ts import singal_convert_to_ts

# Convert the example table into a .ts file. The output lands in
# ../example_data/t1/, the folder the evaluation cell later loads from.
conversion_args = {
    "input_csv": '../example_data/example_data_t1.csv',
    "output_ts": '../example_data/t1/test.ts',
    "problem_name": 'example',
    "label_col": 'label',
}
singal_convert_to_ts(**conversion_args)

In [None]:
from argparse import Namespace
from models.circaLLM import CIRCALLM
# Settings passed to the model under the "t5_config" key; kept in their own
# dict so the top-level configuration below stays easy to scan.
t5_settings = dict(
    architectures=["T5ForConditionalGeneration"],
    d_ff=1024,
    d_kv=64,
    d_model=512,
    decoder_start_token_id=0,
    dropout_rate=0.1,
    eos_token_id=1,
    feed_forward_proj="gated-gelu",
    initializer_factor=1.0,
    is_encoder_decoder=True,
    layer_norm_epsilon=1e-06,
    model_type="t5",
    n_positions=72,
    num_decoder_layers=6,
    num_heads=8,
    num_layers=6,
    output_past=True,
    pad_token_id=0,
    relative_attention_max_distance=128,
    relative_attention_num_buckets=32,
    tie_word_embeddings=False,
    use_cache=True,
    vocab_size=32128,
)

# Top-level model/task configuration (same keys, order and values as before).
config_dict = dict(
    task_name="classification",
    model_name="CIRCALLM",
    transformer_type="encoder_only",
    freeze_embedder=False,
    freeze_encoder=False,
    freeze_head=False,
    learning_rate=1e-6,
    num_epochs=20,
    n_channels=1,
    num_class=2,
    reduction='mean',
    d_model=None,
    seq_len=72,
    enable_gradient_checkpointing=False,
    enable_FAN=True,
    enable_FAN_gate=True,
    patch_len=6,
    patch_stride_len=6,
    device="cpu",
    transformer_backbone="google/flan-t5-small",
    model_kwargs={},
    t5_config=t5_settings,
)

# The model receives its settings as attributes of a Namespace.
config = Namespace(**config_dict)

model = CIRCALLM(config)
print(model)

In [None]:
import torch

# Deserialize the checkpoint onto the CPU. Without `map_location`, a
# checkpoint saved from a GPU cannot be loaded on a CPU-only machine.
# `load_state_dict` copies the tensors into the model's own parameters, and
# `evaluate_epoch` moves the model to the target device afterwards.
state_dict = torch.load("pretrained/Task1/best_model.pth", map_location="cpu")  # best_model
# The checkpoint is a dict; the weights live under "model_state_dict".
model.load_state_dict(state_dict["model_state_dict"])
# Optimizer over all model parameters, using the configured learning rate.
optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)

# Prefer the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [None]:
from tqdm import tqdm 
import numpy as np

def test_epoch(model, dataloader, device, criterion):
    """Run a test pass by delegating, unchanged, to ``evaluate_epoch``.

    Takes the same arguments and returns the same result dictionary as
    ``evaluate_epoch``.
    """
    return evaluate_epoch(
        model=model,
        dataloader=dataloader,
        device=device,
        criterion=criterion,
    )
    
def evaluate_epoch(model,dataloader,device,criterion):
    """Evaluate ``model`` on every batch of ``dataloader`` without gradients.

    Args:
        model: Module called as ``model(x_enc=..., input_mask=...,
            x_mark=..., reduction=...)``; its output must expose ``.logits``.
        dataloader: Sized iterable yielding
            ``(batch_data, input_mask, x_marks, targets)`` tensors.
        device: ``torch.device`` (or device string) the model and the batches
            are moved to.
        criterion: Loss callable invoked as ``criterion(logits, targets)``,
            where ``targets`` has been unsqueezed on dim 1 and cast to float.

    Returns:
        dict with keys:
            "loss": one-element list, the mean of the per-batch losses.
            "accuracy": one-element list, correct predictions / samples seen.
            "targets", "preds", "scores": per-sample values as nested lists;
            scores are ``sigmoid(logits)`` and preds are ``scores > 0.5``.
        For an empty dataloader, loss and accuracy are ``nan`` instead of
        raising ``ZeroDivisionError``.

    Note:
        Reads the module-level ``config.reduction``; ``config`` must be
        defined before this function is called.
    """
    # Function-scope import keeps the block self-contained.
    from contextlib import nullcontext

    model.eval()
    model.to(device)

    # bfloat16 autocast only when actually running on a CUDA device with
    # compute capability >= 8. In every other case run at full precision
    # without entering a CUDA autocast context (which warns on CPU).
    target_device = torch.device(device)
    use_bf16 = (
        target_device.type == "cuda"
        and torch.cuda.get_device_capability(target_device)[0] >= 8
    )

    all_targets, all_preds, all_scores = [],[],[]
    running_loss, correct, total = 0.0, 0,0
    num_batches = len(dataloader)
    with torch.no_grad():
        for batch_data, input_mask, x_marks, targets in tqdm(dataloader, total=num_batches):

            batch_data = batch_data.to(device).float()
            input_mask, x_marks=input_mask.long().to(device), x_marks.to(device)
            # Record the raw targets before they are reshaped for the loss.
            all_targets.extend(targets.detach().cpu().numpy())
            targets=targets.unsqueeze(1).float().to(device)
            total += targets.size(0)

            # A fresh context per batch; nullcontext() is a no-op.
            amp_context = (
                torch.autocast(device_type="cuda", dtype=torch.bfloat16)
                if use_bf16
                else nullcontext()
            )
            with amp_context:
                output = model(x_enc=batch_data,input_mask=input_mask,x_mark=x_marks,reduction=config.reduction)
                logits=output.logits
                loss = criterion(logits, targets)

            running_loss += loss.item()

            scores=torch.sigmoid(logits)
            predicted = (scores > 0.5).int()
            all_preds.extend(predicted.detach().cpu().numpy())
            correct += (predicted == targets.int()).sum().item()
            # Cast to float32 first: NumPy has no bfloat16 dtype.
            all_scores.extend(scores.detach().to(torch.float).cpu().numpy())

    all_targets = np.array(all_targets)
    all_preds = np.array(all_preds)
    all_scores = np.array(all_scores)

    # Guard against an empty dataloader (no batches / no samples).
    avg_loss = running_loss / num_batches if num_batches else float("nan")
    accuracy = correct / total if total else float("nan")
    result={
        "loss":[avg_loss],
        "accuracy":[accuracy],
        "targets":all_targets.tolist(),
        "preds":all_preds.tolist(),
        "scores":all_scores.tolist(),
    }
    return result

In [None]:
import os
import torch
from data_provider.classfication_datasets import MultipleDataset
from torch.utils.data import DataLoader
from utils.logging import CustomLogger
from utils.metrics import Metric
from datetime import datetime
import numpy as np

seed = 77
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

test_file_fold = "../example_data/t1/"
# Fail with an explicit exception when the test folder is missing; an
# `assert` would be silently skipped under `python -O`.
if not os.path.exists(test_file_fold):
    raise FileNotFoundError(f"测试文件夹不存在: {test_file_fold}")
result_fold = '../example_data/t1/result/'

test_dataset = MultipleDataset(
    data_split="aper",
    file_paths=[test_file_fold],
    seq_len=72,
    seed=seed,
    Realcase=True
)
print(test_dataset.data.shape)
torch.manual_seed(seed)
# Column 0 of `labels` is kept as the per-sample ID; column 1 is the integer
# class label the dataset serves from here on.
sample_ids = test_dataset.labels[:, 0]
test_dataset.labels = test_dataset.labels[:, 1].astype(int)

test_dataloader = DataLoader(test_dataset, batch_size=64, shuffle=False)
# NOTE(review): the scheduler is not used by the evaluation below; it looks
# like a leftover from the training notebook. Kept so `scheduler` stays defined.
scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=1e-4,total_steps=10*len(test_dataloader))
trainSave={'loss':[],'accuracy':[],'targets':[],'preds':[],'scores':[]}
label = test_dataset.labels
# Weight for the positive class in the loss. When no negative sample exists
# (e.g. every label was filled with 1 because the truth is unknown), fall
# back to 1.0 instead of dividing by zero and getting an infinite loss.
# NOTE(review): this is positives / negatives; the PyTorch docs suggest
# negatives / positives for `pos_weight`. Confirm against the training code
# before changing it, since it only affects the reported loss here.
n_pos = int(np.sum(label))
n_neg = len(label) - n_pos
pos_weight_value = n_pos / n_neg if n_neg > 0 else 1.0
pos_weights = torch.tensor([pos_weight_value], dtype=torch.float32)
criterion = torch.nn.BCEWithLogitsLoss(pos_weight=pos_weights.to(device))

test_result = test_epoch(model, test_dataloader, device, criterion)
print(f"Test Accuracy: {test_result['accuracy'][0]:.4f}")
# Copy every metric list from the evaluation result into the save structure.
for kk in trainSave:
    trainSave[kk].extend(test_result[kk])
trainSave['ID'] = sample_ids.tolist()

Metric.save_metrics(trainSave, result_fold, "current_res.json", 0, "example", mode='w')

In [None]:
import utils.pro_json as pro_json

# Compute the binary-classification metrics from the results JSON below.
results_json = '../example_data/t1/result/example/current_res.json'
acc, pre, rec, f1, auroc, aupr = pro_json.calculate_binary_metrics(json_path=results_json)

# One print call; `sep="\n"` puts each metric on its own line.
print(
    f"Accuracy: {acc:.4f}",
    f"Precision: {pre:.4f}",
    f"Recall: {rec:.4f}",
    f"F1 Score: {f1:.4f}",
    f"AUROC: {auroc:.4f}",
    f"AUPR: {aupr:.4f}",
    sep="\n",
)

In [None]:
import utils.pro_json as pro_json

# Export the results JSON to a CSV file next to it.
source_json = "../example_data/t1/result/example/current_res.json"
target_csv = "../example_data/t1/result/example/example_circallm.csv"
pro_json.process_json_to_csv(json_path=source_json, output_csv=target_csv)

In [None]:
# torch.save(model.state_dict(),"/mnt/disk/zxc/TimeSeriesLLM/circaLLM/pretrained/RealDST_T1/Geod_model.pth")