In [None]:
import pandas as pd
import re
import numpy as np
import tqdm
from helper import *

In [None]:
# Test questions CSV (presumably the edited test set, judging by the file name).
data = pd.read_csv("/project/ai901503-ai0003/kiki/test_edit.csv")

# Submission CSV; note that `submission` is re-assigned by later cells.
submission = pd.read_csv("/project/ai901503-ai0003/kiki/submission.csv")

# Model (Hugging Face Transformers)

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Directory of the local Qwen3-30B-A3B checkpoint.
model_name = '/project/ai901503-ai0003/Model/Qwen3-30B-A3B'

# Tokenizer and causal-LM weights are both read from the same checkpoint.
# "auto" leaves the dtype choice and the device placement to transformers.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", device_map="auto")

In [None]:
def get_model_inputs(question):
    """Build tokenized chat inputs for a single question.

    The question is wrapped in a two-message chat (fixed system prompt + user
    turn), rendered with the tokenizer's chat template in non-thinking mode,
    tokenized, and moved to the model's device.

    Args:
        question (str): The question text to send as the user turn.

    Returns:
        The tokenizer output for the rendered chat (PyTorch tensors), moved to
        ``model.device``; it can be unpacked into ``model.generate(**inputs)``.

    Relies on the notebook-level ``tokenizer`` and ``model`` objects.
    """
    # prepare the model input
    prompt = '''###Role:
                You are an expert in ethical finance.

                ###Objective:
                Carefully read the question and select the **single best answer** from the following choices:

                A, B, C, D, E, Rise, Fall

                ###Instructions:
                - Only use one of the provided options: **A, B, C, D, E, Rise, Fall**
                - Your final answer must be wrapped **exactly** in: <answer>YourAnswer</answer>
                - Do **not** include any extra text, explanation, or formatting unless specified.


            '''

    # Keep a separator between the question and the appended instruction;
    # without it the last word of the question is fused with "let's".
    user_content = question + " let's think step by step and answer the question."

    messages = [
        {"role": "system", "content": prompt},
        {"role": "user", "content": user_content},
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False,  # Switches between thinking and non-thinking modes. Default is True.
    )
    return tokenizer([text], return_tensors="pt").to(model.device)

def model_generate(question):
    """Generate an answer for ``question`` with the Transformers model.

    Note: a second ``model_generate(question, system_prompt)`` is defined later
    in this notebook (vLLM section) and replaces this one once that cell runs.

    Args:
        question (str): Question text passed to ``get_model_inputs``.

    Returns:
        tuple[str, str]: ``(thinking_content, content)`` — the decoded text up
        to and including the last ``</think>`` token, and the decoded text
        after it. When no ``</think>`` token was generated,
        ``thinking_content`` is empty and ``content`` holds the whole output.

    Also prints both parts and the number of generated tokens.
    """
    # Build the inputs once and reuse them: the original called
    # get_model_inputs() twice, templating and tokenizing the prompt twice.
    model_inputs = get_model_inputs(question)
    prompt_length = len(model_inputs.input_ids[0])

    # conduct text completion
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=10000,
        use_cache=True,
    )
    # generate() returns prompt + completion; keep only the new tokens.
    output_ids = generated_ids[0][prompt_length:].tolist()

    # parsing thinking content
    try:
        # rindex finding 151668 (</think>)
        index = len(output_ids) - output_ids[::-1].index(151668)
    except ValueError:
        # No </think> token in the output: treat everything as final content.
        index = 0

    thinking_content = tokenizer.decode(output_ids[:index], skip_special_tokens=True).strip("\n")
    content = tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n")

    print("thinking content:", thinking_content)
    print("content:", content)
    print("token length:", len(output_ids))
    return thinking_content, content

# Model (vLLM)

In [None]:
import pandas as pd
import numpy as np
import re
from transformers import AutoModelForCausalLM, AutoTokenizer
import tqdm
from vllm import LLM, SamplingParams
import torch

In [None]:
# Inputs for the vLLM run: the test questions and the submission file.
df = pd.read_csv("/project/ai901503-ai0003/kiki/test_edit.csv")
submission = pd.read_csv("/project/ai901503-ai0003/kiki/submission.csv")
# Checkpoint directory handed to the vLLM engine.
model_name = '/project/ai901503-ai0003/Model/Qwen3-30B-A3B'
# NOTE(review): this tokenizer path names a typhoon2.1-gemma3-12b checkpoint while
# `model_name` is Qwen3-30B-A3B — confirm the mismatch is intended; anything counted
# or encoded with this tokenizer will not match the generating model's tokenization.
tokenizer_name = '/project/ai901503-ai0003/kiki/model/typhoon2.1-gemma3-12b'

In [None]:
# Load the tokenizer from `tokenizer_name` (a different checkpoint directory than `model_name`).
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)

In [None]:
# Sampling configuration passed to every generation call.
sampling_params = SamplingParams(
    temperature=0.7,
    top_p=0.8,
    top_k=10,
    min_p=0,
    max_tokens=4096,
)

# vLLM engine for the checkpoint at `model_name`: float16 weights, tensor_parallel_size=4.
model = LLM(
    model=model_name,
    dtype=torch.float16,
    tensor_parallel_size=4,
    task="generate",
)

In [None]:
def model_generate(question, system_prompt=None):
    """Ask the vLLM model whether a stock will rise or fall and extract its answer.

    Args:
        question (str): User-turn text (here: the feature/EDA summary of a stock).
        system_prompt (str | None): System prompt to use. When ``None`` the
            built-in Rise/Fall prompt below is used. (Previously this argument
            was silently overwritten by the built-in prompt.)

    Returns:
        str | float: The stripped text inside the last ``<answer>...</answer>``
        pair of the generated text, or ``np.nan`` when no such pair exists.

    Also prints the number of generated tokens, the raw generated text, and the
    extracted answer. Relies on the notebook-level ``model`` (vLLM ``LLM``) and
    ``sampling_params``.
    """
    if system_prompt is None:
        system_prompt = '''
        ###Role
        You are a financial time series expert. 

        ###Instructions
        The following is a summary of engineered features and exploratory data analysis from a stock price dataset. 
        Use this information to reason whether the price is likely to rise or fall in the next 5 days.

        ###Details
        - Rise is when the price increases after 5 days.
        - Fall is when the price decreases after 5 days.

        ###Output
        1. **Always** give short reasons why the price is likely to rise or fall.
        2. Use concise language.
        3. Answer with a single choice: "Rise" or "Fall". **ONLY use these two options**
        4. Your final answer must be wrapped **exactly** in: <answer>YourAnswer</answer> **THIS IS VERY IMPORTANT**


        '''

    # Keep "/think" as a separate word: fused to the neighbouring text
    # ("...<last word>/thinkLet's ...") it no longer reads as the Qwen3 soft switch.
    user_content = question + " /think " + "Let's think step by step and answer the question."
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_content},
    ]

    outputs = model.chat(messages, sampling_params)
    completion = outputs[0].outputs[0]

    # Count the tokens the engine actually generated instead of re-encoding the
    # text with the notebook-level tokenizer, which belongs to another model.
    token = len(completion.token_ids)
    print(f"Token length: {token}")
    print(completion.text)

    # The generated text can contain <answer> tags more than once (e.g. the
    # reasoning trace quoting the instruction); the final answer is the last one.
    answers = re.findall(r'<answer>(.*?)</answer>', completion.text)

    if answers:
        result = answers[-1].strip()
        print(result)
        return result

    print(np.nan)
    return np.nan

In [None]:
# Presumably selects the stock (Rise/Fall) questions from `df`.
# NOTE(review): `get_stock_df` is not defined in this notebook — likely comes from `from helper import *`; confirm.
stock_df = get_stock_df(df)

In [None]:
# Working copy of the stock questions with an empty `answer` column and a fresh
# 0..N-1 index, so rows can be addressed by their position in the inference loop.
stock_submission = stock_df.assign(answer=np.nan).reset_index(drop=True)

In [None]:
# Generate an answer for every stock question, saving progress after each one.
stock_output_path = "/project/ai901503-ai0003/kiki/submission_stock_Final.csv"

# `answer` was created as an all-NaN (float) column; hold it as objects so the
# string labels can be stored without pandas' incompatible-dtype upcast warning.
stock_submission['answer'] = stock_submission['answer'].astype(object)

for n, question in enumerate(tqdm.tqdm(stock_df['query'])):
    prompt = cal_stat(question)
    print(prompt)
    # model_generate supplies its own Rise/Fall system prompt, so none is passed
    # here (the `system_prompt` name used before is not defined in any notebook cell).
    output = model_generate(prompt, None)

    # Write into the existing `answer` column. The previous "ansewr" typo created
    # a separate column, left `answer` all-NaN, and made the later merge/fillna
    # step discard every stock prediction.
    stock_submission.loc[n, "answer"] = output
    # Checkpoint after every question so a crash does not lose finished answers.
    stock_submission.to_csv(stock_output_path, index=False)
    print("\n--------------------------------------------------\n", n)

In [None]:
# Replace `submission` with the answers of an earlier run (the file name suggests
# Qwen3-30B-A3B with an English prompt in thinking mode).
submission = pd.read_csv('/project/ai901503-ai0003/kiki/Qwen3-30B-A3-English_Prompt-Thinking.csv')

In [None]:
# Left join on `id`: every row of `submission` is kept, and columns present in both
# frames get pandas' default `_x` (submission) / `_y` (stock_submission) suffixes.
final_submission = pd.merge(submission, stock_submission, how="left", on="id")

In [None]:
# Prefer the stock-specific answer (`answer_y`); where it is missing, fall back
# to the answer already present in `submission` (`answer_x`).
stock_answers = final_submission['answer_y']
fallback_answers = final_submission['answer_x']
final_submission['answer_y'] = stock_answers.fillna(fallback_answers)

In [None]:
final_submission

In [None]:
# Keep only the id and the combined answer, exposed under the column name `answer`.
combined_answers = final_submission[['id', 'answer_y']]
super_final_submission = combined_answers.rename(columns={'answer_y': 'answer'})

In [None]:
# Write the combined (id, answer) table to disk without the DataFrame index.
super_final_submission.to_csv("/project/ai901503-ai0003/kiki/submission_Rise_fall_zeroshot_abitCoT.csv", index=False)