<a href="https://colab.research.google.com/github/LeDuy23/-stock-analyzer-/blob/main/Riddles.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Baseline API keys

In [None]:
import os

import openai
import requests

# API keys.
# Each key is read from the environment variable of the same name so that real
# credentials never have to be typed into (and saved with) the notebook. The
# original placeholder strings remain as fallbacks, so nothing changes when the
# variables are not set.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "YOUR_OPENAI_API_KEY")
LLAMA_API_KEY = os.environ.get("LLAMA_API_KEY", "YOUR_LLAMA_API_KEY")
DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY", "YOUR_DEEPSEEK_API_KEY")

# Register the OpenAI key on the openai module itself
openai.api_key = OPENAI_API_KEY


### API Calls

In [None]:
def query_gpt4(prompt):
    """Send a single-turn prompt to GPT-4 and return the reply text.

    Works with both generations of the ``openai`` package: releases from 1.0
    onward removed ``openai.ChatCompletion`` and return response objects
    instead of dicts, so the call path is chosen by what the installed
    package exposes.

    Args:
        prompt: Text sent as the only ``user`` message.

    Returns:
        The ``content`` of the first choice in the response.
    """
    messages = [{"role": "user", "content": prompt}]

    if hasattr(openai, "OpenAI"):
        # openai>=1.0: module-level client, configured through openai.api_key.
        response = openai.chat.completions.create(model="gpt-4", messages=messages)
        return response.choices[0].message.content

    # openai<1.0: legacy interface with dict-style responses.
    response = openai.ChatCompletion.create(model="gpt-4", messages=messages)
    return response["choices"][0]["message"]["content"]


In [None]:
def query_llama(prompt, max_tokens=100, timeout=60):
    """Request a text completion for ``prompt`` from the Llama endpoint.

    Args:
        prompt: Text to complete.
        max_tokens: Value sent as ``max_tokens`` in the request body
            (defaults to 100).
        timeout: Seconds ``requests`` waits on the server (defaults to 60);
            without it a stalled connection would block the cell forever.

    Returns:
        The first choice's ``text`` with surrounding whitespace stripped.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    url = "https://api.together.xyz/v1/completions"  # Replace with your provider
    headers = {
        "Authorization": f"Bearer {LLAMA_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": "meta-llama/llama-3-8b",  # Specify model version
        "prompt": prompt,
        "max_tokens": max_tokens,
    }
    response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    # Surface HTTP failures directly instead of as a KeyError on the
    # "choices" lookup below when the body is an error payload.
    response.raise_for_status()
    return response.json()["choices"][0]["text"].strip()


In [None]:
def query_deepseek(prompt, max_tokens=100, timeout=60):
    """Request a text completion for ``prompt`` from the DeepSeek endpoint.

    Args:
        prompt: Text to complete.
        max_tokens: Value sent as ``max_tokens`` in the request body
            (defaults to 100).
        timeout: Seconds ``requests`` waits on the server (defaults to 60);
            without it a stalled connection would block the cell forever.

    Returns:
        The first choice's ``text`` with surrounding whitespace stripped.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    url = "https://api.deepseek.com/v1/completions"  # Replace with your API provider
    headers = {
        "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": "deepseek-reasoning",
        "prompt": prompt,
        "max_tokens": max_tokens,
    }
    response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    # Surface HTTP failures directly instead of as a KeyError on the
    # "choices" lookup below when the body is an error payload.
    response.raise_for_status()
    return response.json()["choices"][0]["text"].strip()


# Everything below is for training the o1 model

### Load Dataset into Colab

In [None]:
import os
import pandas as pd

# Directory that holds the BiRdQA CSV files
data_path = "/content/Riddle_data"

# Show what is present in the dataset directory
print("Available Files:", os.listdir(data_path))

# Read the train / test / dev splits, English first and then Chinese.
# The generator yields the frames in exactly the order of the names on the left.
train_en, test_en, dev_en, train_zh, test_zh, dev_zh = (
    pd.read_csv(f"{data_path}/BiRdQA_{lang}_{split}.csv")
    for lang in ("en", "zh")
    for split in ("train", "test", "dev")
)

# Report the .shape of every split, one line per split
shape_report = [
    f"Train (EN): {train_en.shape}, Train (ZH): {train_zh.shape}",
    f"Test (EN): {test_en.shape}, Test (ZH): {test_zh.shape}",
    f"Dev (EN): {dev_en.shape}, Dev (ZH): {dev_zh.shape}",
]
print("\n".join(shape_report))



Available Files: ['BiRdQA_zh_dev.csv', 'wiki_intro.json', 'BiRdQA_en_dev.csv', 'BiRdQA_zh_train.csv', 'BiRdQA_zh_test.csv', 'BiRdQA_en_test.csv', 'BiRdQA_en_train.csv']
Train (EN): (4093, 7), Train (ZH): (5943, 7)
Test (EN): (1460, 7), Test (ZH): (1766, 7)
Dev (EN): (1061, 7), Dev (ZH): (1042, 7)


In [None]:
pip install torch transformers datasets accelerate pandas numpy matplotlib seaborn nltk openai requests sentencepiece


Collecting datasets
  Downloading datasets-3.3.2-py3-none-any.whl.metadata (19 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting 

### API key

In [None]:
import requests

# Set up GBT-o1 API key.
# Read from the GBT_O1_API_KEY environment variable when it is set, so the real
# key does not have to be saved in the notebook; otherwise the original
# placeholder string is used.
GBT_O1_API_KEY = os.environ.get("GBT_O1_API_KEY", "your_api_key_here")

# Define API endpoint (placeholder host; replace with the real endpoint)
GBT_O1_API_URL = "https://your-api-endpoint.com/v1/train"


### Load and Preprocess Data

In [None]:
import pandas as pd

# Read both parts of every split, in train / test / dev order.
# NOTE(review): the directory listing printed earlier in this notebook shows
# only BiRdQA_* CSV files under data_path; confirm these file names exist.
(
    train_data_1, train_data_2,
    test_data_1, test_data_2,
    dev_data_1, dev_data_2,
) = (
    pd.read_csv(f"{data_path}/{split}_{part}.csv")
    for split in ("train", "test", "dev")
    for part in (1, 2)
)

# Stack the two parts of each split into a single frame with a fresh index
train_data, test_data, dev_data = (
    pd.concat(parts, ignore_index=True)
    for parts in (
        [train_data_1, train_data_2],
        [test_data_1, test_data_2],
        [dev_data_1, dev_data_2],
    )
)

# Give all three splits the same four column names (assigned by position).
# NOTE(review): this raises unless every frame has exactly four columns, and
# the BiRdQA frames loaded earlier report 7; verify against the actual files.
train_data.columns = ["riddle_en", "riddle_zh", "answer_en", "answer_zh"]
test_data.columns = dev_data.columns = train_data.columns

# Drop rows containing missing values, then drop duplicate rows
train_data, test_data, dev_data = (
    frame.dropna().drop_duplicates()
    for frame in (train_data, test_data, dev_data)
)

# Report how many rows remain in each split
print(f"Train Size: {len(train_data)}, Test Size: {len(test_data)}, Dev Size: {len(dev_data)}")
