In [None]:
# !pip install datasets
# !pip install evaluate
# !pip install bert_score
# !pip install bitsandbytes
# !pip install peft
# # !pip install -U bitsandbytes


Collecting datasets
  Downloading datasets-3.3.2-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Downloading datasets-3.3.2-py3-none-any.whl (485 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m485.4/485.4 kB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m10.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading multiprocess-0.70.16-py311-none-any.whl (143 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.5/143.5 kB[0m [31m12.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading 

In [None]:
import torch
import torch.nn.functional as F
import numpy as np
import pandas as pd
import evaluate
import bitsandbytes

from sklearn.model_selection import train_test_split
from bert_score import score as bert_score
from datasets import load_dataset

from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    T5ForConditionalGeneration,
    T5Tokenizer,
    Trainer,
    TrainingArguments,
    DataCollatorForLanguageModeling,
    BitsAndBytesConfig
)

from peft import (
    PeftModel,
    PeftConfig,
    LoraConfig,
    get_peft_model,
    prepare_model_for_kbit_training,
    TaskType
)


In [None]:

def load_llama_model():
    """Load the Llama 3.2 3B Instruct checkpoint together with its tokenizer.

    Returns:
        tuple: ``(model, tokenizer)`` exactly as returned by the two
        ``from_pretrained`` calls; the model is requested with
        ``device_map="auto"``.
    """
    checkpoint = "meta-llama/Llama-3.2-3B-Instruct"
    llama_model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map="auto")
    llama_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    return llama_model, llama_tokenizer

def preprocess_datasets():
    """Load OpenBookQA ("main" config) and REVEAL, filtering the former.

    OpenBookQA rows are kept only when they carry a non-empty ``explanation``
    value containing more than five whitespace-separated words. REVEAL is
    returned untouched.

    NOTE(review): rows without an ``explanation`` key are all rejected, so if
    the loaded config does not expose that column the filtered result is
    empty -- confirm the column exists.

    Returns:
        tuple: ``(filtered_openbookqa, reveal)``.
    """

    def filter_bad_chains(example):
        # Missing key and empty/None value are treated the same way: reject.
        explanation = example['explanation'] if 'explanation' in example else None
        if not explanation:
            return False
        return len(explanation.split()) > 5

    openbookqa = load_dataset("openbookqa", "main")
    reveal = load_dataset("google/reveal")
    return openbookqa.filter(filter_bad_chains), reveal




In [None]:
# !huggingface-cli login


In [None]:
# llama_model, llama_tokenizer = load_llama_model()


In [None]:
# Load both corpora; the OpenBookQA splits come back already filtered by preprocess_datasets().
dataset_openbookqa, dataset_reveal = preprocess_datasets()


README.md:   0%|          | 0.00/9.06k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/496k [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/58.2k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/55.5k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/4957 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/500 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/500 [00:00<?, ? examples/s]

README.md:   0%|          | 0.00/10.3k [00:00<?, ?B/s]

reveal_eval.csv:   0%|          | 0.00/11.3M [00:00<?, ?B/s]

reveal_open.csv:   0%|          | 0.00/2.75M [00:00<?, ?B/s]

Generating eval split:   0%|          | 0/4956 [00:00<?, ? examples/s]

Generating open split:   0%|          | 0/1146 [00:00<?, ? examples/s]

Filter:   0%|          | 0/4957 [00:00<?, ? examples/s]

Filter:   0%|          | 0/500 [00:00<?, ? examples/s]

Filter:   0%|          | 0/500 [00:00<?, ? examples/s]

In [None]:
# Show the REVEAL splits and their column names.
print(dataset_reveal)


DatasetDict({
    eval: Dataset({
        features: ['dataset', 'question', 'question_id', 'answer_model', 'step_idx', 'full_answer', 'step', 'decontextualized_step', 'attribution_relevance_label', 'attribution_relevance_majority', 'attribution_relevance_annotations', 'attribution_relevance_raters', 'attribution_relevance_num_ratings', 'evidence_id', 'evidence', 'attribution_label', 'attribution_majority', 'attribution_annotations', 'attribution_raters', 'attribution_num_ratings', 'attribution_justifications', 'annotated_in_attribution_batch', 'type_label', 'type_majority', 'type_annotations', 'type_raters', 'type_num_ratings', 'logic_relevance_label', 'logic_relevance_majority', 'logic_relevance_annotations', 'logic_relevance_raters', 'logic_relevance_num_ratings', 'logic_justifications', 'annotated_in_logic_batch', 'correctness_label', 'correctness_majority', 'correctness_annotations', 'correctness_raters', 'correctness_num_ratings', 'answer_id', 'attribution_majority_all_evidences',

In [None]:
    # Format dataset for causal language modeling with improved prompting
    def format_example(example):
        """Turn one REVEAL row into a supervised prompt for the verifier.

        The prompt is ``instruction + few-shot examples + task + target + EOS``.
        Rows whose ``type_label`` contains "Logical" get the logical-step
        instruction and examples (with "Previous steps" context); every other
        row gets the generic variant.

        Args:
            example: Mapping with (at least) ``correctness_label`` and,
                optionally, ``question``, ``step``, ``type_label`` and
                ``full_answer``. Missing or ``None`` fields are treated as
                empty.

        Returns:
            ``None`` when the row has no correctness label, otherwise
            ``{"input_text": <prompt>, "label": <correctness_label>}``.
        """
        # Only process examples with valid labels
        correctness = example.get("correctness_label")
        if correctness is None:
            return None

        # `or` (rather than a .get default) also covers keys that are present
        # but hold None, which would otherwise break the `in` checks below.
        question = example.get("question") or ""
        step = example.get("step") or ""
        step_type = example.get("type_label") or "Unknown step type"
        full_answer = example.get("full_answer") or ""

        # Logical steps are judged against earlier reasoning; others stand alone.
        is_logical_step = "Logical" in step_type

        # Approximate the previous steps from the text that precedes the
        # current step inside the full answer (sentence split on ". " / newline).
        previous_steps_text = ""
        if step and step in full_answer:
            preceding_text = full_answer[:full_answer.find(step)].strip()
            sentences = [s.strip() for s in preceding_text.replace('\n', '. ').split('. ') if s.strip()]
            if sentences:
                previous_steps_text = "Previous steps:\n" + "\n".join(f"- {s}" for s in sentences) + "\n\n"

        if is_logical_step:
            instruction = (
                "You are a reasoning verifier. Your task is to determine if a reasoning step is logically correct "
                "given the context of a question and previous reasoning steps. Focus on whether the step follows "
                "logically from what came before, not just whether it's factually accurate on its own."
            )
            few_shot_example = (
                "Example 1:\n"
                "Question: What is 5 + 7?\n"
                "Previous steps:\n"
                "- 5 + 7 can be calculated directly.\n"
                "Reasoning Step: The sum of 5 and 7 is 12.\n"
                "Is the reasoning step valid? Yes, this reasoning step is valid.\n\n"

                "Example 2:\n"
                "Question: How many planets are in our solar system?\n"
                "Previous steps:\n"
                "- There are 8 recognized planets in our solar system.\n"
                "- Pluto used to be considered the 9th planet.\n"
                "Reasoning Step: Therefore, there are 9 planets in our solar system.\n"
                "Is the reasoning step valid? No, this reasoning step is invalid. The previous step states that Pluto is no longer considered a planet.\n\n"

                "Example 3:\n"
                "Question: If microchip makers didn't keep trying to shrink their chips, how fast could your average home computer be by now?\n"
                "Previous steps:\n"
                "- The speed of a computer is determined by the number of transistors on the chip.\n"
                "- The number of transistors on a chip is inversely proportional to the size of each transistor.\n"
                "Reasoning Step: Thus, if microchip makers didn't keep trying to shrink their chips, there would be fewer transistors, resulting in slower computers.\n"
                "Is the reasoning step valid? Yes, this reasoning step is valid. It correctly follows from the previous steps about the relationship between transistor size and computer speed.\n\n"

                "Example 4:\n"
                "Question: What would happen to Earth's climate if the sun suddenly became twice as bright?\n"
                "Previous steps:\n"
                "- The sun provides most of Earth's energy input.\n"
                "- Earth's temperature is determined by the balance of incoming and outgoing radiation.\n"
                "Reasoning Step: If the sun became twice as bright, Earth's climate would cool down dramatically.\n"
                "Is the reasoning step valid? No, this reasoning step is invalid. If the sun became twice as bright, more energy would reach Earth, leading to warming, not cooling.\n\n"

                "Example 5:\n"
                "Question: How would a universal basic income affect poverty rates?\n"
                "Previous steps:\n"
                "- A universal basic income provides regular payments to all citizens regardless of work status.\n"
                "- Poverty is defined as income below a certain threshold.\n"
                "- Some current welfare programs are means-tested and phase out as income increases.\n"
                "Reasoning Step: Therefore, a universal basic income would eliminate all poverty instantly.\n"
                "Is the reasoning step valid? No, this reasoning step is invalid. While a UBI would provide income, the conclusion that it would eliminate all poverty is too strong and doesn't follow logically from the previous steps, which don't specify the amount of UBI or how it compares to poverty thresholds.\n\n"
            )
        else:
            instruction = (
                "You are a reasoning verifier. Your task is to determine if a reasoning step is valid "
                "given the context of a question. Evaluate whether the step is reasonable and contributes "
                "to answering the question."
            )
            few_shot_example = (
                "Example 1:\n"
                "Question: What is the capital of France?\n"
                "Reasoning Step: Paris is the capital of France.\n"
                "Is the reasoning step valid? Yes, this reasoning step is valid.\n\n"

                "Example 2:\n"
                "Question: What is 2+2?\n"
                "Reasoning Step: 2+2=5\n"
                "Is the reasoning step valid? No, this reasoning step is invalid.\n\n"

                "Example 3:\n"
                "Question: How many bones are in the human body?\n"
                "Reasoning Step: The adult human skeleton contains 206 bones, though this number varies slightly between individuals.\n"
                "Is the reasoning step valid? Yes, this reasoning step is valid. It correctly states the standard number of bones in an adult human.\n\n"

                "Example 4:\n"
                "Question: What causes ocean tides?\n"
                "Reasoning Step: Ocean tides are primarily caused by the gravitational forces exerted by the sun and stars.\n"
                "Is the reasoning step valid? No, this reasoning step is invalid. Tides are primarily caused by the gravitational forces of the moon and sun, not stars.\n\n"

                "Example 5:\n"
                "Question: When did World War II end?\n"
                "Reasoning Step: World War II ended in Europe on May 8, 1945 (V-E Day) and in Asia on September 2, 1945 (V-J Day) when Japan formally surrendered.\n"
                "Is the reasoning step valid? Yes, this reasoning step is valid. It accurately presents the commonly accepted end dates of World War II in different theaters.\n\n"

                "Example 6:\n"
                "Question: What happens during photosynthesis?\n"
                "Reasoning Step: During photosynthesis, plants convert water and oxygen into glucose and carbon dioxide using energy from sunlight.\n"
                "Is the reasoning step valid? No, this reasoning step is invalid. Photosynthesis converts water and carbon dioxide into glucose and oxygen, not the other way around.\n\n"
            )

        # Format the actual task
        task = (
            f"Question: {question}\n"
            f"{previous_steps_text}"
            f"Reasoning Step: {step}\n"
            f"Is the reasoning step valid?"
        )

        # Anything other than "Correct" is trained as an "invalid" answer.
        if correctness == "Correct":
            target = " Yes, this reasoning step is valid."
        else:
            target = " No, this reasoning step is invalid."

        # Terminate with the tokenizer's own EOS token. A hard-coded "</s>" is
        # only an end-of-sequence marker for tokenizers that define it as one;
        # for any other tokenizer it is plain text and teaches no stop signal.
        eos_token = getattr(verifier_tokenizer, "eos_token", None) or ""

        full_prompt = f"{instruction}\n\n{few_shot_example}{task}{target}{eos_token}"

        return {"input_text": full_prompt, "label": correctness}

    # Build the prompt/label columns, then drop anything that came back as None
    formatted_dataset = eval_reveal.map(format_example)
    formatted_dataset = formatted_dataset.filter(lambda x: x is not None)

    total_formatted = len(formatted_dataset)
    print(f"Total formatted examples: {total_formatted}")

    # Preview the first 500 characters of up to three prompts
    print("\nSample formatted examples:")
    preview_count = min(3, total_formatted)
    for position in range(1, preview_count + 1):
        preview = formatted_dataset[position - 1]['input_text'][:500]
        print(f"Example {position}:\n{preview}...\n")

    # Tally both label values in a single pass over the dataset
    label_totals = {"Correct": 0, "Incorrect": 0}
    for item in formatted_dataset:
        if item["label"] in label_totals:
            label_totals[item["label"]] += 1
    correct_count = label_totals["Correct"]
    incorrect_count = label_totals["Incorrect"]

    print("Label distribution:")
    print(f"  Correct: {correct_count} ({correct_count/total_formatted*100:.2f}%)")
    print(f"  Incorrect: {incorrect_count} ({incorrect_count/total_formatted*100:.2f}%)")

    # Tokenize the dataset for causal LM
    def tokenize_example(example):
        """Tokenize one formatted prompt into fixed-length causal-LM features.

        Args:
            example: Mapping with an ``"input_text"`` string.

        Returns:
            dict with 1-D ``input_ids``, ``attention_mask`` and ``labels``
            tensors. ``labels`` copies ``input_ids`` with every padded
            position set to -100.
        """
        # Pad/truncate every prompt to the same 768-token window.
        encoded = verifier_tokenizer(
            example["input_text"],
            padding="max_length",
            truncation=True,
            max_length=768,  # Increased max length to accommodate the examples
            return_tensors="pt"
        )

        # return_tensors="pt" yields a batch of one; unwrap it.
        input_ids = encoded["input_ids"][0]
        attention_mask = encoded["attention_mask"][0]

        # Mask padding by position (attention_mask == 0), not by token id.
        # When the pad token is aliased to EOS, comparing against pad_token_id
        # would also erase the genuine end-of-sequence token from the targets.
        # NOTE(review): a data collator that rebuilds labels from input_ids
        # would override these values -- confirm which labels reach the model.
        labels = input_ids.clone()
        labels[attention_mask == 0] = -100

        return {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "labels": labels
        }

    # Tokenize every formatted prompt; the text and label columns are dropped so
    # only the tokenizer features remain.
    tokenized_dataset = formatted_dataset.map(
        tokenize_example,
        remove_columns=["input_text", "label"]
    )

    # Balance the dataset to have equal numbers of valid and invalid examples.
    # Labels are looked up by index in formatted_dataset, so this relies on
    # tokenized_dataset and formatted_dataset sharing the same row order.
    valid_examples = [ex for i, ex in enumerate(tokenized_dataset) if formatted_dataset[i]["label"] == "Correct"]
    invalid_examples = [ex for i, ex in enumerate(tokenized_dataset) if formatted_dataset[i]["label"] == "Incorrect"]

    print(f"Valid examples: {len(valid_examples)}")
    print(f"Invalid examples: {len(invalid_examples)}")

    # To balance, take the minimum count and sample from the larger group
    min_count = min(len(valid_examples), len(invalid_examples))
    balanced_count = min(min_count, 3000)  # Cap at 3000 examples per class to keep training manageable

    # Sample from both groups to create a balanced dataset.
    # The fixed seed makes the down-sampling and the shuffle below repeatable.
    import random
    random.seed(42)
    if len(valid_examples) > balanced_count:
        valid_examples = random.sample(valid_examples, balanced_count)
    else:
        # Reached only when the list already has <= balanced_count items, so
        # the slice keeps it whole.
        valid_examples = valid_examples[:balanced_count]

    if len(invalid_examples) > balanced_count:
        invalid_examples = random.sample(invalid_examples, balanced_count)
    else:
        invalid_examples = invalid_examples[:balanced_count]

    # Combine and shuffle the balanced examples
    balanced_examples = valid_examples + invalid_examples
    random.shuffle(balanced_examples)

    print(f"Total balanced examples: {len(balanced_examples)}")

    # Split into train and validation sets (85% / 15%, fixed random_state)
    train_data, val_data = train_test_split(balanced_examples, test_size=0.15, random_state=42)

    print(f"Training examples: {len(train_data)}")
    print(f"Validation examples: {len(val_data)}")

    # Training arguments - optimized for memory efficiency but with longer training.
    # Note: the output directory name mentions "100_epoch", but the actual epoch
    # count comes from num_epochs.
    training_args = TrainingArguments(
        output_dir="./gemma_verifier_enhanced_lora_100_epoch",
        evaluation_strategy="steps",
        eval_steps=100,
        save_strategy="steps",
        save_steps=100,
        per_device_train_batch_size=1,
        gradient_accumulation_steps=16,  # Larger effective batch size
        per_device_eval_batch_size=1,
        learning_rate=1e-4,
        weight_decay=0.01,
        warmup_ratio=0.1,
        save_total_limit=3,
        load_best_model_at_end=True,
        fp16=True,  # Use FP16 during training
        logging_steps=10,
        num_train_epochs=num_epochs,  # More epochs
        report_to="none"  # Disable wandb or other reporting tools if not needed
    )

    # Create data collator
    # NOTE(review): with mlm=False this collator presumably derives labels from
    # input_ids itself, which would replace the labels built during
    # tokenization -- confirm against the transformers documentation.
    data_collator = DataCollatorForLanguageModeling(
        tokenizer=verifier_tokenizer,
        mlm=False  # We're doing causal language modeling, not masked language modeling
    )

    # Create trainer; train_data / val_data are plain Python lists of feature dicts
    trainer = Trainer(
        model=verifier_model,
        args=training_args,
        train_dataset=train_data,
        eval_dataset=val_data,
        data_collator=data_collator,
        tokenizer=verifier_tokenizer
    )

    # Train the model
    trainer.train()

    # Save the model with all necessary components
    peft_model_path = "./gemma_verifier_enhanced_lora_final_100_epoch"
    verifier_model.save_pretrained(peft_model_path)
    verifier_tokenizer.save_pretrained(peft_model_path)

    # Also write verifier_model.config into the same directory.
    # NOTE(review): on a PEFT-wrapped model `.config` may be the base model's
    # config rather than the adapter config -- confirm this is what is wanted.
    verifier_model.config.save_pretrained(peft_model_path)

    # Define improved prediction function with context handling
    def predict_validity(question, step, previous_steps=None, step_type="Unknown"):
        """
        Predict whether a reasoning step is valid.

        The prompt mirrors the training format (instruction, few-shot examples,
        task) and the verdict is read from the model's own continuation only:
        the prompt tokens are stripped before decoding and just the first line
        of the reply is interpreted, so any further "examples" the model keeps
        generating cannot overwrite the actual answer.

        NOTE(review): the training prompts built elsewhere in this file use
        five/six few-shot examples while this prompt uses two -- confirm the
        mismatch is intended.

        Args:
            question (str): The question being answered
            step (str): The reasoning step to verify
            previous_steps (list, optional): Previous reasoning steps
            step_type (str, optional): Type of step ("Logical step" or "Attribution step")

        Returns:
            bool: True if the step is valid, False otherwise (including when
            the reply is empty or ambiguous).
        """
        import re

        is_logical_step = "Logical" in (step_type or "")

        # Format previous steps if provided
        previous_steps_text = ""
        if previous_steps:
            previous_steps_text = "Previous steps:\n" + "\n".join(f"- {s}" for s in previous_steps if s) + "\n\n"

        # Create appropriate instruction based on step type
        if is_logical_step:
            instruction = (
                "You are a reasoning verifier. Your task is to determine if a reasoning step is logically correct "
                "given the context of a question and previous reasoning steps. Focus on whether the step follows "
                "logically from what came before, not just whether it's factually accurate on its own."
            )
            few_shot_example = (
                "Example 1:\n"
                "Question: What is 5 + 7?\n"
                "Previous steps:\n"
                "- 5 + 7 can be calculated directly.\n"
                "Reasoning Step: The sum of 5 and 7 is 12.\n"
                "Is the reasoning step valid? Yes, this reasoning step is valid.\n\n"
                "Example 2:\n"
                "Question: How many planets are in our solar system?\n"
                "Previous steps:\n"
                "- There are 8 recognized planets in our solar system.\n"
                "- Pluto used to be considered the 9th planet.\n"
                "Reasoning Step: Therefore, there are 9 planets in our solar system.\n"
                "Is the reasoning step valid? No, this reasoning step is invalid. The previous step states that Pluto is no longer considered a planet.\n\n"
            )
        else:
            instruction = (
                "You are a reasoning verifier. Your task is to determine if a reasoning step is valid "
                "given the context of a question. Evaluate whether the step is reasonable and contributes "
                "to answering the question."
            )
            few_shot_example = (
                "Example 1:\n"
                "Question: What is the capital of France?\n"
                "Reasoning Step: Paris is the capital of France.\n"
                "Is the reasoning step valid? Yes, this reasoning step is valid.\n\n"
                "Example 2:\n"
                "Question: What is 2+2?\n"
                "Reasoning Step: 2+2=5\n"
                "Is the reasoning step valid? No, this reasoning step is invalid.\n\n"
            )

        # Format the task
        task = (
            f"Question: {question}\n"
            f"{previous_steps_text}"
            f"Reasoning Step: {step}\n"
            f"Is the reasoning step valid?"
        )

        # Combine everything into the full prompt
        input_text = f"{instruction}\n\n{few_shot_example}{task}"

        inputs = verifier_tokenizer(input_text, return_tensors="pt").to(verifier_model.device)

        # Greedy decoding; sampling knobs (temperature/top_p) are omitted
        # because they have no effect when do_sample=False.
        with torch.no_grad():
            outputs = verifier_model.generate(
                **inputs,
                max_new_tokens=40,
                do_sample=False,
            )

        # The generated sequence starts with the prompt; keep only the new
        # tokens so the few-shot answers in the prompt are never parsed.
        prompt_length = inputs["input_ids"].shape[1]
        response = verifier_tokenizer.decode(
            outputs[0][prompt_length:], skip_special_tokens=True
        ).strip()

        # The verdict is expected on the first line; anything after a newline
        # is treated as run-on generation and ignored.
        verdict_line = response.split("\n", 1)[0].strip().lower()
        if not verdict_line:
            # Fall back to a conservative approach
            return False

        # Canonical answers start with "Yes" / "No".
        if re.match(r"yes\b", verdict_line):
            return True
        if re.match(r"no\b", verdict_line):
            return False

        # Otherwise count whole-word indicators. Word boundaries stop "valid"
        # matching inside "invalid" and "no" matching inside "know"/"another".
        valid_indicators = ["yes", "valid", "correct", "accurate", "true"]
        invalid_indicators = ["no", "invalid", "incorrect", "wrong", "false", "error", "mistake"]

        def _count(indicators):
            pattern = r"\b(?:" + "|".join(re.escape(word) for word in indicators) + r")\b"
            return len(re.findall(pattern, verdict_line))

        valid_count = _count(valid_indicators)
        invalid_count = _count(invalid_indicators)

        # Ties and indicator-free replies fall back to the conservative False.
        return valid_count > invalid_count

    return verifier_model, verifier_tokenizer, predict_validity






def fine_tune_verifier(num_epochs=50):
    """Set up a LoRA-wrapped Gemma-2B verifier and the filtered REVEAL eval split.

    The statements below load ``google/gemma-2b`` in FP16, attach LoRA adapters
    to the attention projections, load ``google/reveal`` and filter its
    ``eval`` split.

    NOTE(review): the rest of this routine (prompt formatting, training, and
    the ``return`` of model/tokenizer/predictor) appears as an indented
    fragment elsewhere in this file -- confirm the intended cell ordering.

    Args:
        num_epochs: Epoch count; it is consumed by the training arguments in
            that indented fragment.
    """
    # Load model and tokenizer in FP16 (no quantization)
    model_name = "google/gemma-2b"

    # Load model with FP16 (no quantization)
    verifier_model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16,  # Use FP16 instead of quantization
        device_map="auto"
    )
    verifier_tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Set padding token if needed: fall back to EOS as the pad token and
    # mirror the id into the model config
    if verifier_tokenizer.pad_token is None:
        verifier_tokenizer.pad_token = verifier_tokenizer.eos_token
        verifier_model.config.pad_token_id = verifier_tokenizer.pad_token_id

    # Enable gradient checkpointing to save memory
    verifier_model.gradient_checkpointing_enable()

    # Define LoRA configuration for Gemma: adapters on the four attention
    # projections only
    lora_config = LoraConfig(
        r=8,  # Small rank to save memory
        lora_alpha=16,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        lora_dropout=0.05,
        bias="none",
        task_type=TaskType.CAUSAL_LM
    )

    # Apply LoRA to the model
    verifier_model = get_peft_model(verifier_model, lora_config)

    # Print trainable parameters info
    verifier_model.print_trainable_parameters()

    # Load REVEAL dataset
    reveal = load_dataset("google/reveal")

    # Analyze the dataset
    print(f"Dataset splits: {reveal.keys()}")
    print(f"Eval split size: {len(reveal['eval'])}")
    if 'open' in reveal:
        print(f"Open split size: {len(reveal['open'])}")

    # Filter out low-agreement annotations from eval split; rows without the
    # flag are kept (the .get default is False)
    eval_reveal = reveal["eval"].filter(lambda example: not example.get("is_low_agreement_hard_case", False))

    # Ensure correctness_label exists and is valid (a non-None string) before processing
    eval_reveal = eval_reveal.filter(
        lambda example: example.get("correctness_label") is not None and
                       isinstance(example.get("correctness_label"), str)
    )


In [None]:
   # Run the fine-tuning routine with its default num_epochs=50 and unpack the three values it is expected to return.
   verifier_model, verifier_tokenizer, predict_validity = fine_tune_verifier()


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

trainable params: 1,843,200 || all params: 2,508,015,616 || trainable%: 0.0735
Dataset splits: dict_keys(['eval', 'open'])
Eval split size: 4956
Open split size: 1146
Total formatted examples: 1256

Sample formatted examples:
Example 1:
You are a reasoning verifier. Your task is to determine if a reasoning step is logically correct given the context of a question and previous reasoning steps. Focus on whether the step follows logically from what came before, not just whether it's factually accurate on its own.

Example 1:
Question: What is 5 + 7?
Previous steps:
- 5 + 7 can be calculated directly.
Reasoning Step: The sum of 5 and 7 is 12.
Is the reasoning step valid? Yes, this reasoning step is valid.

Example 2:
Question: How ...

Example 2:
You are a reasoning verifier. Your task is to determine if a reasoning step is logically correct given the context of a question and previous reasoning steps. Focus on whether the step follows logically from what came before, not just whether it's

  trainer = Trainer(


Step,Training Loss,Validation Loss
100,0.1627,0.169101
200,0.1217,0.138981
300,0.1047,0.134835
400,0.0939,0.137707
500,0.0837,0.1469
600,0.0703,0.167804
700,0.0641,0.179298
800,0.0594,0.189314
