In [None]:
from openai import OpenAI
import os
import pandas as pd
import re
from tqdm import tqdm



In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Load model and tokenizer
# Both are read from the same local directory and are used by should_call_gpt().
# NOTE(review): absolute, machine-specific path — consider moving it to a
# configurable constant so the notebook runs on other machines.
model_path = "/home/liorkob/M.Sc/thesis/pre-process/classifier_sentences_punishment_range"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)
model.eval()  # switch to evaluation mode; the model is only used for prediction here


def should_call_gpt(text):
    """Decide whether `text` is worth sending to GPT.

    The text is tokenized (truncated to at most 512 tokens) and scored by the
    module-level sequence classifier. Returns True when the highest-scoring
    class index is 1, False otherwise.

    NOTE(review): class 1 presumably means "mentions a punishment range" —
    confirm against the classifier's label mapping.
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
    # Gradients are never needed for prediction, so skip tracking them.
    with torch.no_grad():
        class_scores = model(**encoded).logits
    best_class = torch.argmax(class_scores, dim=1).item()
    return best_class == 1 


In [None]:
# SECURITY: API keys must never be written into the notebook. A key was
# previously hardcoded on this line; it should be treated as compromised and
# rotated. Export OPENAI_API_KEY in the kernel's environment before running.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it in the environment before running this notebook."
    )
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
# Heuristic filters
# `pattern` is the regular expression that filter_by_pattern() searches for in
# the "text" column.
# NOTE(review): the non-ASCII literals below look mis-encoded in this view
# (presumably Hebrew phrases for "punishment range") — confirm the notebook is
# stored and read as UTF-8.
pattern = "◊û◊™◊ó. ◊î?◊¢◊ï?◊†◊ô?◊©"
# Substrings that filter_csv_relevant_parts() looks for in the "part" column;
# the first row whose "part" contains any of them marks the start of the
# relevant section.
# NOTE(review): the first entry of each row below appears to be the same
# string (a harmless duplicate for a membership test) — confirm and dedupe.
required_parts = [
    "◊û◊™◊ó◊û◊ô ◊¢◊†◊ô◊©◊î", "◊ê◊ó◊ô◊ì◊ï◊™ ◊ë◊¢◊†◊ô◊©◊î", "◊û◊™◊ó◊ù ◊î◊¢◊†◊ô◊©◊î", "◊û◊™◊ó◊ù ◊¢◊†◊ô◊©◊î",
    "◊û◊™◊ó◊û◊ô ◊¢◊†◊ô◊©◊î", "◊û◊™◊ó◊û◊ô ◊î◊¢◊†◊ô◊©◊î", "◊û◊™◊ó◊ù ◊î◊¢◊ï◊†◊©", "◊û◊™◊ó◊ù ◊¢◊ï◊†◊©"
]

def filter_csv_relevant_parts(csv_data, parts=None):
    """Return `csv_data` from the first relevant row through the end.

    A row is relevant when its "part" value (converted to str; a missing
    column counts as "") contains any of the marker substrings.

    Args:
        csv_data: DataFrame to scan, in row order.
        parts: optional iterable of marker substrings. Defaults to the
            module-level `required_parts`.

    Returns:
        The slice of `csv_data` starting at the first relevant row, or an
        empty DataFrame with the same columns when no row is relevant.
    """
    if parts is None:
        parts = required_parts
    # Track the row *position*: iterrows() yields index labels, which only
    # coincide with positions for a default RangeIndex, while .iloc is
    # strictly positional.
    for position, (_, row) in enumerate(csv_data.iterrows()):
        part_label = str(row.get("part", ""))
        if any(part in part_label for part in parts):
            return csv_data.iloc[position:]
    return pd.DataFrame(columns=csv_data.columns)

def filter_by_pattern(csv_data):
    """Return the rows of `csv_data` whose "text" matches the module-level regex `pattern`.

    Rows with a missing "text" value are treated as non-matching.
    """
    text_matches = csv_data['text'].str.contains(pattern, na=False)
    return csv_data[text_matches]

def _rows_passing_classifier(frame):
    """Return the rows of `frame` whose "text" should_call_gpt() accepts, with a fresh index."""
    if frame.empty:
        return frame.iloc[0:0]
    # Collect row positions and slice once; growing a DataFrame with
    # pd.concat inside a loop copies the frame on every iteration.
    accepted = [
        position
        for position, text in enumerate(frame["text"])
        if should_call_gpt(str(text))
    ]
    return frame.iloc[accepted].reset_index(drop=True)


def extract_punishment_range_tagging_db(
    directory_path: str,
    input_path: str = "/home/liorkob/M.Sc/thesis/data/drugs_3k/verdict_csv",
):
    """Extract punishment ranges from verdict CSVs with a classifier + GPT pipeline.

    For every ``*.csv`` in `input_path` (file names starting with
    "punishment_ranges" are skipped) candidate sentences are selected with the
    first strategy that yields rows:

    1. "required_part": rows from filter_csv_relevant_parts() that
       should_call_gpt() accepts;
    2. "pattern": rows returned by filter_by_pattern();
    3. "classifier": any row of the file that should_call_gpt() accepts.

    Each candidate is sent to GPT; when the reply matches the expected
    "<low> ... - <high> ..." format the pair is recorded.

    Args:
        directory_path: directory where the four result CSVs are written
            (created if missing).
        input_path: directory holding the per-verdict CSV files; each file is
            expected to have a "text" column.

    Writes (inside `directory_path`):
        punishment_ranges_1.csv         file_name, text, low, high
        potential_matches_1.csv         file_name, text, gpt_response
        fail_reasons_1.csv              per-file summary for this run
        debug_potential_sentences_1.csv per-sentence trace for this run

    Resuming: files already listed in punishment_ranges_1.csv are skipped.
    Files that produced no range are not listed there, so they are processed
    again on a re-run; the fail/debug logs only cover the current run.
    The two main CSVs are checkpointed every 50 extracted ranges.
    """
    output_columns = ["file_name", "text", "low", "high"]
    potential_columns = ["file_name", "text", "gpt_response"]

    # Create the output directory up front so a long run cannot fail at the
    # very end because the target directory does not exist.
    if directory_path:
        os.makedirs(directory_path, exist_ok=True)

    output_path = os.path.join(directory_path, "punishment_ranges_1.csv")
    potential_path = os.path.join(directory_path, "potential_matches_1.csv")
    fail_log_path = os.path.join(directory_path, "fail_reasons_1.csv")
    debug_path = os.path.join(directory_path, "debug_potential_sentences_1.csv")

    # Resume support: reload what a previous run already extracted.
    if os.path.exists(output_path):
        done_df = pd.read_csv(output_path)
        done_files = set(done_df["file_name"].unique())
        output_rows = done_df.values.tolist()
        counter = len(output_rows)
    else:
        done_files, output_rows, counter = set(), [], 0

    if os.path.exists(potential_path):
        potential_df = pd.read_csv(potential_path)
        potential_matches = potential_df.values.tolist()
    else:
        potential_matches = []

    fail_log = []
    debug_sentences = []

    def save_checkpoint():
        """Persist the extracted ranges and the raw GPT replies collected so far."""
        pd.DataFrame(output_rows, columns=output_columns).to_csv(output_path, index=False)
        pd.DataFrame(potential_matches, columns=potential_columns).to_csv(potential_path, index=False)

    # sorted() makes the processing order reproducible across runs.
    all_csvs = [
        os.path.join(input_path, f)
        for f in sorted(os.listdir(input_path))
        if f.endswith(".csv") and not f.startswith("punishment_ranges")
    ]

    for file_path in tqdm(all_csvs, desc="üîç Processing CSV files"):
        file_name = os.path.basename(file_path)
        if file_name in done_files:
            continue

        try:
            verdict_df = pd.read_csv(file_path)
        except Exception as e:
            print(f"‚ùå Failed to read {file_name}: {e}")
            continue

        matches = 0
        strategy_used = ""

        # Evaluate the regex filter once on the verdict itself; it is used by
        # stage 2 and by the per-file summary below.
        try:
            regex_df = filter_by_pattern(verdict_df)
        except AttributeError:
            # The "text" column holds no strings (e.g. it is entirely empty),
            # so the .str accessor is unavailable: nothing can match.
            regex_df = verdict_df.iloc[0:0]

        # Stage 1: required_parts section -> classifier
        part_df = filter_csv_relevant_parts(verdict_df)
        filtered_df = _rows_passing_classifier(part_df)
        if not filtered_df.empty:
            strategy_used = "required_part"

        # Stage 2: if nothing was selected yet -> regex
        if filtered_df.empty and not regex_df.empty:
            filtered_df = regex_df
            strategy_used = "pattern"

        # Stage 3: fallback -> classifier over every row of the file
        if filtered_df.empty:
            filtered_df = _rows_passing_classifier(verdict_df)
            if not filtered_df.empty:
                strategy_used = "classifier"

        for _, row in filtered_df.iterrows():
            # str() guards against NaN / non-string cells, which would
            # otherwise break .strip() after the API call was already made.
            text = str(row['text'])
            gpt_output = ""
            extracted_range = "None"
            error = ""

            q2 = f"""◊ë◊û◊©◊§◊ò ◊î◊ë◊ê ◊®◊©◊ï◊ù ◊û◊™◊ó◊ù ◊¢◊†◊ô◊©◊î (◊ò◊ï◊ï◊ó ◊¢◊ï◊†◊©).
            ◊ê◊™◊î ◊¶◊®◊ô◊ö ◊ú◊î◊ó◊ñ◊ô◊® ◊ê◊ï◊™◊ï ◊ë◊§◊ï◊®◊û◊ò:
            {{◊û◊°◊§◊®}} ◊ó◊ï◊ì◊©◊ô◊ù - {{◊û◊°◊§◊®}} ◊ó◊ï◊ì◊©◊ô◊ù

            ◊©◊ô◊ù ◊ú◊ë:
            - ◊ê◊ù ◊®◊©◊ï◊ù \"◊©◊†◊î\" ‚Üí ◊™◊ó◊©◊ë ◊õ-12 ◊ó◊ï◊ì◊©◊ô◊ù
            - \"◊©◊†◊™◊ô◊ô◊ù\" ‚Üí 24 ◊ó◊ï◊ì◊©◊ô◊ù
            - \"◊©◊†◊î ◊ï◊ó◊¶◊ô\" ‚Üí 18 ◊ó◊ï◊ì◊©◊ô◊ù
            - ◊ê◊ù ◊®◊©◊ï◊ù \"5 ◊¢◊ì 7 ◊©◊†◊ô◊ù\" ‚Üí ◊™◊ó◊ñ◊ô◊® \"60 ◊ó◊ï◊ì◊©◊ô◊ù - 84 ◊ó◊ï◊ì◊©◊ô◊ù\"
            - ◊ê◊ù ◊ê◊ô◊ü ◊ò◊ï◊ï◊ó ◊ë◊®◊ï◊® ‚Äì ◊™◊ó◊ñ◊ô◊® ◊ê◊™ ◊î◊û◊ô◊ú◊î \"◊ú◊ê\"

            ◊î◊û◊©◊§◊ò ◊î◊ï◊ê:
            {text}"""
            try:
                response = client.chat.completions.create(
                    model="gpt-4.1-mini",
                    messages=[{"role": "user", "content": q2}]
                )
                # Guard against a reply without text content (None).
                gpt_output = (response.choices[0].message.content or "").strip()
            except Exception as exc:
                # Best effort: one failed request must not abort the run, but
                # it is reported and kept in the debug trace instead of being
                # silently dropped.
                error = f"{type(exc).__name__}: {exc}"
                print(f"GPT request failed for {file_name}: {error}")
            else:
                potential_matches.append([file_name, text.strip(), gpt_output])

                match = re.search(r"(\d+)\s*◊ó◊ï◊ì◊©◊ô◊ù?\s*[-‚Äì]\s*(\d+)\s*◊ó◊ï◊ì◊©◊ô◊ù?", gpt_output)
                if match:
                    low, high = match.groups()
                    extracted_range = f"{low} - {high}"
                    output_rows.append([file_name, text.strip(), low, high])
                    counter += 1
                    matches += 1
                    print(f"‚úÖ Extracted punishment range: {extracted_range}")

                    if counter % 50 == 0:
                        save_checkpoint()

            debug_sentences.append({
                "file_name": file_name,
                "sentence": text.strip(),
                "strategy_used": strategy_used,
                # Both classifier-based strategies filter rows through
                # should_call_gpt(); only "pattern" bypasses it.
                "passed_classifier": strategy_used in ("required_part", "classifier"),
                "gpt_output": gpt_output,
                "extracted_range": extracted_range,
                "error": error
            })

        fail_log.append({
            "file_name": file_name,
            "required_parts_found": not part_df.empty,
            # Computed on the verdict's own rows (not on the accumulated output).
            "pattern_matched": not regex_df.empty,
            "classified_sentences": len(filtered_df),
            "punishment_ranges_found": matches
        })

    save_checkpoint()
    pd.DataFrame(fail_log).to_csv(fail_log_path, index=False)
    pd.DataFrame(debug_sentences).to_csv(debug_path, index=False)
    print(f"‚úÖ Final results saved to: {output_path}")
    print(f"üìù GPT matches saved to: {potential_path}")
    print(f"üìÑ Failure log saved to: {fail_log_path}")
    print(f"üêõ Sentence-level debug saved to: {debug_path}")

    
# Run the extraction; the result CSVs are written into this directory.
# NOTE(review): long-running (the captured log below shows ~3 hours) and it
# calls the OpenAI API for every candidate sentence.
extract_punishment_range_tagging_db('/home/liorkob/M.Sc/thesis/data/drugs_3k/gpt/punishment_range')




‚úÖ Extracted punishment range: 1 - 18
‚úÖ Extracted punishment range: 8 - 18
‚úÖ Extracted punishment range: 0 - 18
‚úÖ Extracted punishment range: 0 - 12


üîç Processing CSV files:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 2444/3045 [2:54:22<46:45,  4.67s/it]

‚úÖ Extracted punishment range: 24 - 48
‚úÖ Extracted punishment range: 9 - 24
‚úÖ Extracted punishment range: 6 - 12
‚úÖ Extracted punishment range: 6 - 12
‚úÖ Extracted punishment range: 18 - 36
‚úÖ Extracted punishment range: 14 - 30
‚úÖ Extracted punishment range: 12 - 48
‚úÖ Extracted punishment range: 16 - 36


üîç Processing CSV files:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 2448/3045 [2:54:33<28:12,  2.84s/it]

‚úÖ Extracted punishment range: 1 - 18
‚úÖ Extracted punishment range: 8 - 18


üîç Processing CSV files:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 2450/3045 [2:54:42<34:42,  3.50s/it]

‚úÖ Extracted punishment range: 6 - 24
‚úÖ Extracted punishment range: 7 - 18
‚úÖ Extracted punishment range: 10 - 36


üîç Processing CSV files:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 2451/3045 [2:54:44<31:52,  3.22s/it]

‚úÖ Extracted punishment range: 6 - 12
‚úÖ Extracted punishment range: 0 - 10


üîç Processing CSV files:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 2452/3045 [2:54:54<51:02,  5.16s/it]

‚úÖ Extracted punishment range: 6 - 14
‚úÖ Extracted punishment range: 7 - 14
‚úÖ Extracted punishment range: 6 - 18
‚úÖ Extracted punishment range: 10 - 20
‚úÖ Extracted punishment range: 4 - 14


üîç Processing CSV files:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 2454/3045 [2:55:03<45:45,  4.65s/it]

‚úÖ Extracted punishment range: 12 - 30
‚úÖ Extracted punishment range: 7 - 18
‚úÖ Extracted punishment range: 7 - 18


üîç Processing CSV files:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 2455/3045 [2:55:06<41:49,  4.25s/it]

‚úÖ Extracted punishment range: 0 - 18


üîç Processing CSV files:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 2456/3045 [2:55:10<40:41,  4.15s/it]

‚úÖ Extracted punishment range: 8 - 30
‚úÖ Extracted punishment range: 27 - 50


üîç Processing CSV files:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 2458/3045 [2:55:22<48:37,  4.97s/it]

‚úÖ Extracted punishment range: 18 - 36
‚úÖ Extracted punishment range: 1 - 6
‚úÖ Extracted punishment range: 10 - 18
‚úÖ Extracted punishment range: 16 - 36
‚úÖ Extracted punishment range: 1 - 6


üîç Processing CSV files:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 2459/3045 [2:55:28<50:53,  5.21s/it]

‚úÖ Extracted punishment range: 8 - 18
‚úÖ Extracted punishment range: 8 - 24


üîç Processing CSV files:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 2460/3045 [2:55:31<44:11,  4.53s/it]

‚úÖ Extracted punishment range: 3 - 12
‚úÖ Extracted punishment range: 6 - 24


üîç Processing CSV files:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 2463/3045 [2:55:37<26:18,  2.71s/it]

‚úÖ Extracted punishment range: 12 - 12


üîç Processing CSV files:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 2464/3045 [2:55:45<42:09,  4.35s/it]

‚úÖ Extracted punishment range: 6 - 12
‚úÖ Extracted punishment range: 7 - 14
‚úÖ Extracted punishment range: 8 - 18
‚úÖ Extracted punishment range: 7 - 18
‚úÖ Extracted punishment range: 0 - 8
‚úÖ Extracted punishment range: 6 - 12
‚úÖ Extracted punishment range: 6 - 13


üîç Processing CSV files:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 2465/3045 [2:55:51<46:44,  4.83s/it]

‚úÖ Extracted punishment range: 48 - 120


üîç Processing CSV files:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 2466/3045 [2:55:52<36:26,  3.78s/it]

‚úÖ Extracted punishment range: 16 - 32


üîç Processing CSV files:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 2467/3045 [2:55:57<39:29,  4.10s/it]

‚úÖ Extracted punishment range: 20 - 40
‚úÖ Extracted punishment range: 9 - 18
‚úÖ Extracted punishment range: 15 - 30


üîç Processing CSV files:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 2468/3045 [2:55:59<33:53,  3.52s/it]

‚úÖ Extracted punishment range: 9 - 24


üîç Processing CSV files:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 2470/3045 [2:56:08<37:03,  3.87s/it]

‚úÖ Extracted punishment range: 8 - 18
‚úÖ Extracted punishment range: 0 - 12
‚úÖ Extracted punishment range: 0 - 12


üîç Processing CSV files:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 2471/3045 [2:56:11<32:48,  3.43s/it]

‚úÖ Extracted punishment range: 8 - 16


üîç Processing CSV files:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 2472/3045 [2:56:14<32:14,  3.38s/it]

‚úÖ Extracted punishment range: 10 - 24


üîç Processing CSV files:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 2473/3045 [2:56:16<28:30,  2.99s/it]

‚úÖ Extracted punishment range: 6 - 12


üîç Processing CSV files:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 2474/3045 [2:56:18<24:30,  2.58s/it]

‚úÖ Extracted punishment range: 10 - 30


üîç Processing CSV files:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 2476/3045 [2:56:25<29:13,  3.08s/it]

‚úÖ Extracted punishment range: 6 - 18
‚úÖ Extracted punishment range: 0 - 12
‚úÖ Extracted punishment range: 0 - 24


üîç Processing CSV files:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 2477/3045 [2:56:28<31:04,  3.28s/it]

‚úÖ Extracted punishment range: 0 - 12
‚úÖ Extracted punishment range: 9 - 9


üîç Processing CSV files:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 2478/3045 [2:56:34<38:44,  4.10s/it]

‚úÖ Extracted punishment range: 10 - 20


üîç Processing CSV files:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 2479/3045 [2:56:39<41:30,  4.40s/it]

‚úÖ Extracted punishment range: 0 - 8


üîç Processing CSV files:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 2480/3045 [2:56:46<46:20,  4.92s/it]

‚úÖ Extracted punishment range: 6 - 14
‚úÖ Extracted punishment range: 12 - 24


üîç Processing CSV files:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 2482/3045 [2:56:54<40:52,  4.36s/it]

‚úÖ Extracted punishment range: 18 - 36
‚úÖ Extracted punishment range: 6 - 6
‚úÖ Extracted punishment range: 6 - 12
‚úÖ Extracted punishment range: 10 - 30


üîç Processing CSV files:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 2483/3045 [2:56:57<36:58,  3.95s/it]

‚úÖ Extracted punishment range: 33 - 60


üîç Processing CSV files:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 2484/3045 [2:57:06<52:46,  5.64s/it]

‚úÖ Extracted punishment range: 8 - 18
‚úÖ Extracted punishment range: 36 - 60
‚úÖ Extracted punishment range: 18 - 36
‚úÖ Extracted punishment range: 9 - 24
‚úÖ Extracted punishment range: 20 - 40
‚úÖ Extracted punishment range: 6 - 12
‚úÖ Extracted punishment range: 24 - 40


üîç Processing CSV files:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 2485/3045 [2:57:13<56:26,  6.05s/it]

‚úÖ Extracted punishment range: 14 - 24
‚úÖ Extracted punishment range: 0 - 12
‚úÖ Extracted punishment range: 8 - 18


üîç Processing CSV files:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 2488/3045 [2:57:20<29:48,  3.21s/it]

‚úÖ Extracted punishment range: 48 - 72
‚úÖ Extracted punishment range: 24 - 48
‚úÖ Extracted punishment range: 36 - 60
‚úÖ Extracted punishment range: 30 - 54
‚úÖ Extracted punishment range: 36 - 60
‚úÖ Extracted punishment range: 36 - 56
‚úÖ Extracted punishment range: 30 - 48


üîç Processing CSV files:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 2489/3045 [2:57:26<38:28,  4.15s/it]

‚úÖ Extracted punishment range: 32 - 48


üîç Processing CSV files:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 2490/3045 [2:57:27<28:32,  3.09s/it]

‚úÖ Extracted punishment range: 10 - 30


üîç Processing CSV files:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 2491/3045 [2:57:30<28:33,  3.09s/it]

‚úÖ Extracted punishment range: 5 - 13


üîç Processing CSV files:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 2492/3045 [2:57:34<32:17,  3.50s/it]

‚úÖ Extracted punishment range: 9 - 20
‚úÖ Extracted punishment range: 8 - 24
‚úÖ Extracted punishment range: 9 - 24
‚úÖ Extracted punishment range: 6 - 24


üîç Processing CSV files:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 2494/3045 [2:57:42<32:58,  3.59s/it]

‚úÖ Extracted punishment range: 2 - 10


üîç Processing CSV files:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 2495/3045 [2:57:46<33:37,  3.67s/it]

‚úÖ Extracted punishment range: 8 - 8


üîç Processing CSV files:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 2497/3045 [2:57:50<24:23,  2.67s/it]

‚úÖ Extracted punishment range: 38 - 48
‚úÖ Extracted punishment range: 18 - 30
‚úÖ Extracted punishment range: 24 - 48
‚úÖ Extracted punishment range: 12 - 36
‚úÖ Extracted punishment range: 24 - 48


üîç Processing CSV files:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 2498/3045 [2:57:57<35:03,  3.85s/it]

‚úÖ Extracted punishment range: 0 - 12


üîç Processing CSV files:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 2500/3045 [2:58:03<32:10,  3.54s/it]

‚úÖ Extracted punishment range: 12 - 24


üîç Processing CSV files:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 2501/3045 [2:58:06<30:38,  3.38s/it]

‚úÖ Extracted punishment range: 16 - 32


üîç Processing CSV files:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 2502/3045 [2:58:09<28:50,  3.19s/it]

‚úÖ Extracted punishment range: 6 - 12


üîç Processing CSV files:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 2504/3045 [2:58:16<27:37,  3.06s/it]

‚úÖ Extracted punishment range: 8 - 24


üîç Processing CSV files:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 2506/3045 [2:58:23<29:09,  3.25s/it]

‚úÖ Extracted punishment range: 10 - 24
‚úÖ Extracted punishment range: 9 - 9
‚úÖ Extracted punishment range: 0 - 4


üîç Processing CSV files:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 2507/3045 [2:58:26<28:51,  3.22s/it]

‚úÖ Extracted punishment range: 12 - 36


üîç Processing CSV files:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 2508/3045 [2:58:30<30:09,  3.37s/it]

‚úÖ Extracted punishment range: 0 - 12
‚úÖ Extracted punishment range: 1 - 3
‚úÖ Extracted punishment range: 0 - 8


üîç Processing CSV files:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 2509/3045 [2:58:36<36:24,  4.08s/it]

‚úÖ Extracted punishment range: 40 - 60


üîç Processing CSV files:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 2512/3045 [2:58:49<42:33,  4.79s/it]

‚úÖ Extracted punishment range: 12 - 24


üîç Processing CSV files:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 2513/3045 [2:58:50<32:21,  3.65s/it]

‚úÖ Extracted punishment range: 12 - 24
‚úÖ Extracted punishment range: 4 - 8


üîç Processing CSV files:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 2515/3045 [2:58:56<32:34,  3.69s/it]

‚úÖ Extracted punishment range: 7 - 24
‚úÖ Extracted punishment range: 6 - 12
‚úÖ Extracted punishment range: 8 - 18


üîç Processing CSV files:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 2516/3045 [2:58:58<28:06,  3.19s/it]

‚úÖ Extracted punishment range: 13 - 26
‚úÖ Extracted punishment range: 10 - 30
‚úÖ Extracted punishment range: 9 - 22
‚úÖ Extracted punishment range: 24 - 48
‚úÖ Extracted punishment range: 6 - 18
‚úÖ Extracted punishment range: 9 - 24


üîç Processing CSV files:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 2517/3045 [2:59:04<36:02,  4.10s/it]

‚úÖ Extracted punishment range: 0 - 12
‚úÖ Extracted punishment range: 3 - 14


üîç Processing CSV files:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 2518/3045 [2:59:07<32:27,  3.69s/it]

‚úÖ Extracted punishment range: 6 - 12
‚úÖ Extracted punishment range: 10 - 24
‚úÖ Extracted punishment range: 8 - 18
‚úÖ Extracted punishment range: 8 - 8


üîç Processing CSV files:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 2519/3045 [2:59:13<38:12,  4.36s/it]

‚úÖ Extracted punishment range: 8 - 24
‚úÖ Extracted punishment range: 2 - 9
‚úÖ Extracted punishment range: 2 - 9


üîç Processing CSV files:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 2520/3045 [2:59:18<39:59,  4.57s/it]

‚úÖ Extracted punishment range: 6 - 12


üîç Processing CSV files:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 2521/3045 [2:59:20<32:22,  3.71s/it]

‚úÖ Extracted punishment range: 2 - 8
‚úÖ Extracted punishment range: 15 - 36


üîç Processing CSV files:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 2522/3045 [2:59:25<36:54,  4.23s/it]

‚úÖ Extracted punishment range: 12 - 30


üîç Processing CSV files:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 2524/3045 [2:59:29<26:05,  3.01s/it]

‚úÖ Extracted punishment range: 12 - 24
‚úÖ Extracted punishment range: 18 - 30
‚úÖ Extracted punishment range: 12 - 36


üîç Processing CSV files:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 2526/3045 [2:59:34<22:37,  2.61s/it]

‚úÖ Extracted punishment range: 0 - 18
‚úÖ Extracted punishment range: 8 - 18
‚úÖ Extracted punishment range: 6 - 12


üîç Processing CSV files:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 2527/3045 [2:59:35<20:31,  2.38s/it]

‚úÖ Extracted punishment range: 54 - 72
‚úÖ Extracted punishment range: 24 - 36
‚úÖ Extracted punishment range: 24 - 48
‚úÖ Extracted punishment range: 21 - 48


üîç Processing CSV files:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 2528/3045 [2:59:39<22:38,  2.63s/it]

‚úÖ Extracted punishment range: 30 - 60


üîç Processing CSV files:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 2529/3045 [2:59:42<25:17,  2.94s/it]

‚úÖ Extracted punishment range: 12 - 24
‚úÖ Extracted punishment range: 11 - 34


üîç Processing CSV files:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 2530/3045 [2:59:54<46:46,  5.45s/it]

‚úÖ Extracted punishment range: 24 - 48
‚úÖ Extracted punishment range: 15 - 30
‚úÖ Extracted punishment range: 18 - 36
‚úÖ Extracted punishment range: 18 - 30
‚úÖ Extracted punishment range: 23 - 51
‚úÖ Extracted punishment range: 24 - 52
‚úÖ Extracted punishment range: 22 - 50
‚úÖ Extracted punishment range: 24 - 48
‚úÖ Extracted punishment range: 18 - 43


üîç Processing CSV files:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 2532/3045 [3:00:06<48:37,  5.69s/it]

‚úÖ Extracted punishment range: 22 - 46


üîç Processing CSV files:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 2533/3045 [3:00:09<40:05,  4.70s/it]

‚úÖ Extracted punishment range: 6 - 18
‚úÖ Extracted punishment range: 5 - 12


üîç Processing CSV files:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 2534/3045 [3:00:13<38:04,  4.47s/it]

‚úÖ Extracted punishment range: 6 - 12


üîç Processing CSV files:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 2535/3045 [3:00:14<29:17,  3.45s/it]

‚úÖ Extracted punishment range: 6 - 6
‚úÖ Extracted punishment range: 6 - 8


üîç Processing CSV files:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 2536/3045 [3:00:20<36:05,  4.25s/it]

‚úÖ Extracted punishment range: 0 - 18


üîç Processing CSV files:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 2537/3045 [3:00:25<37:53,  4.47s/it]

‚úÖ Extracted punishment range: 18 - 30
‚úÖ Extracted punishment range: 12 - 24


üîç Processing CSV files:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 2538/3045 [3:00:27<33:00,  3.91s/it]

‚úÖ Extracted punishment range: 5 - 12


üîç Processing CSV files:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 2539/3045 [3:00:29<27:44,  3.29s/it]

‚úÖ Extracted punishment range: 8 - 8


üîç Processing CSV files:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 2541/3045 [3:00:32<19:54,  2.37s/it]

‚úÖ Extracted punishment range: 25 - 45


üîç Processing CSV files:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 2542/3045 [3:00:36<23:19,  2.78s/it]

‚úÖ Extracted punishment range: 6 - 6


üîç Processing CSV files:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 2543/3045 [3:00:39<22:56,  2.74s/it]

‚úÖ Extracted punishment range: 0 - 6


üîç Processing CSV files:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 2544/3045 [3:00:40<18:23,  2.20s/it]

‚úÖ Extracted punishment range: 12 - 24
‚úÖ Extracted punishment range: 12 - 36
‚úÖ Extracted punishment range: 10 - 24
‚úÖ Extracted punishment range: 4 - 24
‚úÖ Extracted punishment range: 9 - 24
‚úÖ Extracted punishment range: 0 - 8


üîç Processing CSV files:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 2546/3045 [3:00:48<26:45,  3.22s/it]

‚úÖ Extracted punishment range: 6 - 15
‚úÖ Extracted punishment range: 8 - 18


üîç Processing CSV files:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 2547/3045 [3:00:56<38:08,  4.59s/it]

‚úÖ Extracted punishment range: 12 - 24
‚úÖ Extracted punishment range: 8 - 18
‚úÖ Extracted punishment range: 6 - 15


üîç Processing CSV files:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 2548/3045 [3:00:59<32:42,  3.95s/it]

‚úÖ Extracted punishment range: 1 - 8


üîç Processing CSV files:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 2549/3045 [3:01:01<29:29,  3.57s/it]

‚úÖ Extracted punishment range: 30 - 50
‚úÖ Extracted punishment range: 30 - 50


üîç Processing CSV files:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 2550/3045 [3:01:05<29:58,  3.63s/it]

‚úÖ Extracted punishment range: 6 - 12


üîç Processing CSV files:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 2552/3045 [3:01:07<18:13,  2.22s/it]

‚úÖ Extracted punishment range: 6 - 24


üîç Processing CSV files:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 2553/3045 [3:01:08<14:24,  1.76s/it]

‚úÖ Extracted punishment range: 24 - 48


üîç Processing CSV files:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 2554/3045 [3:01:10<16:27,  2.01s/it]

‚úÖ Extracted punishment range: 0 - 18


üîç Processing CSV files:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 2557/3045 [3:01:17<15:35,  1.92s/it]

‚úÖ Extracted punishment range: 0 - 8
‚úÖ Extracted punishment range: 12 - 24


üîç Processing CSV files:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 2558/3045 [3:01:21<20:55,  2.58s/it]

‚úÖ Extracted punishment range: 9 - 18
‚úÖ Extracted punishment range: 12 - 30
‚úÖ Extracted punishment range: 5 - 15
‚úÖ Extracted punishment range: 6 - 18
‚úÖ Extracted punishment range: 24 - 120
‚úÖ Extracted punishment range: 10 - 10
‚úÖ Extracted punishment range: 9 - 18
‚úÖ Extracted punishment range: 6 - 18


üîç Processing CSV files:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 2559/3045 [3:01:46<1:16:39,  9.46s/it]

‚úÖ Extracted punishment range: 24 - 40


üîç Processing CSV files:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 2562/3045 [3:01:52<34:15,  4.26s/it]  

‚úÖ Extracted punishment range: 36 - 60


üîç Processing CSV files:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 2564/3045 [3:01:54<20:52,  2.60s/it]

‚úÖ Extracted punishment range: 15 - 30


üîç Processing CSV files:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 2565/3045 [3:01:59<25:50,  3.23s/it]

‚úÖ Extracted punishment range: 6 - 12
‚úÖ Extracted punishment range: 1 - 9


üîç Processing CSV files:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 2567/3045 [3:02:07<29:50,  3.75s/it]

‚úÖ Extracted punishment range: 6 - 18
‚úÖ Extracted punishment range: 18 - 48


üîç Processing CSV files:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 2568/3045 [3:02:10<27:55,  3.51s/it]

‚úÖ Extracted punishment range: 0 - 18
‚úÖ Extracted punishment range: 10 - 24


üîç Processing CSV files:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 2570/3045 [3:02:13<19:35,  2.48s/it]

‚úÖ Extracted punishment range: 0 - 12
‚úÖ Extracted punishment range: 7 - 24
‚úÖ Extracted punishment range: 0 - 18
‚úÖ Extracted punishment range: 5 - 10
‚úÖ Extracted punishment range: 4 - 8
‚úÖ Extracted punishment range: 1 - 15
‚úÖ Extracted punishment range: 7 - 15
‚úÖ Extracted punishment range: 6 - 18
‚úÖ Extracted punishment range: 0 - 12
‚úÖ Extracted punishment range: 6 - 18
‚úÖ Extracted punishment range: 8 - 20
‚úÖ Extracted punishment range: 5 - 15
‚úÖ Extracted punishment range: 8 - 24


üîç Processing CSV files:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 2571/3045 [3:02:25<41:38,  5.27s/it]

‚úÖ Extracted punishment range: 18 - 36


üîç Processing CSV files:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 2572/3045 [3:02:32<44:01,  5.58s/it]

‚úÖ Extracted punishment range: 8 - 18


üîç Processing CSV files:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 2573/3045 [3:02:33<35:18,  4.49s/it]

‚úÖ Extracted punishment range: 4 - 9


üîç Processing CSV files:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 2574/3045 [3:02:35<28:18,  3.61s/it]

‚úÖ Extracted punishment range: 9 - 20
‚úÖ Extracted punishment range: 12 - 24


üîç Processing CSV files:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 2576/3045 [3:02:41<24:51,  3.18s/it]

‚úÖ Extracted punishment range: 7 - 18


üîç Processing CSV files:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 2577/3045 [3:02:43<21:41,  2.78s/it]

‚úÖ Extracted punishment range: 6 - 18
‚úÖ Extracted punishment range: 15 - 34


üîç Processing CSV files:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 2578/3045 [3:02:47<24:40,  3.17s/it]

‚úÖ Extracted punishment range: 12 - 24
‚úÖ Extracted punishment range: 8 - 24


üîç Processing CSV files:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 2579/3045 [3:02:49<21:37,  2.78s/it]

‚úÖ Extracted punishment range: 6 - 12
‚úÖ Extracted punishment range: 6 - 12


üîç Processing CSV files:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 2580/3045 [3:03:00<41:22,  5.34s/it]

‚úÖ Extracted punishment range: 8 - 18


üîç Processing CSV files:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 2583/3045 [3:03:12<31:25,  4.08s/it]

‚úÖ Extracted punishment range: 0 - 15
‚úÖ Extracted punishment range: 11 - 24


üîç Processing CSV files:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 2584/3045 [3:03:16<31:43,  4.13s/it]

‚úÖ Extracted punishment range: 0 - 12


üîç Processing CSV files:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 2585/3045 [3:03:20<30:49,  4.02s/it]

‚úÖ Extracted punishment range: 24 - 48


üîç Processing CSV files:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 2587/3045 [3:03:23<20:44,  2.72s/it]

‚úÖ Extracted punishment range: 0 - 10


üîç Processing CSV files:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 2588/3045 [3:03:28<27:23,  3.60s/it]

‚úÖ Extracted punishment range: 6 - 24


üîç Processing CSV files:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 2589/3045 [3:03:34<32:12,  4.24s/it]

‚úÖ Extracted punishment range: 36 - 60
‚úÖ Extracted punishment range: 6 - 18


üîç Processing CSV files:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 2590/3045 [3:03:42<40:24,  5.33s/it]

‚úÖ Extracted punishment range: 6 - 18
‚úÖ Extracted punishment range: 6 - 18


üîç Processing CSV files:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 2592/3045 [3:03:45<25:32,  3.38s/it]

‚úÖ Extracted punishment range: 30 - 50
‚úÖ Extracted punishment range: 10 - 20
‚úÖ Extracted punishment range: 18 - 48


üîç Processing CSV files:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 2595/3045 [3:03:51<19:31,  2.60s/it]

‚úÖ Extracted punishment range: 6 - 12
‚úÖ Extracted punishment range: 5 - 18


üîç Processing CSV files:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 2597/3045 [3:03:55<15:29,  2.07s/it]

‚úÖ Extracted punishment range: 6 - 9
‚úÖ Extracted punishment range: 8 - 18
‚úÖ Extracted punishment range: 6 - 9


üîç Processing CSV files:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 2598/3045 [3:04:00<21:28,  2.88s/it]

‚úÖ Extracted punishment range: 7 - 20


üîç Processing CSV files:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 2599/3045 [3:04:06<30:16,  4.07s/it]

‚úÖ Extracted punishment range: 8 - 18
‚úÖ Extracted punishment range: 6 - 6
‚úÖ Extracted punishment range: 2 - 8
‚úÖ Extracted punishment range: 0 - 12
‚úÖ Extracted punishment range: 4 - 18
‚úÖ Extracted punishment range: 8 - 20


üîç Processing CSV files:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 2601/3045 [3:04:22<44:47,  6.05s/it]

‚úÖ Extracted punishment range: 42 - 84
‚úÖ Extracted punishment range: 32 - 64
‚úÖ Extracted punishment range: 24 - 54
‚úÖ Extracted punishment range: 6 - 24
‚úÖ Extracted punishment range: 10 - 26
‚úÖ Extracted punishment range: 12 - 48
‚úÖ Extracted punishment range: 24 - 60
‚úÖ Extracted punishment range: 20 - 48


üîç Processing CSV files:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 2602/3045 [3:04:28<46:01,  6.23s/it]

‚úÖ Extracted punishment range: 3 - 12
‚úÖ Extracted punishment range: 8 - 27


üîç Processing CSV files:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 2603/3045 [3:04:31<37:19,  5.07s/it]

‚úÖ Extracted punishment range: 3 - 12


üîç Processing CSV files:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 2604/3045 [3:04:35<35:35,  4.84s/it]

‚úÖ Extracted punishment range: 0 - 12


üîç Processing CSV files:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 2605/3045 [3:04:35<25:34,  3.49s/it]

‚úÖ Extracted punishment range: 10 - 20


üîç Processing CSV files:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 2607/3045 [3:04:47<33:42,  4.62s/it]

‚úÖ Extracted punishment range: 12 - 24
‚úÖ Extracted punishment range: 18 - 18
‚úÖ Extracted punishment range: 0 - 9
‚úÖ Extracted punishment range: 12 - 24
‚úÖ Extracted punishment range: 10 - 20
‚úÖ Extracted punishment range: 6 - 18
‚úÖ Extracted punishment range: 12 - 24


üîç Processing CSV files:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 2609/3045 [3:04:54<26:30,  3.65s/it]

‚úÖ Extracted punishment range: 6 - 12
‚úÖ Extracted punishment range: 12 - 14
‚úÖ Extracted punishment range: 8 - 16
‚úÖ Extracted punishment range: 6 - 18


üîç Processing CSV files:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 2610/3045 [3:04:57<26:23,  3.64s/it]

‚úÖ Extracted punishment range: 0 - 6


üîç Processing CSV files:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 2611/3045 [3:05:04<32:31,  4.50s/it]

‚úÖ Extracted punishment range: 24 - 48


üîç Processing CSV files:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 2612/3045 [3:05:11<37:14,  5.16s/it]

‚úÖ Extracted punishment range: 12 - 36


üîç Processing CSV files:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 2613/3045 [3:05:20<46:47,  6.50s/it]

‚úÖ Extracted punishment range: 6 - 12


üîç Processing CSV files:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 2617/3045 [3:05:33<28:20,  3.97s/it]

‚úÖ Extracted punishment range: 6 - 6
‚úÖ Extracted punishment range: 6 - 8
‚úÖ Extracted punishment range: 3 - 24
‚úÖ Extracted punishment range: 10 - 24
‚úÖ Extracted punishment range: 6 - 36


üîç Processing CSV files:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 2618/3045 [3:05:38<31:40,  4.45s/it]

‚úÖ Extracted punishment range: 6 - 6
‚úÖ Extracted punishment range: 12 - 30


üîç Processing CSV files:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 2622/3045 [3:05:47<16:16,  2.31s/it]

‚úÖ Extracted punishment range: 6 - 12
‚úÖ Extracted punishment range: 12 - 36


üîç Processing CSV files:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 2624/3045 [3:05:53<16:43,  2.38s/it]

‚úÖ Extracted punishment range: 6 - 18


üîç Processing CSV files:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 2625/3045 [3:06:01<29:25,  4.20s/it]

‚úÖ Extracted punishment range: 18 - 36


üîç Processing CSV files:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 2626/3045 [3:06:10<39:01,  5.59s/it]

In [None]:
import pandas as pd

# ========== Paths ==========
# Debug sentence logs and extracted ranges from the previous run (no suffix)
# and from the new run ("_1" suffix).
old_debug_path = "/home/liorkob/M.Sc/thesis/data/drugs_3k/gpt/punishment_range/debug_potential_sentences.csv"
new_debug_path = "/home/liorkob/M.Sc/thesis/data/drugs_3k/gpt/punishment_range/debug_potential_sentences_1.csv"

old_output_path = "/home/liorkob/M.Sc/thesis/data/drugs_3k/gpt/punishment_range/punishment_ranges.csv"
new_output_path = "/home/liorkob/M.Sc/thesis/data/drugs_3k/gpt/punishment_range/punishment_ranges_1.csv"


def values_differ(left: pd.Series, right: pd.Series) -> pd.Series:
    """Return a boolean mask marking positions where two aligned columns differ.

    A plain ``left != right`` reports ``NaN != NaN`` as True, so a row whose
    value is missing in *both* runs would be flagged as a change even though
    nothing changed. Here two missing values at the same position count as
    equal; a missing value against a present one still counts as a difference.

    Args:
        left: Column from the old run (e.g. ``merged["low_old"]``).
        right: Column from the new run, sharing ``left``'s index.

    Returns:
        Boolean Series, True where the two values genuinely differ.
    """
    both_missing = left.isna() & right.isna()
    return (left != right) & ~both_missing


# In a notebook kernel __name__ is "__main__", so the comparison below runs
# exactly as a normal cell; the guard only keeps `values_differ` importable
# on a machine that does not have the CSV files.
if __name__ == "__main__":
    # ========== Compare DEBUG ==========
    old_debug = pd.read_csv(old_debug_path)
    new_debug = pd.read_csv(new_debug_path)

    # Outer merge keeps sentences that exist in only one run; `_merge` records
    # which side each row came from.
    # NOTE(review): if (file_name, sentence) is not unique within a run, the
    # merge pairs every old row with every new row for that key — confirm.
    debug_merged = old_debug.merge(
        new_debug,
        on=["file_name", "sentence"],
        how="outer",
        suffixes=("_old", "_new"),
        indicator=True
    )

    # A row is a difference when it is missing from one run, or when the
    # strategy or the extracted range changed between runs.
    debug_diffs = debug_merged[
        values_differ(debug_merged["strategy_used_old"], debug_merged["strategy_used_new"]) |
        values_differ(debug_merged["extracted_range_old"], debug_merged["extracted_range_new"]) |
        (debug_merged["_merge"] != "both")
    ]

    # ========== Compare OUTPUT ==========
    old_output = pd.read_csv(old_output_path)
    new_output = pd.read_csv(new_output_path)

    output_merged = old_output.merge(
        new_output,
        on=["file_name", "text"],
        how="outer",
        suffixes=("_old", "_new"),
        indicator=True
    )

    # Same rule for the final ranges: changed bounds or a row present in only
    # one of the two runs.
    output_diffs = output_merged[
        values_differ(output_merged["low_old"], output_merged["low_new"]) |
        values_differ(output_merged["high_old"], output_merged["high_new"]) |
        (output_merged["_merge"] != "both")
    ]

    # ========== Save to files ==========
    # Written relative to the notebook's current working directory.
    debug_diffs.to_csv("debug_differences.csv", index=False)
    output_diffs.to_csv("punishment_differences.csv", index=False)

    print("‚úÖ ◊†◊©◊û◊®◊ï ◊ß◊ë◊¶◊ô ◊î◊î◊ë◊ì◊ú◊ô◊ù:")
    print("- debug_differences.csv")
    print("- punishment_differences.csv")


In [None]:
import pandas as pd

# Inputs: GPT-extracted facts per verdict, and the punishment ranges extracted
# per source CSV file.
facts_df = pd.read_csv("/home/liorkob/M.Sc/thesis/data/drugs_3k/gpt/processed_verdicts_with_gpt.csv")  # contains: verdict, extracted_gpt_facts
ranges_df = pd.read_csv("/home/liorkob/M.Sc/thesis/data/drugs_3k/gpt/punishment_range/punishment_ranges_1.csv")  # contains: file_name, low, high

# Build the join key from the file name by stripping only a trailing ".csv".
# The pattern is anchored to the end so a ".csv" appearing elsewhere in the
# name is left intact.
ranges_df["verdict"] = ranges_df["file_name"].str.replace(r"\.csv$", "", regex=True)

# A file can contribute several range rows. Only one row per verdict is kept in
# the end, so reduce to the first range per verdict *before* merging; this
# avoids multiplying every facts row by the number of ranges for its verdict.
# NOTE(review): "first row wins" is order-dependent — confirm that the first
# extracted range is the one wanted for each verdict.
first_range_per_verdict = ranges_df[["verdict", "low", "high"]].drop_duplicates(
    subset="verdict", keep="first"
)

# Left merge: every facts row is kept; verdicts without a range get NaN bounds.
merged_df = pd.merge(facts_df, first_range_per_verdict, on="verdict", how="left")

# Remove duplicates – keep only one row per verdict
merged_df = merged_df.drop_duplicates(subset="verdict", keep="first")

# Save result (relative to the notebook's current working directory)
merged_df.to_csv("merged_output_1.csv", index=False)



# Re-read the file that was just written and run sanity checks on it.
df = pd.read_csv("merged_output_1.csv")
print("Total rows in CSV:", len(df))
print("Unique verdicts:", df["verdict"].nunique())
print("Unique extracted_gpt_facts:", df["extracted_gpt_facts"].nunique())


print(df[["low", "high"]].describe())
# Fraction (0-1) of rows whose low bound equals the high bound. Rows with no
# matched range have NaN bounds, compare as unequal, and so lower this value.
print((df["low"] == df["high"]).mean())  # fraction of equal ranges
# Rows whose extracted facts text appears more than once, grouped together so
# their ranges can be compared side by side (displayed as the cell output).
df[df["extracted_gpt_facts"].duplicated(keep=False)].sort_values("extracted_gpt_facts")[["extracted_gpt_facts", "low", "high"]]
