In [2]:
import pandas as pd
import subprocess
import tempfile
import os
import json
from pandarallel import pandarallel

pandarallel.initialize(progress_bar=True)

solutions_train = pd.read_excel("data/for_teams/train/solutions.xlsx")
solutions_test = pd.read_excel("data/for_teams/test/solutions.xlsx")

INFO: Pandarallel will run on 8 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


In [3]:
def run_pyright_on_string(code_string: str):
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".py") as temp_file:
            temp_file_path = temp_file.name
            temp_file.write(code_string.encode("utf-8"))

        result = subprocess.run(
            ["basedpyright", "--outputjson", "--level", "error", "--project", ".", temp_file_path],
            capture_output=True,
            text=True,
        )

    except FileNotFoundError:
        print("Pyright is not installed or not found in your PATH.")
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        if os.path.exists(temp_file_path):
            os.remove(temp_file_path)
            print(f"Temporary file {temp_file_path} deleted.")

    return result.stdout


def get_basedpyright_errors(code_string: str):
    basedpyright_output = run_pyright_on_string(code_string)
    lines = code_string.split("\n")
    errors = json.loads(basedpyright_output)["generalDiagnostics"]
    count = len(errors)
    if count == 0:
        return {"message": None, "line_number": None, "line": None}
    errors = errors[0]

    return {
        "message": errors["message"],
        "line_number": errors["range"]["start"]["line"],
        "line": lines[errors["range"]["start"]["line"]],
    }

In [4]:
solutions_train[["message", "line_number", "line"]] = solutions_train["student_solution"].parallel_apply(
    lambda x: pd.Series(get_basedpyright_errors(x))
)

solutions_train.to_csv("train_solutions_with_pyright.csv")

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=44), Label(value='0 / 44'))), HBox…

Temporary file /tmp/tmpkjamuzs1.py deleted.
Temporary file /tmp/tmpqpeqmnhy.py deleted.
Temporary file /tmp/tmp28cl7z7m.py deleted.Temporary file /tmp/tmp39o4vn1n.py deleted.

Temporary file /tmp/tmp7iuk5a9h.py deleted.
Temporary file /tmp/tmpe1px57op.py deleted.
Temporary file /tmp/tmppk7_r_cz.py deleted.
Temporary file /tmp/tmp0g5qblft.py deleted.
Temporary file /tmp/tmpxjkch8uk.py deleted.
Temporary file /tmp/tmpm3x6axa2.py deleted.
Temporary file /tmp/tmp8jxnsgfl.py deleted.
Temporary file /tmp/tmpit338tom.py deleted.
Temporary file /tmp/tmps0p2a7rl.py deleted.
Temporary file /tmp/tmpr9s55r65.py deleted.
Temporary file /tmp/tmp176z71y7.py deleted.
Temporary file /tmp/tmpopfwghq5.py deleted.
Temporary file /tmp/tmp7qfl06aa.py deleted.
Temporary file /tmp/tmpwde_0g73.py deleted.
Temporary file /tmp/tmptfg2k7z4.py deleted.
Temporary file /tmp/tmp5sy6q8l2.py deleted.
Temporary file /tmp/tmpndxyx3wa.py deleted.
Temporary file /tmp/tmp_yciq756.py deleted.
Temporary file /tmp/tmplsi1p15l.

In [None]:
solutions_test[["message", "line_number", "line"]] = solutions_test["student_solution"].parallel_apply(
    lambda x: pd.Series(get_basedpyright_errors(x))
)

solutions_test.to_csv("test_solutions_with_pyright.csv")