In [1]:
import pandas as pd
import subprocess
import tempfile
import os
import json
from pandarallel import pandarallel

pandarallel.initialize(progress_bar=True)

solutions_train = pd.read_excel("data/for_teams/train/solutions.xlsx")
solutions_test = pd.read_excel("data/for_teams/test/solutions.xlsx")

INFO: Pandarallel will run on 8 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


In [2]:
def run_flake8_on_string(code_string: str):
    try:
        # Create a temporary file to hold the input Python code
        with tempfile.NamedTemporaryFile(delete=False, suffix=".py") as temp_file:
            temp_file_path = temp_file.name
            temp_file.write(code_string.encode("utf-8"))

        # Run pyright on the temporary file
        result = subprocess.run(
            ["flake8", "--format=json", "--select=E999", temp_file_path],
            capture_output=True,
            text=True,
        )

    except FileNotFoundError:
        print("Pyright is not installed or not found in your PATH.")
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        pass
        # Clean up: Delete the temporary file
        if os.path.exists(temp_file_path):
            os.remove(temp_file_path)
            print(f"Temporary file {temp_file_path} deleted.")

    return result.stdout

def get_flake8_errors(code_string: str):
    basedpyright_output = run_flake8_on_string(code_string)
    lines = code_string.split("\n")
    errors = next(iter(json.loads(basedpyright_output).items()))[1]
    count = len(errors)
    if count == 0:
        return {
            "message": None,
            "line_number": None,
            "line": None,
        }
    errors = errors[0]

    return {
        "message": errors["text"],
        "line_number": errors["line_number"],
        "line": errors["physical_line"],
    }

In [3]:
solutions_train[["message", "line_number", "line"]] = solutions_train["student_solution"].parallel_apply(
    lambda x: pd.Series(get_flake8_errors(x))
)

solutions_train.to_csv("train_solutions_with_flake8.csv")

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=44), Label(value='0 / 44'))), HBox…

Temporary file /tmp/tmp6vpqbapi.py deleted.
Temporary file /tmp/tmprmg5k1t3.py deleted.
Temporary file /tmp/tmpte5jjhs9.py deleted.
Temporary file /tmp/tmpcb1jj76i.py deleted.Temporary file /tmp/tmpvhz2ydx8.py deleted.

Temporary file /tmp/tmp65pvdf96.py deleted.
Temporary file /tmp/tmptuz1flmd.py deleted.Temporary file /tmp/tmpvcolfx2_.py deleted.

Temporary file /tmp/tmpzx7qypsb.py deleted.
Temporary file /tmp/tmp7sx9g7im.py deleted.
Temporary file /tmp/tmpe3_c97v5.py deleted.
Temporary file /tmp/tmp6f383zqd.py deleted.
Temporary file /tmp/tmp7nib1cha.py deleted.
Temporary file /tmp/tmpzmf1qdia.py deleted.
Temporary file /tmp/tmpbny_zh7e.py deleted.
Temporary file /tmp/tmpi247f8dp.py deleted.
Temporary file /tmp/tmp7l9g83xh.py deleted.
Temporary file /tmp/tmp_hxm2k86.py deleted.
Temporary file /tmp/tmp3txiozl8.py deleted.
Temporary file /tmp/tmpthz1gvpf.py deleted.
Temporary file /tmp/tmphdomxpl5.py deleted.
Temporary file /tmp/tmp5nw5v4i4.py deleted.
Temporary file /tmp/tmp5hjua_qx.

In [4]:
solutions_test[["message", "line_number", "line"]] = solutions_test["student_solution"].parallel_apply(
    lambda x: pd.Series(get_flake8_errors(x))
)

solutions_test.to_csv("test_solutions_with_flake8.csv")

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=41), Label(value='0 / 41'))), HBox…

Temporary file /tmp/tmpak0tfsdu.py deleted.Temporary file /tmp/tmp6va544lj.py deleted.

Temporary file /tmp/tmpn33027gz.py deleted.
Temporary file /tmp/tmp4qktaglz.py deleted.
Temporary file /tmp/tmpkb31zs0b.py deleted.
Temporary file /tmp/tmps9dj1la7.py deleted.
Temporary file /tmp/tmpib9ha818.py deleted.
Temporary file /tmp/tmpvyu9k9w_.py deleted.
Temporary file /tmp/tmp69omzls_.py deleted.Temporary file /tmp/tmpsfj956w8.py deleted.Temporary file /tmp/tmpke_4lx96.py deleted.


Temporary file /tmp/tmpd4id8g_p.py deleted.
Temporary file /tmp/tmpzv238x59.py deleted.
Temporary file /tmp/tmpkakfw0fh.py deleted.
Temporary file /tmp/tmpwlndek2j.py deleted.
Temporary file /tmp/tmp46f7ujv3.py deleted.
Temporary file /tmp/tmpy0f5wvpb.py deleted.
Temporary file /tmp/tmp6xgkk0db.py deleted.
Temporary file /tmp/tmpjz9_0t53.py deleted.
Temporary file /tmp/tmprfk8oa5l.py deleted.
Temporary file /tmp/tmp4i708hv9.py deleted.
Temporary file /tmp/tmpdglg2x37.py deleted.
Temporary file /tmp/tmpezbp3ugz.