In [132]:
import pandas as pd
import subprocess
import tempfile
import os
from typing import Any, Dict

# Functions for Python code

In [28]:
def test_code_execution(code: str) -> Dict[str, Any]:
    """
    Тестирует выполнение Python кода в изолированном окружении
    """
    result = {
        'execution_successful': False,
        'output': '',
        'error': '',
        'exit_code': None
    }
    
    with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as f:
        f.write(code)
        temp_file = f.name
    
    try:
        process = subprocess.Popen(
            ['python', temp_file],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True
        )
        
        output, error = process.communicate(timeout=10)
        
        result['exit_code'] = process.returncode
        result['execution_successful'] = process.returncode == 0
        result['output'] = output.strip()
        result['error'] = error.strip()
        
    except subprocess.TimeoutExpired:
        result['error'] = 'Execution timed out'
    except Exception as e:
        result['error'] = str(e)
    finally:
        os.unlink(temp_file)
    
    return result

def test_solutions(df: pd.DataFrame) -> pd.DataFrame:
    """
    Тестирует все Python решения в датафрейме и добавляет результаты
    """
    df['execution_successful'] = None
    df['execution_output'] = None
    df['execution_error'] = None
    df['exit_code'] = None
    
    for idx, row in df.iterrows():
        if row['language'].lower() != 'python':
            continue
            
        code = row['programm_code_only']
        
        result = test_code_execution(code)
        
        df.at[idx, 'execution_successful'] = result['execution_successful']
        df.at[idx, 'execution_output'] = result['output']
        df.at[idx, 'execution_error'] = result['error']
        df.at[idx, 'exit_code'] = result['exit_code']
        
        if idx % 10 == 0:
            print(f"Processed {idx} solutions...")
    
    return df

# Usage:
# df = pd.read_csv('your_file.csv')
# df = test_solutions(df)
# df.to_csv('results.csv', index=False)

In [83]:
def check_code(df: pd.DataFrame) -> pd.DataFrame:
    """
    Run the Python solutions in ``df`` and return only the Python rows.

    ``test_solutions`` is called on ``df`` (which adds the execution columns to
    it), then the rows whose ``language`` is "python" are selected. The match
    is case-insensitive so that it agrees with the rows ``test_solutions``
    actually executes.

    Args:
        df: table with the columns ``language`` and ``programm_code_only``.

    Returns:
        A new DataFrame with the Python rows only, re-indexed from 0.
    """
    table_checked = test_solutions(df)

    is_python = table_checked["language"].str.lower() == "python"
    python_checked_code = table_checked[is_python].reset_index(drop=True)
    return python_checked_code
# python_checked_code["execution_successful"].value_counts()

# Functions for JavaScript code

In [158]:
def test_js_code_execution(code: str) -> Dict[str, Any]:
    """
    Тестирует выполнение JavaScript кода через Node.js
    """
    result = {
        'execution_successful': False,
        'output': '',
        'error': '',
        'exit_code': None
    }
    
    with tempfile.NamedTemporaryFile(mode='w', suffix='.js', delete=False) as f:
        f.write(code)
        temp_file = f.name
    
    try:
        process = subprocess.Popen(
            ['node', temp_file],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True
        )
        
        output, error = process.communicate(timeout=10)
        
        result['exit_code'] = process.returncode
        result['execution_successful'] = process.returncode == 0
        result['output'] = output.strip()
        result['error'] = error.strip()
        
    except subprocess.TimeoutExpired:
        result['error'] = 'Execution timed out'
    except FileNotFoundError:
        result['error'] = 'Node.js not found. Please install Node.js to test JavaScript code.'
    except Exception as e:
        result['error'] = str(e)
    finally:
        os.unlink(temp_file)
    
    return result

def test_js_solutions(df: pd.DataFrame) -> pd.DataFrame:
    """
    Тестирует все JavaScript решения в датафрейме и добавляет результаты
    """
    df['js_execution_successful'] = None
    df['js_execution_output'] = None
    df['js_execution_error'] = None
    df['js_exit_code'] = None
    
    for idx, row in df.iterrows():
        if row['language'].lower() not in ['javascript', 'js']:
            continue
            
        code = row['programm_code_only']
        
        result = test_js_code_execution(code)
        
        df.at[idx, 'js_execution_successful'] = result['execution_successful']
        df.at[idx, 'js_execution_output'] = result['output']
        df.at[idx, 'js_execution_error'] = result['error']
        df.at[idx, 'js_exit_code'] = result['exit_code']
        
        if idx % 10 == 0:
            print(f"Processed {idx} JavaScript solutions...")
    
    return df

def print_js_execution_stats(df: pd.DataFrame):
    """Print how many JavaScript solutions in ``df`` executed successfully.

    A row counts as JavaScript when its lower-cased ``language`` is
    "javascript" or "js"; success is read from ``js_execution_successful``.
    Prints a single notice and returns when there are no such rows.
    """
    is_js = df['language'].str.lower().isin(['javascript', 'js'])
    js_rows = df[is_js]
    n_total = len(js_rows)

    # Guard clause: nothing to summarise (also avoids dividing by zero below).
    if n_total == 0:
        print("No JavaScript solutions found in the dataset")
        return

    n_ok = js_rows['js_execution_successful'].sum()
    n_failed = n_total - n_ok
    ok_share = n_ok / n_total * 100
    failed_share = n_failed / n_total * 100

    report_lines = (
        f"Total JavaScript solutions: {n_total}",
        f"Successfully executed: {n_ok} ({ok_share:.2f}%)",
        f"Failed to execute: {n_failed} ({failed_share:.2f}%)",
    )
    for line in report_lines:
        print(line)
    
    # Most common errors
    # if failed > 0:
    #     print("\nMost common errors:")
    #     error_counts = js_solutions[js_solutions['js_execution_error'].notna()]['js_execution_error'].value_counts().head()
    #     for error, count in error_counts.items():
    #         print(f"{count} occurrences: {error[:100]}...")

# Metrics counting

## DeepSeek-Qwen-1.5B

In [137]:
# Clean each generated solution in `table`: drop a leading language tag and
# everything from the first "# Example" marker onwards.
# NOTE(review): `table` must already be loaded; in this notebook the read_csv
# cell for this model appears after this cell — confirm the intended run order.
for i in table.index:
    program_code = table.loc[i, "programm_code_only"]
    language = table.loc[i, "language"]

    if not isinstance(program_code, str):
        # Missing code (e.g. NaN from the CSV) cannot be cleaned; flag the row
        # instead of crashing on the slicing below.
        table.loc[i, "doesnt_exit_code"] = 0
        continue

    # 6 and 10 are the lengths of "python" and "javascript"; one more character
    # (presumably the newline after the tag) is dropped together with the tag.
    if program_code[:6] == language:
        program_code = program_code[7:]
    elif program_code[:10] == language:
        program_code = program_code[11:]

    table.loc[i, "programm_code_only"] = program_code.strip().split("# Example")[0]
    table.loc[i, "doesnt_exit_code"] = 1
        

    

In [136]:
# Load the DeepSeek-R1 Qwen 1.5B results for the algorithms set into `table`.
# NOTE(review): the cleaning loop for this model sits above this cell, so on a
# fresh kernel this cell has to run first — confirm the intended cell order.
table = pd.read_csv("final_results/algorithms/deepseek-r1-qwen-1500M-algorithms.csv")

In [113]:
# Run the Python solutions in `table`; check_code returns only the Python rows.
checked_table = check_code(table)
# Save the per-solution execution results.
checked_table.to_csv("final_results/algorithms/deepseek-r1-qwen-1500M-algorithms_checked.csv", index=False)

# Cell output: how many Python solutions exited with code 0 vs. not.
checked_table["execution_successful"].value_counts()

Processed 0 solutions...
Processed 30 solutions...
Processed 40 solutions...
Processed 50 solutions...
Processed 60 solutions...
Processed 90 solutions...


True     40
False    11
Name: execution_successful, dtype: int64

In [140]:
# Run the JavaScript solutions; test_js_solutions adds the js_* columns to
# `table` in place and returns that same frame.
checked_js = test_js_solutions(table)
print_js_execution_stats(checked_js)
# The saved frame holds every row of `table`, not only the JavaScript ones.
checked_js.to_csv('final_results/algorithms/deepseek-r1-qwen-1500M-algorithms_checked_js.csv', index=False)

Processed 10 JavaScript solutions...
Processed 20 JavaScript solutions...
Processed 70 JavaScript solutions...
Processed 80 JavaScript solutions...
Total JavaScript solutions: 49
Successfully executed: 38 (77.55%)
Failed to execute: 11 (22.45%)


## DeepSeek-Qwen-7B

In [141]:
# Load the DeepSeek-R1 Qwen 7B results for the algorithms set into `table`
# (replaces the previous model's table).
table = pd.read_csv("final_results/algorithms/deepseek-r1-qwen-7B-algorithms.csv")

In [142]:
# Clean each generated solution in `table`: drop a leading language tag and
# everything from the first "# Example" marker onwards.
for i in table.index:
    program_code = table.loc[i, "programm_code_only"]
    language = table.loc[i, "language"]

    if not isinstance(program_code, str):
        # Missing code (e.g. NaN from the CSV) cannot be cleaned; leave the row
        # untouched instead of crashing on the slicing below.
        continue

    # 6 and 10 are the lengths of "python" and "javascript"; one more character
    # (presumably the newline after the tag) is dropped together with the tag.
    if program_code[:6] == language:
        program_code = program_code[7:]
    elif program_code[:10] == language:
        program_code = program_code[11:]

    table.loc[i, "programm_code_only"] = program_code.strip().split("# Example")[0]

In [117]:
# Run the Python solutions in `table`; check_code returns only the Python rows.
checked_table = check_code(table)
# Save the per-solution execution results.
checked_table.to_csv("final_results/algorithms/deepseek-r1-qwen-7B-algorithms_checked.csv", index=False)

# Cell output: how many Python solutions exited with code 0 vs. not.
checked_table["execution_successful"].value_counts()

Processed 0 solutions...
Processed 30 solutions...
Processed 40 solutions...
Processed 50 solutions...
Processed 60 solutions...
Processed 90 solutions...


True     44
False     7
Name: execution_successful, dtype: int64

In [143]:
# Run the JavaScript solutions; test_js_solutions adds the js_* columns to
# `table` in place and returns that same frame.
checked_js = test_js_solutions(table)
print_js_execution_stats(checked_js)
# The saved frame holds every row of `table`, not only the JavaScript ones.
checked_js.to_csv('final_results/algorithms/deepseek-r1-qwen-7B-algorithms_checked_js.csv', index=False)

Processed 10 JavaScript solutions...
Processed 20 JavaScript solutions...
Processed 70 JavaScript solutions...
Processed 80 JavaScript solutions...
Total JavaScript solutions: 49
Successfully executed: 46 (93.88%)
Failed to execute: 3 (6.12%)


## DeepSeek-Llama-8B

In [155]:
# Load the DeepSeek-R1 Llama 8B results for the algorithms set into `table`
# (replaces the previous model's table).
table = pd.read_csv("final_results/algorithms/deepseek-r1-llama-8b-algorithms.csv")

In [156]:
# Clean each generated solution in `table`: drop a leading language tag and
# everything from the first "# Example" marker onwards.
for i in table.index:
    program_code = table.loc[i, "programm_code_only"]
    language = table.loc[i, "language"]

    if not isinstance(program_code, str):
        # Missing code (e.g. NaN from the CSV) cannot be cleaned; leave the row
        # untouched instead of crashing on the slicing below.
        continue

    # 6 and 10 are the lengths of "python" and "javascript"; one more character
    # (presumably the newline after the tag) is dropped together with the tag.
    if program_code[:6] == language:
        program_code = program_code[7:]
    elif program_code[:10] == language:
        program_code = program_code[11:]

    table.loc[i, "programm_code_only"] = program_code.strip().split("# Example")[0]

In [122]:
# Run the Python solutions in `table`; check_code returns only the Python rows.
checked_table = check_code(table)
# Save the per-solution execution results.
checked_table.to_csv("final_results/algorithms/deepseek-r1-llama-8B-algorithms_checked.csv", index=False)

# Cell output: how many Python solutions exited with code 0 vs. not.
checked_table["execution_successful"].value_counts()

Processed 0 solutions...
Processed 30 solutions...
Processed 40 solutions...
Processed 50 solutions...
Processed 60 solutions...
Processed 90 solutions...


True     35
False    16
Name: execution_successful, dtype: int64

In [159]:
# Run the JavaScript solutions; test_js_solutions adds the js_* columns to
# `table` in place and returns that same frame.
checked_js = test_js_solutions(table)
print_js_execution_stats(checked_js)
# The saved frame holds every row of `table`, not only the JavaScript ones.
checked_js.to_csv('final_results/algorithms/deepseek-r1-llama-8B-algorithms_checked_js.csv', index=False)

Processed 10 JavaScript solutions...
Processed 20 JavaScript solutions...
Processed 70 JavaScript solutions...
Processed 80 JavaScript solutions...
Total JavaScript solutions: 49
Successfully executed: 40 (81.63%)
Failed to execute: 9 (18.37%)


In [160]:
# print(checked_js["programm_code_only"][1])

In [161]:
# print(checked_js["js_execution_error"][1])

## DeepSeek-Qwen-14B

In [162]:
# Load the DeepSeek-R1 Qwen 14B results for the algorithms set into `table`
# (replaces the previous model's table).
table = pd.read_csv("final_results/algorithms/deepseek-r1-qwen-14b-algorithms.csv")

In [163]:
# Clean each generated solution in `table`: drop a leading language tag and
# everything from the first "# Example" marker onwards.
for i in table.index:
    program_code = table.loc[i, "programm_code_only"]
    language = table.loc[i, "language"]

    if not isinstance(program_code, str):
        # Missing code (e.g. NaN from the CSV) cannot be cleaned; leave the row
        # untouched instead of crashing on the slicing below.
        continue

    # 6 and 10 are the lengths of "python" and "javascript"; one more character
    # (presumably the newline after the tag) is dropped together with the tag.
    if program_code[:6] == language:
        program_code = program_code[7:]
    elif program_code[:10] == language:
        program_code = program_code[11:]

    table.loc[i, "programm_code_only"] = program_code.strip().split("# Example")[0]

In [127]:
# Run the Python solutions in `table`; check_code returns only the Python rows.
checked_table = check_code(table)
# Save the per-solution execution results.
checked_table.to_csv("final_results/algorithms/deepseek-r1-qwen-14B-algorithms_checked.csv", index=False)

# Cell output: how many Python solutions exited with code 0 vs. not.
checked_table["execution_successful"].value_counts()

Processed 0 solutions...


True     3
False    1
Name: execution_successful, dtype: int64

In [164]:
# Run the JavaScript solutions; test_js_solutions adds the js_* columns to
# `table` in place and returns that same frame.
checked_js = test_js_solutions(table)
print_js_execution_stats(checked_js)
# The saved frame holds every row of `table`, not only the JavaScript ones.
checked_js.to_csv('final_results/algorithms/deepseek-r1-qwen-14B-algorithms_checked_js.csv', index=False)

Processed 10 JavaScript solutions...
Total JavaScript solutions: 11
Successfully executed: 11 (100.00%)
Failed to execute: 0 (0.00%)


## DeepSeek-Qwen-32B

In [165]:
# Load the DeepSeek-R1 Qwen 32B results for the algorithms set into `table`
# (replaces the previous model's table).
table = pd.read_csv("final_results/algorithms/deepseek-r1-qwen-32b-algorithms.csv")

In [166]:
# Clean each generated solution in `table`: drop a leading language tag and
# everything from the first "# Example" marker onwards.
for i in table.index:
    program_code = table.loc[i, "programm_code_only"]
    language = table.loc[i, "language"]

    if not isinstance(program_code, str):
        # Missing code (e.g. NaN from the CSV) cannot be cleaned; leave the row
        # untouched instead of crashing on the slicing below.
        continue

    # 6 and 10 are the lengths of "python" and "javascript"; one more character
    # (presumably the newline after the tag) is dropped together with the tag.
    if program_code[:6] == language:
        program_code = program_code[7:]
    elif program_code[:10] == language:
        program_code = program_code[11:]

    table.loc[i, "programm_code_only"] = program_code.strip().split("# Example")[0]

In [131]:
# Run the Python solutions in `table`; check_code returns only the Python rows.
checked_table = check_code(table)
# Save the per-solution execution results.
checked_table.to_csv("final_results/algorithms/deepseek-r1-qwen-32B-algorithms_checked.csv", index=False)

# Cell output: how many Python solutions exited with code 0 vs. not.
checked_table["execution_successful"].value_counts()

Processed 0 solutions...


True    4
Name: execution_successful, dtype: int64

In [167]:
# Run the JavaScript solutions; test_js_solutions adds the js_* columns to
# `table` in place and returns that same frame.
checked_js = test_js_solutions(table)
print_js_execution_stats(checked_js)
# The saved frame holds every row of `table`, not only the JavaScript ones.
checked_js.to_csv('final_results/algorithms/deepseek-r1-qwen-32B-algorithms_checked_js.csv', index=False)

Processed 10 JavaScript solutions...
Total JavaScript solutions: 11
Successfully executed: 11 (100.00%)
Failed to execute: 0 (0.00%)
