# LASSO Python
## MBPP Evaluation Analytics
This notebook showcases how a data analyst can use LASSO Python to collect dynamic information about Python code implementations.
By using data from an Apache Ignite cache that was filled by LASSO Python, the analyst can easily deduce interesting information, such as correctness of the code, execution times, or coverage data.
Both the tests and the Python code implementations used in this example stem from the Mostly Basic Python Problems (MBPP) dataset. MBPP is a dataset consisting of crowd-sourced Python programming problems. Each task consists of a description, a code solution, and multiple test cases. Using LASSO Python, several of these tests were replicated; the results are shown in this notebook. More information on MBPP can be found here: https://github.com/google-research/google-research/tree/master/mbpp

In [1]:
# Load the result CSV that LASSO Python generated and that was extracted
# from the Apache Ignite cache, then preview its first rows.
import pandas as pd

file_path = './evaluation_results.csv'
df = pd.read_csv(filepath_or_buffer=file_path)

# Preview only: showing the whole frame would flood the notebook output.
df.head(5)

Unnamed: 0,EXECUTIONID,ABSTRACTIONID,ACTIONID,ARENAID,SHEETID,SYSTEMID,VARIANTID,ADAPTERID,X,Y,TYPE,VALUE,RAWVALUE,VALUETYPE,LASTMODIFIED,EXECUTIONTIME
0,e2009edf-95e1-4aa0-89e0-64c4d7419bff,Task8,,execute,8.xlsx,562286dcbbd409d0318a556a81e60e7e,original,"0('square_nums', 'square_nums', 0)",-1,9,metrics_covered_lines_in_file,2,2,<class 'int'>,"(datetime.datetime(2024, 9, 13, 18, 30, 1, 263...",-1
1,6f41b57d-c704-4331-af4f-b03a54fb8fa7,Task90,,execute,90.xlsx,fd1b0543b2a0c513e31b105f4211421b,original,"0('len_log', 'len_log', 0)",1,4,op,len_log,len_log,function,"(datetime.datetime(2024, 9, 13, 21, 34, 54, 75...",217
2,6f41b57d-c704-4331-af4f-b03a54fb8fa7,Task106,,execute,106.xlsx,,original,"0('create', 'createPythonObject', 0)",3,5,input_value,10,10,<class 'str'>,"(datetime.datetime(2024, 9, 13, 21, 36, 49, 53...",-1
3,6f41b57d-c704-4331-af4f-b03a54fb8fa7,Task95,,execute,95.xlsx,,original,"0('create', 'createPythonObject', 0)",4,11,input_value,4,4,<class 'int'>,"(datetime.datetime(2024, 9, 13, 21, 35, 43, 66...",-1
4,6f41b57d-c704-4331-af4f-b03a54fb8fa7,Task74,,execute,74.xlsx,c2c26f364a152e5f2b73f1851a6e7048,original,"0('is_samepatterns', 'is_samepatterns', 0)",-1,3,metrics_covered_lines_in_function,17,17,<class 'int'>,"(datetime.datetime(2024, 9, 13, 21, 32, 43, 39...",-1


In [2]:
# Put each value produced by the LASSO Python test execution next to its
# oracle value (ground truth) so the two can be compared row by row.
value_df = df[df['TYPE'] == 'value']
oracle_df = df[df['TYPE'] == 'oracle']

# Rows are paired on the columns listed in `on`; pandas suffixes the
# remaining overlapping columns with `_x` (value side) and `_y` (oracle side).
merged_df = value_df.merge(
    oracle_df,
    on=['EXECUTIONID', 'ABSTRACTIONID', 'SHEETID', 'X', 'Y'],
)
merged_df.loc[:, ['ABSTRACTIONID', 'Y', 'TYPE_x', 'VALUE_x', 'TYPE_y', 'VALUE_y']]

Unnamed: 0,ABSTRACTIONID,Y,TYPE_x,VALUE_x,TYPE_y,VALUE_y
0,Task3,1,value,False,oracle,False
1,Task115,6,value,False,oracle,False
2,Task104,9,value,"[['""green""', '""orange""'], ['""b",oracle,"[[""'green'"", ""'orange'""], [""'b"
3,Task68,6,value,False,oracle,False
4,Task2,4,value,"('5', '4')",oracle,"{'5', '4'}"
...,...,...,...,...,...,...
203,Task11,2,value,bcd,oracle,bcd
204,Task56,3,value,True,oracle,True
205,Task74,9,value,False,oracle,False
206,Task67,2,value,115975,oracle,115975


In [7]:
# Cases where the return value and the oracle value are different
merged_df.loc[
    merged_df['VALUE_x'] != merged_df['VALUE_y'],
    ['ABSTRACTIONID', 'Y', 'TYPE_x', 'VALUE_x', 'TYPE_y', 'VALUE_y'],
]

Unnamed: 0,ABSTRACTIONID,Y,TYPE_x,VALUE_x,TYPE_y,VALUE_y
2,Task104,9,value,"[['""green""', '""orange""'], ['""b",oracle,"[[""'green'"", ""'orange'""], [""'b"
4,Task2,4,value,"('5', '4')",oracle,"{'5', '4'}"
5,Task75,12,value,"[(5, 25, 30)]",oracle,"(5, 25, 30)"
12,Task88,9,value,"Counter({5: 3, 6: 2, 7: 2, 4:",oracle,"{10: 1, 5: 3, 6: 2, 7: 2, 4: 2"
16,Task103,1,value,UNSUCCESSFUL,oracle,4
19,Task2,12,value,"('13', '14')",oracle,"{'13', '14'}"
28,Task118,4,value,"['""lists', 'tuples', 'strings""",oracle,"[""'lists'"", ""'tuples'"", ""'stri"
35,Task105,2,value,UNSUCCESSFUL,oracle,2
50,Task108,15,value,"['1', '10', '12', '14', '14',",oracle,"['1', '2', '3', '4', '7', '8',"
53,Task87,10,value,"{'L': 'Lavender', 'B': 'Black'",oracle,"{'W': 'White', 'P': 'Pink', 'B"


In [8]:
# Cases where the return value and the oracle value are different
# NOTE(review): this cell repeats the query of the preceding cell verbatim;
# consider removing one of the two.
merged_df[merged_df['VALUE_x'].ne(merged_df['VALUE_y'])][
    ['ABSTRACTIONID', 'Y', 'TYPE_x', 'VALUE_x', 'TYPE_y', 'VALUE_y']
]

Unnamed: 0,ABSTRACTIONID,Y,TYPE_x,VALUE_x,TYPE_y,VALUE_y
2,Task104,9,value,"[['""green""', '""orange""'], ['""b",oracle,"[[""'green'"", ""'orange'""], [""'b"
4,Task2,4,value,"('5', '4')",oracle,"{'5', '4'}"
5,Task75,12,value,"[(5, 25, 30)]",oracle,"(5, 25, 30)"
12,Task88,9,value,"Counter({5: 3, 6: 2, 7: 2, 4:",oracle,"{10: 1, 5: 3, 6: 2, 7: 2, 4: 2"
16,Task103,1,value,UNSUCCESSFUL,oracle,4
19,Task2,12,value,"('13', '14')",oracle,"{'13', '14'}"
28,Task118,4,value,"['""lists', 'tuples', 'strings""",oracle,"[""'lists'"", ""'tuples'"", ""'stri"
35,Task105,2,value,UNSUCCESSFUL,oracle,2
50,Task108,15,value,"['1', '10', '12', '14', '14',",oracle,"['1', '2', '3', '4', '7', '8',"
53,Task87,10,value,"{'L': 'Lavender', 'B': 'Black'",oracle,"{'W': 'White', 'P': 'Pink', 'B"


In [3]:
# Summary statistics for the per-function line-coverage ratio metric
coverage_df = df[df['TYPE'] == 'metrics_covered_lines_in_function_ratio']

# VALUE is converted to float before the statistics are computed.
coverage_df['VALUE'].astype('float64').describe()

count    205.000000
mean      90.966134
std       18.261631
min        8.823529
25%      100.000000
50%      100.000000
75%      100.000000
max      100.000000
Name: VALUE, dtype: float64

In [4]:
# Rank all records by execution time, longest first, and show the top rows
sorted_df = df.sort_values('EXECUTIONTIME', ascending=False)
sorted_df.head(5)

Unnamed: 0,EXECUTIONID,ABSTRACTIONID,ACTIONID,ARENAID,SHEETID,SYSTEMID,VARIANTID,ADAPTERID,X,Y,TYPE,VALUE,RAWVALUE,VALUETYPE,LASTMODIFIED,EXECUTIONTIME
1686,6f41b57d-c704-4331-af4f-b03a54fb8fa7,Task87,,execute,87.xlsx,be3dbd04478c98e218868c16d718c964,original,"0('merge_dictionaries_three', 'merge_dictionar...",0,10,value,"{'L': 'Lavender', 'B': 'Black'","{'L': 'Lavender', 'B': 'Black', 'G': 'Green', ...",<class 'dict'>,"(datetime.datetime(2024, 9, 13, 21, 34, 12, 32...",40279
5234,6f41b57d-c704-4331-af4f-b03a54fb8fa7,Task87,,execute,87.xlsx,be3dbd04478c98e218868c16d718c964,original,"0('merge_dictionaries_three', 'merge_dictionar...",1,10,op,merge_dictionaries_three,merge_dictionaries_three,function,"(datetime.datetime(2024, 9, 13, 21, 34, 12, 32...",40279
3715,6f41b57d-c704-4331-af4f-b03a54fb8fa7,Task93,,execute,93.xlsx,25b10dca3d39feaa7bfe6590266b167c,original,"0('power', 'power', 0)",1,1,op,power,power,function,"(datetime.datetime(2024, 9, 13, 21, 35, 25, 76...",7572
4752,6f41b57d-c704-4331-af4f-b03a54fb8fa7,Task93,,execute,93.xlsx,25b10dca3d39feaa7bfe6590266b167c,original,"0('power', 'power', 0)",0,1,value,81,81,<class 'numpy.ma.MaskedArray'>,"(datetime.datetime(2024, 9, 13, 21, 35, 25, 76...",7572
960,6f41b57d-c704-4331-af4f-b03a54fb8fa7,Task84,,execute,84.xlsx,926dca20e1489334b93ce64b25a77fcb,original,"0('sequence', 'sequence', 0)",0,1,value,6,6,<class 'int'>,"(datetime.datetime(2024, 9, 13, 21, 33, 43, 88...",5877
