In [1]:
%load_ext autoreload
%autoreload 2
%load_ext snakeviz
%load_ext line_profiler

# Original note: "turn off autoreload so that we can use the old model
# when editing the current project".
# NOTE(review): `%autoreload 2` above enables autoreload rather than turning it
# off, so this note looks stale -- confirm which behavior is intended.

from coeditor.common import *
import os

# Make the project root the working directory so that relative paths used by
# later cells (e.g. "output/lprof.txt") resolve against it.
# `proj_root` presumably comes from the star import above -- verify.
os.chdir(proj_root())

In [2]:
import jedi, parso
from spot.static_analysis import PythonProject, UsageAnalysis
from coeditor.dataset import TokenizedEditDataset
from spot.utils import pretty_print_dict
from pprint import pprint


In [3]:
# Parse a tiny module (leading comment, bare string statement, one function)
# and dump the resulting tree to see where parso keeps comments and whitespace.
sample_src = """
# hi there
"Here I have a string"
def f():
    pass
"""
m = parso.parse(sample_src)
print(m.dump())

Module([
    PythonNode('simple_stmt', [
        String('"Here I have a string"', (3, 0), prefix='\n# hi there\n'),
        Newline('\n', (3, 22)),
    ]),
    Function([
        Keyword('def', (4, 0)),
        Name('f', (4, 4), prefix=' '),
        PythonNode('parameters', [
            Operator('(', (4, 5)),
            Operator(')', (4, 6)),
        ]),
        Operator(':', (4, 7)),
        PythonNode('suite', [
            Newline('\n', (4, 8)),
            PythonNode('simple_stmt', [
                Keyword('pass', (5, 4), prefix='    '),
                Newline('\n', (5, 8)),
            ]),
        ]),
    ]),
    EndMarker('', (6, 0)),
])


In [4]:
from coeditor.code_change import *
from coeditor.history import get_commit_history

# Load commits of this project (the 20 is presumably a cap on how many to
# fetch -- confirm against get_commit_history) and preview the first three.
# `commits` is reused by the problem-generation cell.
commits = get_commit_history(proj_root(), 20)
for c in commits[:3]:
    print(c)


CommitInfo(hash='08a94e620a9b5782838ec61e1fc86c284b50fc32', parents=('e4e82e3f8fa3a3fef5e2d51584aaa7644a6e1f5b',), msg='Add isort and black into pre-commit hooks.')
CommitInfo(hash='e4e82e3f8fa3a3fef5e2d51584aaa7644a6e1f5b', parents=('09002c3bee035ab4c767e4089aef44ea863ca366',), msg='Pre-commit hook changes.')
CommitInfo(hash='09002c3bee035ab4c767e4089aef44ea863ca366', parents=('bcbd5c8b6ee7e867f66e2ccdd55dfd8e220b8ae8',), msg='Add pre-commit hooks.')


In [5]:
from coeditor.code_change import edits_from_commit_history, _tlogger
from coeditor.ctx_change_encoder import (
    C3ProblemGenerator,
    C3ProblemTokenizer,
    JediUsageAnalyzer,
)

# Working directory handed to edits_from_commit_history; it sits next to the
# project root. To start from a clean slate, remove it first:
#   subprocess.run(["rm", "-r", workdir])
workdir = proj_root() / "../temp-1"

generator = C3ProblemGenerator()
# Presumably resets the timing log so the table displayed at the end only
# covers this run -- confirm against _tlogger.
_tlogger.clear()
problems = edits_from_commit_history(
    proj_root(),
    commits,
    workdir,
    change_processor=generator,
)

# Summary: number of problems, analyzer errors with their counts, timing table.
print(f"{len(problems) = }")
for err, count in generator.analyzer.error_counts.items():
    print(f"({count=}): {err}")
display(_tlogger.as_dataframe())


building initial project: 100%|██████████| 61/61 [00:01<00:00, 49.06it/s]
processing commits: 100%|██████████| 19/19 [00:38<00:00,  2.03s/it]

len(problems) = 166





Unnamed: 0,name,count,avg_time,total_time
4,pre_edit_analysis,19,0.934073,17.74738
3,post_edit_analysis,19,0.222794,4.233084
1,parse_module,172,0.021197,3.645904
5,process_change,19,0.098445,1.870446
2,JModuleChange.from_modules,111,0.006585,0.730888
0,checkout,58,0.008889,0.515546


In [8]:
from coeditor.encoding import encode_diffs, _BaseTokenizer

encoder = C3ProblemTokenizer()
# Line-profiling variant of the tokenization step.
# NOTE(review): it iterates over each tokenize_problem result, unlike the live
# line below -- confirm it still matches the current API before using it.
# %lprun -T output/lprof.txt -f C3ProblemTokenizer.tokenize_problem edits = [e for p in problems for e in encoder.tokenize_problem(p)]

# Tokenize every generated problem, then report dataset-level statistics.
edits = list(map(encoder.tokenize_problem, problems))
dset = TokenizedEditDataset.from_edits(edits)
pretty_print_dict(dset.overall_stats())

# Print one tokenized example for inspection (index 4 is presumably arbitrary).
e = edits[4]
print(e.show())

n_edits: 166
n_additions: 0
input_tks:
   mean: 351.77
   median: 403
   min: 54
   max: 512
output_tks:
   mean: 62.163
   median: 46
   min: 8
   max: 256
n_references:
   mean: 9.9819
   median: 12
   min: 0
   max: 19
total_reference_tks:
   mean: 3771.2
   median: 4059
   min: 0
   max: 6142
--------------------------------------------------------------------------------
path: coeditor.code_change/_edits_from_commit_history
n_references: 12
total_reference_tks: 4031
project: temp-1
commit: CommitInfo(hash='f9e7d3bbf0646c97dce2c651fb83fba9c8cfcca8', parents=('f9c5006c3d426a199c472d33cc31b910793b0357',), msg='Switch data preparation to new format.')
<13>:<del>             processed = list(
<14>:<add>             processed = change_processor.process_change(
     <del>                 change_processor.process_change(pchange, pre_analysis, post_analysis)
<15>:<add>                 pchange, pre_analysis, post_analysis

# module: coeditor.code_change
def _edits_from_commit_history(
    p

In [23]:
from coeditor.encoding import *
# Encode a `Deleted` change holding one line of text into tokens and decode
# them back to a string; the bare expression is the cell's displayed value.
decode_tokens(change_to_tokens(Deleted("abc\n")))

'<del> abc\n <del>'

In [6]:
%%timeit -n 1 -r 2
# Time constructing a UsageAnalysis for the project (1 loop per run, 2 runs).
# NOTE(review): `pyproj` is not assigned by any cell above this one; in the
# visible notebook it is only assigned by a later `%lprun` cell, so this cell
# presumably raises NameError on a fresh top-to-bottom run -- confirm and reorder.
analysis = UsageAnalysis(pyproj, add_implicit_rel_imports=True, record_type_usages=True)

9.82 s ± 26 ms per loop (mean ± std. dev. of 2 runs, 1 loop each)


In [34]:
%%timeit -n 1 -r 2
# Time building a PythonProject from the project root (1 loop per run, 2 runs).
PythonProject.from_root(proj_root())

4.79 s ± 21.6 ms per loop (mean ± std. dev. of 2 runs, 1 loop each)


In [17]:
# Open every Python file under src/ as a jedi Script bound to one shared
# Project and keep each script's module node, keyed by file path.
jproj = jedi.Project(proj_root())
jmodules = {}
for f in proj_root().glob("src/**/*.py"):
    s = jedi.Script(project=jproj, path=f)
    # NOTE(review): `_module_node` is a private jedi attribute and may change
    # between jedi versions.
    jmodules[f] = s._module_node


In [3]:
# Ask jedi where the name at position (28, 9) of usage_analysis.py is defined;
# the bare `goto` call is the cell's displayed value.
jproj = jedi.Project(proj_root())
# Pass the project explicitly, as the module-loading loop does, so the Script
# is bound to `jproj` instead of leaving that object unused.
s = jedi.Script(
    path=proj_root() / "src/coeditor/usage_analysis.py",
    project=jproj,
)
s.goto(28, 9)


[<Name name='self', description='param self'>]

In [5]:
from spot.static_analysis import PythonModule

# Line-profile PythonModule.from_cst while building the project; the report is
# also written to output/lprof.txt (relative to the working directory).
# NOTE(review): the profiled statement is also what assigns `pyproj`, which the
# UsageAnalysis timing cell reads -- presumably this cell must run before it.
%lprun -T output/lprof.txt -f PythonModule.from_cst pyproj = PythonProject.from_root(proj_root())


*** Profile printout saved to text file 'output/lprof.txt'. 


Timer unit: 1e-09 s

Total time: 20.8927 s
File: /home/jiayi/Projects/SPOT/src/spot/static_analysis.py
Function: from_cst at line 495

Line #      Hits         Time  Per Hit   % Time  Line Contents
   495                                               @staticmethod
   496                                               def from_cst(module: cst.Module, name: str, drop_comments: bool) -> "PythonModule":
   497        60 4516638349.0 75277305.8     21.6          wrapper = MetadataWrapper(module)
   498        60 8465727458.0 141095457.6     40.5          src_map = dict(wrapper.resolve(PositionProvider))
   499        60     151722.0   2528.7      0.0          module = wrapper.module
   500        60      18831.0    313.9      0.0          removed_comments = []
   501        60      19426.0    323.8      0.0          if drop_comments:
   502        60    6497932.0 108298.9      0.0              remover = CommentRemover(src_map=src_map)
   503        60 5250321636.0 87505360.6     25.1        