# Case Study: Zeeguu/API
> **Credits:** The code extends code provided by [Mircae Lungu](https://github.com/mircealungu) for the course **Software Architecture ITU 2024**
- Backend of a web application that supports [free reading in foreign languages](https://zeeguu.org)
- Open source [repository on GH](https://github.com/zeeguu/API/)

## Table of conentents
1. [Basic Data Gathering](#basic-data-gathering)
    1. [Extract dependencies](#extract-dependencies)
    2. [Visualize](#Visualize)
2. [Abstraction](#Abstraction)
3. [Evolution](#Evolution)



## Basic Data Gathering

- extracting basic dependencies between python modules
- every .py file is called a module in Python
- direct relationship between file name and module name
  - file: `./zeeguu_core/model/user.py` <==>
  - module: `zeeguu_core.model.User`


In [234]:
# Credit: https://colab.research.google.com/drive/1oe_TV7936Zmmzbbgq8rzqFpxYPX7SQHP#scrollTo=Njkjj4fzUV2E
# Installing Required Dependencies
import sys
sys.version
!{sys.executable} -m pip install gitpython
!{sys.executable} -m pip install pyvis

87765.32s - pydevd: Sending message related to process being replaced timed-out after 5 seconds
87772.05s - pydevd: Sending message related to process being replaced timed-out after 5 seconds


In [235]:
# Adopted from: https://colab.research.google.com/drive/1oe_TV7936Zmmzbbgq8rzqFpxYPX7SQHP#scrollTo=Njkjj4fzUV2E
import os
from git import Repo

# Current Working Directory
cwd = os.getcwd()
print(cwd)

# Code location
CODE_ROOT_FOLDER=f"/Users/andreaskongstad/Developer/PycharmProjects/architectural-reconstruction/data/zeeguu-api/"

# Clone the repository
if not os.path.exists(CODE_ROOT_FOLDER):
  Repo.clone_from("https://github.com/zeeguu/api", CODE_ROOT_FOLDER)



/Users/andreaskongstad/Developer/PycharmProjects/architectural-reconstruction/data/zeeguu-api


In [236]:
# Count absolute lines of code and number of files 
!cd {CODE_ROOT_FOLDER} && git ls-files | grep '\.py$' | xargs wc -l | grep total
!cd {CODE_ROOT_FOLDER} && git ls-files | grep "\.py$" | wc -l

87779.97s - pydevd: Sending message related to process being replaced timed-out after 5 seconds
   21206 total
87785.64s - pydevd: Sending message related to process being replaced timed-out after 5 seconds
     278


In [237]:
# helpers
def file_path(file_name):
    return f"{CODE_ROOT_FOLDER}{file_name}"


def module_name_from_file_path(full_path):
    """
    ../core/model/user.py -> zeeguu.core.model.user
    """
    file_name = full_path[len(CODE_ROOT_FOLDER):]
    file_name = file_name.replace("/__init__.py","")
    file_name = file_name.replace("/",".")
    file_name = file_name.replace(".py","")
    return file_name

File_Name = "zeeguu/core/model/user.py"
print(file_path(File_Name))
assert file_path(File_Name) == "/Users/andreaskongstad/Developer/PycharmProjects/architectural-reconstruction/data/zeeguu-api/zeeguu/core/model/user.py"
assert module_name_from_file_path(file_path(File_Name)) == "zeeguu.core.model.user"


def module_name_from_rel_path(full_path):

    # e.g. ../core/model/user.py -> zeeguu.core.model.user

    file_name = full_path.replace("/__init__.py","")
    file_name = file_name.replace("/",".")
    file_name = file_name.replace(".py","")
    return file_name

assert ("tools.migrations.teacher_dashboard_migration_1.upgrade" == module_name_from_rel_path("tools/migrations/teacher_dashboard_migration_1/upgrade.py"))
assert ("zeeguu.api") == module_name_from_rel_path("zeeguu/api/__init__.py")


  

/Users/andreaskongstad/Developer/PycharmProjects/architectural-reconstruction/data/zeeguu-api/zeeguu/core/model/user.py


### AST based parsing

In [238]:
# Import class
from collections import defaultdict

class Import:
    def __init__(self, module:str, total_calls:int=0):
        self.module = module
        self.function_calls = defaultdict(int)
        self.distinct_calls = set()
        self.total_calls = total_calls

    def __str__(self):
        return (f"Import: module {self.module}\n"
                f"Function calls {[f'{k} : {v}' for k,v in self.function_calls.items()]}\n"
                f"Distinct calls {self.distinct_calls}\n"
                f"Total calls: {self.total_calls}\n")



In [239]:
import ast
from collections import defaultdict
from typing import assert_type


def parse_imports(file):
    with open(file) as f:
        tree = ast.parse(f.read(), filename=file)
    
    imports = set() # all imported modules
    imports_v : defaultdict[str, Import] = defaultdict(lambda : Import("None") )
    function_to_module = {} # alias to module mapping
    function_to_module_function = {} # alias to module mapping
    
    try:
        for node in ast.walk(tree):
            match node:
                case ast.Import(names=names ):
                    for alias in names:
                        imports.add(alias.name)
                        imports_v[alias.name] = Import(alias.name)
                
                case ast.ImportFrom(module=module, names=names, level=level):
                    # What if module == .?
                    #print(f"Import from: {module} {names}")
                    if module == "word_scheduling.basicSR.basicSR":
                        print(f"Found: {module} {level} {module_name_from_file_path(file)}")
                    if level > 0:
                        # from .hello.package import module
                        #print(f"Relative import: {module} {names} {level} {file}")
                        pre_module = module_name_from_file_path(file).split(".")
                        while level > 1: # ._common_api_parameters
                            pre_module.pop()
                            level -= 1
                        pre_module = ".".join(pre_module)
                        if module == None: # from . import module
                            module = pre_module
                        else:
                            module = pre_module + "." + module #zeeguu.api.endpoints.teacher_dashboard._common_api_parameters
                        #print(f"fixed import: {module} {names} {level} {file}")
                        
                    imports.add(module)
                    imports_v[module] = Import(module) 
                
                    
                    for alias in names:
                        function_to_module[alias.name] = module
                        function_to_module_function[alias.name] = module + "." + alias.name
                        if "zeeguu.word_scheduling.basicSR.basicSR" == module:
                           print(f"Found: {module} {alias.name}")

                # The imported function is assigned to a variable
                # case ast.Assign(_):
                #     print(f"Assign: {node}")
                case ast.Assign(targets=[ast.Name(id=id)], value=ast.Call(func=ast.Name (id=name), args=args, keywords=keywords)):
                    if name in function_to_module: 
                        #print(f"Assign: {id} {name}")   
                        function_to_module[id] = function_to_module[name]
                        function_to_module_function[id] = function_to_module_function[name]
                    
                
                case ast.Call(func=ast.Attribute(value=ast.Name(id=id), attr=attr), args=args, keywords=keywords): # logger.log("asdasdasd")
                    if id in function_to_module.keys():
                        #print(f"Call: {id} {attr}")
                        imports_v[function_to_module[id]].function_calls[f"{function_to_module_function[id]}.{attr}"] += 1
                        imports_v[function_to_module[id]].total_calls += 1
                        imports_v[function_to_module[id]].distinct_calls.add(f"{function_to_module_function[id]}.{attr}")
                    
                case ast.Call(func=ast.Name(id=id), args=args, keywords=keywords): # log("asdasdsd")
                    
                    if id in function_to_module.keys():
                        #print(f"Call: {id}"
                        imports_v[function_to_module[id]].function_calls[f"{function_to_module_function[id]}"] += 1
                        imports_v[function_to_module[id]].total_calls += 1
                        imports_v[function_to_module[id]].distinct_calls.add(f"{function_to_module_function[id]}")
                case ast.Call(func=call_value):
                    attr_stack = []
                    
                    while isinstance(call_value, ast.Attribute):
                        attr_stack.append(call_value.attr)
                        call_value = call_value.value
                        
                    match call_value:
                        case ast.Name(id=id):
                            attr_stack.append(call_value.id)
                        case ast.Constant(value=value):
                            attr_stack.append(call_value.value)
                        case _ :
                            # Out of scope for now
                            continue
                    
                    call = ".".join(reversed(attr_stack))
                    function_to_module[f"{call}"] = call.split(".")[0]
                    if call.split(".")[0] in imports_v.keys() or call in function_to_module.keys():
                        #print(f"Call: {call}")   
                        imports_v[call.split(".")[0]].function_calls[f"{call}"] += 1
                        imports_v[call.split(".")[0]].total_calls += 1
                        imports_v[call.split(".")[0]].distinct_calls.add(f"{call}")
                
    except Exception as e:
        print(f"Error in {file}: {e}")
        print(f"Module: {module}")
        print(f"Alias: {alias.name}")
                    

    return imports, imports_v
#print(parse_imports(file_path("zeeguu/api/endpoints/teacher_dashboard/student.py"))[0])
#print(parse_imports(file_path("zeeguu/api/endpoints/teacher_dashboard/student.py"))[1]["zeeguu.api.utils"])
#print(parse_imports(file_path("zeeguu/api/endpoints/teacher_dashboard/student.py"))[1]["zeeguu.api.endpoints.teacher_dashboard.student"])
#print(parse_imports(file_path("zeeguu/cl/__init__.py"))[0])
#print(parse_imports(file_path("zeeguu/core/content_recommender/elastic_recommender.py"))[0])
#print(parse_imports(file_path("zeeguu/core/emailer/zeeguu_mailer.py"))[0])
#print(parse_imports(file_path("zeeguu/core/emailer/zeeguu_mailer.py"))[1]["zeeguu"])
#print(parse_imports(file_path("zeeguu/api/utils/translator.py"))[1]["zeeguu.logging"])
assert parse_imports(file_path('zeeguu/core/model/unique_code.py'))[0] == {'datetime', 'zeeguu.core', 'zeeguu.core.model', 'sqlalchemy', 'random'}

imports = parse_imports(file_path('zeeguu/core/model/unique_code.py'))
#print(f"Imports: {imports[0]}")
#print(f"Import_v: {imports[1]}")
print(f"{imports[1]['zeeguu.core.model']}")


Import: module zeeguu.core.model
Function calls ['zeeguu.core.model.db.Column : 4', 'zeeguu.core.model.db.String : 2']
Distinct calls {'zeeguu.core.model.db.Column', 'zeeguu.core.model.db.String'}
Total calls: 6



### Extract dependencies and visalize
To do that we iterate over all the python files with the help of the Path.rglob function from pathlib
And we create a network with the help of the networkx package.Visualize

In [240]:
import pyvis.network as Network
import matplotlib.pyplot as plt
from pathlib import Path
import networkx as nx

# a function to draw a graph
def draw_graph_plt(G, size, **args):
    plt.figure(figsize=size)
    nx.draw(G, **args)
    plt.show()
    
    


## Abstraction
What do we have now:
- System: zeeguu/api
- Source View: Modules & Dependencies
- Entities: .py files in the project
- Relationships: import statements between .py files

Plan: Abstraction methods
1. Folder hierarchy
2. Aggregate dependencies using metrics. (Sum of calls)
    - Total count of explicit low-level dependencies
    - Number of distinct explicit low-level dependencies
    - Network analysis to detect rank packages: Note (It should not be that hard, the networkx package supports various methods of network analysis, e.g. centrality, HITS, pagerank.)
3. Create different level graphs and pass them to OpenAI vision model

### Filter relevant modules

In [256]:

def relevant_module(module_name, top_level_package="all"):
    """
    Define relevant modules
    """
    if "test" in module_name:
        return False
    if top_level_package == "all":
        if module_name.startswith("zeeguu"):
            return True
    else:
        if module_name.startswith(top_level_package):
            return True
    


    return False

In [257]:
def top_level_package(module_name, depth=1):
    """Extract parent of module at depth"""
    components = module_name.split(".")
    return ".".join(components[:depth])

assert (top_level_package("zeeguu.core.model.util") == "zeeguu")
assert (top_level_package("zeeguu.core.model.util", 2) == "zeeguu.core")

def merge_imports(import1 : Import, import2 : Import) -> Import:
    """
    Merge two imports into a new import
    :param import1: Import to merge into
    :param import2: Import to merge from
    :return: import 1 with merged values
    """
    merged : Import = Import(import1.module)
    merged.total_calls = import1.total_calls + import2.total_calls
    merged.distinct_calls = import1.distinct_calls.union(import2.distinct_calls)
    merged.function_calls = import1.function_calls.copy()
    
    for k,v in import2.function_calls.items():
        merged.function_calls[k] += v
    
    return merged
    
assert merge_imports(Import("zeeguu.core", 10), Import("zeeguu.core.model", 20)).total_calls == 30
assert merge_imports(Import("zeeguu.core", 10), Import("zeeguu.core.model", 20)).module == "zeeguu.core"


In [264]:
def dependencies_digraph_filtered_v2(code_root_folder, filter_package = "all") -> (nx.DiGraph, defaultdict[str, defaultdict[str, Import]]):
    files = Path(code_root_folder).rglob("*.py")
    import_map : defaultdict[str, defaultdict[str, Import]] =  defaultdict(lambda : defaultdict(lambda: Import("None")))

    G = nx.DiGraph()
    
    for file in files:
        file_path = str(file)

        source_module = module_name_from_file_path(file_path)
        if not relevant_module(source_module, filter_package):
            continue
        
        imports, imports_v = parse_imports(file_path)
        
        import_map[source_module] = imports_v #
        
        if source_module not in G.nodes:
            G.add_node(source_module)
        
        for target_module in imports_v.keys():

            if relevant_module(target_module, filter_package):
                
                import_object = imports_v[target_module]
                G.add_edge(source_module, target_module, value=import_object.total_calls,
                           title=f"Total calls: {import_object.total_calls}\n"
                                 f"Distinct calls: {len(import_object.distinct_calls)}\n"
                                 f"Functions: \n{"\n".join(import_object.distinct_calls)}")
    
    
    return G, import_map

def abstracted_to_top_level(G, import_map, depth=1):
    aG = nx.DiGraph()
    abstracted_import_map : defaultdict[str, defaultdict[str, Import]] =  defaultdict(lambda : defaultdict(lambda: Import("None")))
    
    for edge in G.edges():
        src = top_level_package(edge[0], depth)
        dst = top_level_package(edge[1], depth)
    
        if src != dst:
            
            #print(f"Import {edge[0]} => {edge[1]} ---------------: {src} => {dst}")
            abstracted_import_map[src][dst].module = dst
            abstracted_import_map[src][dst] = merge_imports(abstracted_import_map[src][dst], import_map[edge[0]][edge[1]])
            count = abstracted_import_map[src][dst].total_calls
            relavent_imports = [(k,v) for k,v in abstracted_import_map[src][dst].function_calls.items() if relevant_module(k)]

            aG.add_edge(src, dst, value= count, title=f"Total calls: {count}\n"
                                     f"Distinct calls: {len(relavent_imports)}\n"
                                     f"Functions: \n{"\n".join([f'{k} : {v}' for k,v in sorted(relavent_imports, key=lambda x: x[1], reverse=True)])}")

    return aG, abstracted_import_map


depth = 2
G, import_map = dependencies_digraph_filtered_v2(CODE_ROOT_FOLDER)
AG, a_import_map = abstracted_to_top_level(G, import_map, depth)
print(G) 
print(AG)
print(f"Import map: {import_map["zeeguu.core"].keys()}")
print(f"Abstracted map {a_import_map["zeeguu.core"].keys()}")


  """


Found: word_scheduling.basicSR.basicSR 2 zeeguu.core.model


  MULTIPLE_NEWLINES = re.compile("\n\s*\n")
  words = [w for w in words if re.search("\d", w) == None]


DiGraph with 220 nodes and 563 edges
DiGraph with 6 nodes and 7 edges
Import map: dict_keys([])
Abstracted map dict_keys(['zeeguu', 'zeeguu.logging'])


In [290]:
def draw_graph_pyvis_v2(G, size, output_file, **args):
    h, w = size
    # Adjaency list
    neighbors = G.adj
    # Add neighbors on hover
    for node in G.nodes:
        G.nodes[node]["title"] = " Imports:\n" + "\n".join(neighbors[node])
        G.nodes[node]["group"] = top_level_package(node, 2)
    
    # Scaling the size of the nodes by 5*degree
    scale = 3 # Scaling the size of the nodes by 10*degree
    degrees = dict(G.degree())
    degrees.update((x, scale*y) for x, y in degrees.items())
    nx.set_node_attributes(G, degrees, "size")


        
    g = Network.Network(height=h, width=w,notebook=True, cdn_resources='in_line', directed=True,  **args) 
    g.show_buttons()
    g.from_nx(G)
    # Set font size
    for n in g.nodes:
        n["font"]['size'] = 100
    g.barnes_hut()
    g.show(output_file)

SRC_DIR = "/Users/andreaskongstad/Developer/PycharmProjects/architectural-reconstruction"
DG, import_map = dependencies_digraph_filtered_v2(CODE_ROOT_FOLDER)
ADG2, a_import_map = abstracted_to_top_level(DG, import_map, 2)
ADG3, a_import_map = abstracted_to_top_level(DG, import_map, 3)
ADG4, a_import_map = abstracted_to_top_level(DG, import_map, 4)
ADG5, a_import_map = abstracted_to_top_level(DG, import_map, 10)

draw_graph_pyvis_v2(ADG2, ("800px","100%"), f"{SRC_DIR}/public/dict-depth-2-dep-graph.html", bgcolor="#222222", font_color="white")
draw_graph_pyvis_v2(ADG3, ("800px","100%"), f"{SRC_DIR}/public/dict-depth-3-dep-graph.html", bgcolor="#222222", font_color="white")
draw_graph_pyvis_v2(ADG4, ("800px","100%"), f"{SRC_DIR}/public/dict-depth-4-dep-graph.html", bgcolor="#222222", font_color="white")
draw_graph_pyvis_v2(ADG5, ("800px","100%"), f"{SRC_DIR}/public/dict-depth-all-dep-graph.html", bgcolor="#222222", font_color="white")

  """
  MULTIPLE_NEWLINES = re.compile("\n\s*\n")


Found: word_scheduling.basicSR.basicSR 2 zeeguu.core.model


  words = [w for w in words if re.search("\d", w) == None]


/Users/andreaskongstad/Developer/PycharmProjects/architectural-reconstruction/public/dict-depth-2-dep-graph.html
/Users/andreaskongstad/Developer/PycharmProjects/architectural-reconstruction/public/dict-depth-3-dep-graph.html
/Users/andreaskongstad/Developer/PycharmProjects/architectural-reconstruction/public/dict-depth-4-dep-graph.html
/Users/andreaskongstad/Developer/PycharmProjects/architectural-reconstruction/public/dict-depth-all-dep-graph.html


### Page rank abstraction

In [291]:

def draw_graph_pyvis_v2_pagerank(G, size, output_file, depth, **args):
    h, w = size
    # Adjaency list
    neighbors = G.adj
    # Add neighbors on hover
    
    # Scale acccording to page rank
    # PageRank computes a ranking of the nodes in the graph G based on the structure of the incoming links. 
    prG = nx.pagerank(G)
    scale = 1000 if depth > 2 else 100
    prG = {k: v*scale for k,v in prG.items()}
    nx.set_node_attributes(G, prG, "size")
    
    

    for node in G.nodes:
        G.nodes[node]["title"] = f"Pagerank: {prG[node]}\nImports:\n{'\n'.join(neighbors[node])}"
        G.nodes[node]["group"] = top_level_package(node, 2)

        
    g = Network.Network(height=h, width=w,notebook=True, cdn_resources='in_line', directed=True,  **args) 
    g.show_buttons()
    g.from_nx(G)
    # Set font size
    for n in g.nodes:
        n["font"]['size'] = 100
    g.barnes_hut()
    g.show(output_file)
        
draw_graph_pyvis_v2_pagerank(ADG2, ("800px","100%"), f"{SRC_DIR}/public/pagerank-dict-depth-2-dep-graph.html", depth=2, bgcolor="#222222", font_color="white")
draw_graph_pyvis_v2_pagerank(ADG3, ("800px","100%"), f"{SRC_DIR}/public/pagerank-dict-depth-3-dep-graph.html", depth=3, bgcolor="#222222", font_color="white")
draw_graph_pyvis_v2_pagerank(ADG4, ("800px","100%"), f"{SRC_DIR}/public/pagerank-dict-depth-4-dep-graph.html", depth=4, bgcolor="#222222", font_color="white")
draw_graph_pyvis_v2_pagerank(ADG5, ("800px","100%"), f"{SRC_DIR}/public/pagerank-dict-depth-all-dep-graph.html", depth=10, bgcolor="#222222", font_color="white")

/Users/andreaskongstad/Developer/PycharmProjects/architectural-reconstruction/public/pagerank-dict-depth-2-dep-graph.html
/Users/andreaskongstad/Developer/PycharmProjects/architectural-reconstruction/public/pagerank-dict-depth-3-dep-graph.html
/Users/andreaskongstad/Developer/PycharmProjects/architectural-reconstruction/public/pagerank-dict-depth-4-dep-graph.html
/Users/andreaskongstad/Developer/PycharmProjects/architectural-reconstruction/public/pagerank-dict-depth-all-dep-graph.html


### Top level package filter

In [292]:
def draw_graph_pyvis_v2_package_filter(G, size, output_file,depth=3, **args):
    h, w = size
    
    # Add neighbors on hover
    prG = nx.pagerank(G)
    scale = 1000 if depth > 2 else 100
    prG = {k: v*scale for k,v in prG.items()}
    nx.set_node_attributes(G, prG, "size")
    

    # Adjaency list
    neighbors = G.adj
    for node in G.nodes:
        G.nodes[node]["title"] = f"Pagerank: {prG[node]}\nImports:\n{'\n'.join(neighbors[node])}"
        G.nodes[node]["group"] = top_level_package(node, 3)

        
    g = Network.Network(height=h, width=w,notebook=True, cdn_resources='in_line', directed=True,  **args) 
    g.show_buttons()
    g.from_nx(G)
    # Set font size
    for n in g.nodes:
        n["font"]['size'] = 100
    g.barnes_hut()
    g.show(output_file)

DG_core, import_map_core = dependencies_digraph_filtered_v2(CODE_ROOT_FOLDER, "zeeguu.core")
ADG2_core, a_import_map_core = abstracted_to_top_level(DG_core, import_map_core, 2)
ADG3_core, a_import_map_core = abstracted_to_top_level(DG_core, import_map_core, 3)
ADG4_core, a_import_map_core = abstracted_to_top_level(DG_core, import_map_core, 4)
ADG5_core, a_import_map_core = abstracted_to_top_level(DG_core, import_map_core, 10)

draw_graph_pyvis_v2_package_filter(ADG3_core, ("800px","100%"), f"{SRC_DIR}/public/core-dict-depth-3-dep-graph.html", bgcolor="#222222", font_color="white")
draw_graph_pyvis_v2_package_filter(ADG4_core, ("800px","100%"), f"{SRC_DIR}/public/core-dict-depth-4-dep-graph.html", bgcolor="#222222", font_color="white")
draw_graph_pyvis_v2_package_filter(ADG5_core, ("800px","100%"), f"{SRC_DIR}/public/core-dict-depth-all-dep-graph.html", bgcolor="#222222", font_color="white")

  """
  MULTIPLE_NEWLINES = re.compile("\n\s*\n")


Found: word_scheduling.basicSR.basicSR 2 zeeguu.core.model


  words = [w for w in words if re.search("\d", w) == None]


/Users/andreaskongstad/Developer/PycharmProjects/architectural-reconstruction/public/core-dict-depth-3-dep-graph.html
/Users/andreaskongstad/Developer/PycharmProjects/architectural-reconstruction/public/core-dict-depth-4-dep-graph.html
/Users/andreaskongstad/Developer/PycharmProjects/architectural-reconstruction/public/core-dict-depth-all-dep-graph.html


### AI summary abstraction

In [280]:
%pip install openai
%pip install python-dotenv
%pip install selenium


94231.58s - pydevd: Sending message related to process being replaced timed-out after 5 seconds
Note: you may need to restart the kernel to use updated packages.
94238.32s - pydevd: Sending message related to process being replaced timed-out after 5 seconds
Note: you may need to restart the kernel to use updated packages.
94244.97s - pydevd: Sending message related to process being replaced timed-out after 5 seconds
Collecting selenium
  Downloading selenium-4.20.0-py3-none-any.whl.metadata (6.9 kB)
Collecting trio~=0.17 (from selenium)
  Downloading trio-0.25.0-py3-none-any.whl.metadata (8.7 kB)
Collecting trio-websocket~=0.9 (from selenium)
  Downloading trio_websocket-0.11.1-py3-none-any.whl.metadata (4.7 kB)
Collecting sortedcontainers (from trio~=0.17->selenium)
  Downloading sortedcontainers-2.4.0-py2.py3-none-any.whl.metadata (10 kB)
Collecting outcome (from trio~=0.17->selenium)
  Downloading outcome-1.3.0.post0-py2.py3-none-any.whl.metadata (2.6 kB)
Collecting wsproto>=0.14 (f

In [298]:
from time import sleep
from selenium import webdriver

# Create a new instance of the Firefox driver
def save_screenshot(file):
    driver = webdriver.Chrome()

    # Navigate to the webpage
    driver.get(f"file://{SRC_DIR}/{file}")
    driver.set_window_size(1920, 1080)
    sleep(5)

    # Save screenshot as PNG
    driver.save_screenshot(f"{SRC_DIR}/{file}.png")


    # Quit the browser
    driver.quit()

save_screenshot("public/dict-depth-2-dep-graph.html")
save_screenshot("public/dict-depth-3-dep-graph.html")
save_screenshot("public/core-dict-depth-3-dep-graph.html")


In [273]:
# Switch to the source directory
%cd {SRC_DIR}

/Users/andreaskongstad/Developer/PycharmProjects/architectural-reconstruction


In [304]:
import html
from turtle import ht
from openai import OpenAI
import os
from dotenv import load_dotenv
import base64
import requests

load_dotenv()

api_key = os.getenv("OPENAI_API_KEY")


# Function to encode the image
def encode_image(image_path):
  with open(image_path, "rb") as image_file:
    return base64.b64encode(image_file.read()).decode('utf-8')

# Define your OpenAI API key.
def get_openai_response(image):

    headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {api_key}"
    }

    payload = {
    "model": "gpt-4-turbo",
    "messages": [
        {
        "role": "user",
        "content": [
            {
            "type": "text",
            "text": "Provide an analyis of the following dependency graphs, what are the key modules and their interactions? Highlight potential issues and findings. Respond with a bullet point summary"
            },
            {
            "type": "image_url",
            "image_url": {
                "url": f"data:image/jpeg;base64, {image}"
            },
            }
        ]
        }
    ],
    "max_tokens": 250
    }

    response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)

    return response.json()




img1 = encode_image("public/dict-depth-2-dep-graph.html.png"),
img2 = encode_image("public/dict-depth-3-dep-graph.html.png"),
img3 = encode_image("public/core-dict-depth-4-dep-graph.html.png")
img4 = encode_image("public/core-dict-depth-3-dep-graph.html.png")



response = get_openai_response(images)

print(response)
# Save response to markdown file
with open("public/ai-abstraction.md", "w") as f:
    f.write(response["choices"][0]["message"]["content"])



{'id': 'chatcmpl-9Nfs5TX3MjFvxUX6eu2pZwucB3Iqg', 'object': 'chat.completion', 'created': 1715429525, 'model': 'gpt-4-turbo-2024-04-09', 'choices': [{'index': 0, 'message': {'role': 'assistant', 'content': "Based on the provided dependency graph, we can discern the following key modules and their interactions within the software architecture:\n\n- **`zeeguu.core.util` (in Red, Central Hub):**\n  - Serves as the central node in the graph, indicating a utility module that is heavily relied upon by numerous other modules.\n  - Connected to a wide range of other modules, signalling its function as a provider of commonly used utilities or functionalities across the system.\n\n- **Directly Connected Modules to `zeeguu.core.util`:**\n  - **`zeeguu.core.model`**: This module likely deals with core data models used throughout the application. Its direct connection to the utility module suggests that these data models utilize general utilities for their operations.\n  - **`zeeguu.core`**: Represe

## Evolution
Plan:
1.  Churn Find hot code -- Most changed/imporant regions

2. Extract multiple complementary module views from your case study system
3. Ensure that your layouts are readable - limit the number of nodes in a view, use a different layout in networkx, or use a different library than networkx
4. Augment each of the previously obtained module views by mapping the above-computed churn metric on the color of a given node

In [198]:
!{sys.executable} -m pip install pydriller

21065.88s - pydevd: Sending message related to process being replaced timed-out after 5 seconds


In [199]:
from pydriller import Repository
REPO_DIR = 'https://github.com/zeeguu/api'

In [200]:
# for PyDriller to work we need to change directory to our local clone of the repo
%cd {CODE_ROOT_FOLDER}

/Users/andreaskongstad/Developer/PycharmProjects/architectural-reconstruction/data/zeeguu-api


In [206]:
all_commits = list(Repository(REPO_DIR).traverse_commits())

def print_out_commit_details(commits):
  for commit in commits:
      print(commit)
      for each in commit.modified_files:
          print(f"{commit.author.name} {each.change_type} {each.filename}\n -{each.old_path}\n -{each.new_path}")

print_out_commit_details(all_commits[0:1])

<pydriller.domain.commit.Commit object at 0x10b4fc860>
Mircea Lungu ModificationType.ADD LICENSE
 -None
 -LICENSE
Mircea Lungu ModificationType.ADD README.md
 -None
 -README.md
Mircea Lungu ModificationType.ADD s
 -None
 -s
Mircea Lungu ModificationType.ADD setup.py
 -None
 -setup.py
Mircea Lungu ModificationType.ADD zeeguu_api.wsgi
 -None
 -zeeguu_api.wsgi
Mircea Lungu ModificationType.ADD __init__.py
 -None
 -zeeguu_api/__init__.py
Mircea Lungu ModificationType.ADD __main__.py
 -None
 -zeeguu_api/__main__.py
Mircea Lungu ModificationType.ADD __init__.py
 -None
 -zeeguu_api/api/__init__.py
Mircea Lungu ModificationType.ADD download_content_from_url.py
 -None
 -zeeguu_api/api/download_content_from_url.py
Mircea Lungu ModificationType.ADD exercises.py
 -None
 -zeeguu_api/api/exercises.py
Mircea Lungu ModificationType.ADD feeds.py
 -None
 -zeeguu_api/api/feeds.py
Mircea Lungu ModificationType.ADD sessions.py
 -None
 -zeeguu_api/api/sessions.py
Mircea Lungu ModificationType.ADD smartwatch

In [210]:
# sort by number of commits in decreasing order
def commit_counts(all_commits):
    commit_counts = defaultdict(int)

    for commit in all_commits:
        try:
            for each in commit.modified_files:
                commit_counts[each.new_path] += 1
        except:
            pass
    return commit_counts

sorted(commit_counts(all_commits).items(), key=lambda x: x[1], reverse=True)[:42]

[(None, 175),
 ('zeeguu_api/api/dashboard.py', 80),
 ('zeeguu_api/api/translate_and_bookmark.py', 78),
 ('zeeguu_api/api/teacher_dashboard.py', 76),
 ('requirements.txt', 52),
 ('zeeguu_api/api/feeds.py', 51),
 ('zeeguu_api/app.py', 51),
 ('zeeguu/core/emailer/zeeguu_mailer.py', 48),
 ('zeeguu_api/api/__init__.py', 44),
 ('zeeguu/core/content_retriever/article_downloader.py', 43),
 ('README.md', 41),
 ('zeeguu/core/model/article.py', 41),
 ('.travis.yml', 34),
 ('setup.py', 33),
 ('tools/update_article_content.py', 30),
 ('zeeguu_api/api/exercises.py', 29),
 ('zeeguu/core/model/user.py', 29),
 ('zeeguu/core/emailer/user_activity.py', 28),
 ('zeeguu_api/api/sessions.py', 27),
 ('zeeguu/core/model/__init__.py', 27),
 ('zeeguu_api/user_activity_hooks/article_interaction_hooks.py', 25),
 ('zeeguu_api.wsgi', 24),
 ('zeeguu/core/content_recommender/elastic_recommender.py', 24),
 ('zeeguu/core/model/language.py', 24),
 ('zeeguu_api/api/user_articles.py', 23),
 ('zeeguu_api/api/bookmarks_and_w

In [214]:
from collections import defaultdict
import comm
from pydriller import ModificationType

def commit_counts_better(all_commits):
    commit_counts = {}

    for commit in all_commits:
        try:
            for modification in commit.modified_files:

                new_path = modification.new_path
                old_path = modification.old_path

                if modification.change_type == ModificationType.RENAME:
                    commit_counts[new_path]=commit_counts.get(old_path,0)+1
                    commit_counts.pop(old_path)

                elif modification.change_type == ModificationType.DELETE:
                    commit_counts.pop(old_path, '')

                elif modification.change_type == ModificationType.ADD:
                    commit_counts[new_path] = 1

                else: # modification to existing file
                        commit_counts [old_path] += 1
        except Exception as e:
            print("something went wrong with: " + str(modification))
            pass
    return commit_counts

# sort by number of commits in decreasing order
sorted(commit_counts_better(all_commits).items(), key=lambda x: x[1], reverse=True)[:42]
# discussion: What is ("None", 103) ?

something went wrong with: <pydriller.domain.commit.ModifiedFile object at 0x14e753d40>
something went wrong with: <pydriller.domain.commit.ModifiedFile object at 0x14e753d40>
something went wrong with: <pydriller.domain.commit.ModifiedFile object at 0x14e753d40>
something went wrong with: <pydriller.domain.commit.ModifiedFile object at 0x14e753d40>
something went wrong with: <pydriller.domain.commit.ModifiedFile object at 0x14e753d40>


[('zeeguu/api/endpoints/translation.py', 95),
 ('zeeguu/api/app.py', 71),
 ('zeeguu/api/endpoints/__init__.py', 54),
 ('zeeguu/core/model/article.py', 51),
 ('zeeguu/core/emailer/zeeguu_mailer.py', 49),
 ('zeeguu/api/endpoints/exercises.py', 49),
 ('requirements.txt', 47),
 ('zeeguu/core/content_retriever/article_downloader.py', 45),
 ('README.md', 41),
 ('zeeguu/core/model/user.py', 34),
 ('zeeguu/api/endpoints/feature_toggles.py', 34),
 ('setup.py', 33),
 ('zeeguu/api/endpoints/user.py', 33),
 ('zeeguu/api/endpoints/user_articles.py', 31),
 ('zeeguu/core/model/__init__.py', 30),
 ('zeeguu/api/endpoints/accounts.py', 30),
 ('zeeguu/api/endpoints/sessions.py', 30),
 ('tools/update_article_content.py', 30),
 ('zeeguu/core/emailer/user_activity.py', 29),
 ('zeeguu/core/model/bookmark.py', 29),
 ('zeeguu/api/endpoints/bookmarks_and_words.py', 29),
 ('zeeguu/api/endpoints/speech.py', 29),
 ('zeeguu/api/endpoints/topics.py', 27),
 ('zeeguu/core/content_recommender/elastic_recommender.py', 2

In [223]:
from struct import pack


def package_activity(commit_counts, depth=2) -> defaultdict[str, int]:
    package_activity = defaultdict(int)

    for path, count in commit_counts.items():
        if ".py" in str(path):
            l2_module = top_level_package(module_name_from_rel_path(path), depth)
            package_activity[l2_module] += count

    return package_activity

#commit_counts = commit_counts_better(all_commits)
#package_activities = package_activity(commit_counts, 2)
#sorted(package_activity.items(), key=lambda x: x[1], reverse=True)    

In [230]:
def draw_graph_pyvis_v3(G, package_activity, size, output_file, **args):
    h, w = size
    # Adjaency list
    neighbors = G.adj
    # Add neighbors on hover
    for node in G.nodes:
        G.nodes[node]["title"] = "Package activity:"+ str(package_activity[node]) + "Imports:\n" + "\n".join(neighbors[node])
        G.nodes[node]["group"] = top_level_package(node, 2)
    
    # Scaling the size of the nodes from package activity
    sizes = {node: package_activity[node] for node in G.nodes}
    nx.set_node_attributes(G, sizes, "size")
        
    g = Network.Network(height=h, width=w,notebook=True, cdn_resources='in_line', directed=True,  **args) 
    g.show_buttons()
    g.from_nx(G)
    # Set font size
    for n in g.nodes:
        n["font"]['size'] = 75
    g.barnes_hut()
    g.show(output_file)

SRC_DIR = "/Users/andreaskongstad/Developer/PycharmProjects/architectural-reconstruction"
DG, import_map = dependencies_digraph_filtered_v2(CODE_ROOT_FOLDER)
ADG2, a_import_map = abstracted_to_top_level(DG, import_map, 2)
ADG3, a_import_map = abstracted_to_top_level(DG, import_map, 3)
ADG4, a_import_map = abstracted_to_top_level(DG, import_map, 4)
ADG5, a_import_map = abstracted_to_top_level(DG, import_map, 5)

commit_counts = commit_counts_better(all_commits)
package_activity2 = package_activity(commit_counts, 2)
package_activity3 = package_activity(commit_counts, 3)
package_activity4 = package_activity(commit_counts, 4)
package_activity5 = package_activity(commit_counts, 5)


draw_graph_pyvis_v3(ADG2, package_activity2, ("800px","100%"), f"{SRC_DIR}/public/churn_dict-depth-2-dep-graph.html", bgcolor="#222222", font_color="white")
draw_graph_pyvis_v3(ADG3, package_activity3, ("800px","100%"), f"{SRC_DIR}/public/churn_dict-depth-3-dep-graph.html", bgcolor="#222222", font_color="white")
draw_graph_pyvis_v3(ADG4, package_activity4, ("800px","100%"), f"{SRC_DIR}/public/churn_dict-depth-4-dep-graph.html", bgcolor="#222222", font_color="white")
draw_graph_pyvis_v3(ADG5, package_activity5, ("800px","100%"), f"{SRC_DIR}/public/churn_dict-depth-5-dep-graph.html", bgcolor="#222222", font_color="white")


    



  """
  MULTIPLE_NEWLINES = re.compile("\n\s*\n")


Found: word_scheduling.basicSR.basicSR 2 zeeguu.core.model


  words = [w for w in words if re.search("\d", w) == None]


something went wrong with: <pydriller.domain.commit.ModifiedFile object at 0x30693f2f0>
something went wrong with: <pydriller.domain.commit.ModifiedFile object at 0x30693f2f0>
something went wrong with: <pydriller.domain.commit.ModifiedFile object at 0x30693f2f0>
something went wrong with: <pydriller.domain.commit.ModifiedFile object at 0x30693f2f0>
something went wrong with: <pydriller.domain.commit.ModifiedFile object at 0x30693f2f0>
/Users/andreaskongstad/Developer/PycharmProjects/architectural-reconstruction/public/churn_dict-depth-2-dep-graph.html
/Users/andreaskongstad/Developer/PycharmProjects/architectural-reconstruction/public/churn_dict-depth-3-dep-graph.html
/Users/andreaskongstad/Developer/PycharmProjects/architectural-reconstruction/public/churn_dict-depth-4-dep-graph.html
/Users/andreaskongstad/Developer/PycharmProjects/architectural-reconstruction/public/churn_dict-depth-5-dep-graph.html


## Dynamic Analysis ()
Not as relavent for project.

In [None]:
import inspect

def methods_in_class(cls):
    """ Returns all the methods in a class """
    return [
		(name, object) 
		for (name, object) 
			in cls.__dict__.items() 
		if hasattr(object, '__call__')]
    
def log_decorator( function ):
    """ A decorator that logs the function on call """
    def decorated( *args, **kwargs ):
        print (f'I have been called: {function}')
        return function( *args,**kwargs )
    return decorated

def decorate_methods( cls, decorator ):
    """ Decorates all the methods in a class with a log_decorator"""
    methods = methods_in_class(cls)
    for name, method in methods:
	    setattr( cls, name, decorator ( method ))
    return cls


def caller(): 
	callee()

def callee():
    """ Prints the name of the calling function"""
    print(inspect.stack()[1].function)

caller()



In [None]:
# Decoreate the user class:
from zeeguu.core.model import User
decorate_methods(User, log_decorator)

u= User.find_by_id(534)
u.bookmark_count()

# to see even further one can instrument also third party libraries!
from sqlalchemy.orm.query import Query
decorate_methods(Query, log_decorator)
