In [2]:
import os
import sys

import github as gh

from selenium import webdriver

from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager

import time
from tqdm import tqdm
import pickle
from uuid import uuid4

import re

def generate_test_data():
    """
    Scrape one sample Python file from GitHub and save its text to 'test_code.txt'.

    Opens the file's GitHub "blob" page in headless Chrome, reads the text of
    the code container element and writes it to 'test_code.txt' in the current
    working directory.

    :return: None
    :rtype: None
    """
    mock_repository_url = 'https://github.com/bamblebam/image-classification-rps'
    mock_python_file_url = f'{mock_repository_url}/blob/master/testmodels.py'
    options = Options()
    options.headless = True
    driver = webdriver.Chrome(
        ChromeDriverManager(path="./").install(), options=options
    )  # resolves a chromedriver matching the installed Chrome (cached under ./)
    try:
        driver.get(mock_python_file_url)
        # Fixed pause before reading the page (presumably to let it finish rendering).
        time.sleep(2.5)
        # Locator-string form of find_element: accepted by Selenium 3 and 4,
        # unlike find_element_by_xpath, which newer Selenium releases removed.
        code_body = driver.find_element(
            "xpath",
            "//*[@class='highlight tab-size js-file-line-container']",  # xpath for code container
        )
        source_text = code_body.text
    finally:
        # Always shut the browser down, even if loading or the lookup fails;
        # otherwise every call leaves a headless Chrome process behind.
        driver.quit()
    with open('test_code.txt', 'w') as f:
        f.write(source_text)
    
# Regenerate test_code.txt by scraping the sample file; this launches Chrome
# and fetches a GitHub page, so it needs a local Chrome install and network access.
generate_test_data()



Current google-chrome version is 92.0.4515
Get LATEST driver version for 92.0.4515
Driver [./\drivers\chromedriver\win32\92.0.4515.43\chromedriver.exe] found in cache


In [3]:
def getLayerSequence1helper(code):
    """
    Helper function to get models of type Sequential([...])

    Every ``Sequential([ ... ])`` literal found in ``code`` yields one inner
    list. Each entry of that inner list is a raw fragment of one line between
    the brackets: the text up to, but not including, the last ``)`` on that
    line.

    :param code: the code from the .py file
    :return: one list of captured layer-call fragments per Sequential([...])
             occurrence, in source order
    :rtype: list[list[str]]
    """
    # Raw strings keep the regex escapes ("\(", "\[", "\)") from being read as
    # (invalid) string escapes, which newer Python versions warn about.
    sequential_bodies = re.findall(r"Sequential\(\[([^]]+)\]\)", code, re.DOTALL)
    # NOTE(review): the leading ".?" consumes the first character of each match,
    # so an indented fragment loses one space and an unindented one loses its
    # first letter. Matching is left unchanged here so existing callers see the
    # same strings.
    return [re.findall(r".?(.*)\)", body) for body in sequential_bodies]

In [18]:
# Load the scraped source text. The `with` block closes the file on exit,
# so no explicit close() call is needed.
with open('test_code.txt') as f:
    code = f.read()
output = getLayerSequence1helper(code)
print(output)

[[" keras.layers.Conv2D(128, (3, 3), input_shape=(64, 64, 1), padding='same'", " keras.layers.Conv2D(128, (3, 3), input_shape=(64, 64, 1), padding='same'", " keras.layers.Conv2D(128, (3, 3), input_shape=(64, 64, 1), padding='same'", " keras.layers.Conv2D(128, (3, 3), input_shape=(64, 64, 1), padding='same'", " keras.layers.Dense(3, activation='softmax'", " keras.layers.Dense(3, activation='softmax'", " keras.layers.Dense(3, activation='softmax'"], [" Conv2D(128, (3, 3), input_shape=(64, 64, 1), padding='same'", " Conv2D(128, (3, 3), input_shape=(64, 64, 1), padding='same'", " Conv2D(128, (3, 3), input_shape=(64, 64, 1), padding='same'", " Conv2D(128, (3, 3), input_shape=(64, 64, 1), padding='same'", " Dense(3, activation='softmax'", " Dense(3, activation='softmax'", " Dense(3, activation='softmax'"], [" keras.layers.Conv2D(128, (3, 3), input_shape=(64, 64, 1), padding='same'", " keras.layers.Conv2D(128, (3, 3), input_shape=(64, 64, 1), padding='same'", " keras.layers.Conv2D(128, (3, 3)

In [15]:
def getLayerSequence2(code, path):
    """
    Function to get models of type model.add(layerName)

    Finds every ``<name> = ...Sequential()`` assignment in ``code``, collects
    the layer names passed to ``<name>.add(...)`` in source order, and pickles
    the resulting list via ``model_to_pickle`` when every layer is one of the
    recognised types and at least four layers were found. Progress is printed
    for each candidate model.

    :param code: the code from the .py file
    :param path: path to save the data
    :return: None
    :rtype: None
    """
    layers = {
        "Conv2D",
        "Dense",
        "LSTM",
        "SimpleRNN",
        "Dropout",
        "Flatten",
        "ZeroPadding2D",
        "AveragePooling2D",
        "MaxPooling2D",
    }
    model_names = re.findall(r"(.*) *=.*Sequential\(\)", code)
    for model in model_names:
        print(model)
        # The capture keeps indentation and the space before "="; drop all
        # whitespace so only the bare name remains.
        model = re.sub(r"\s+", "", model)
        # Escape the name: the capture can contain regex metacharacters (e.g.
        # "(" from `x = f(arg=Sequential())`), which would otherwise raise
        # re.error. The look-behind stops "model" from matching inside
        # "base_model.add(" or "other.model.add(".
        add_call = rf"(?<![\w.]){re.escape(model)}\.add\((.*)\("
        rawLayerSequences = re.findall(add_call, code)
        isValid = True
        modelLayers = []
        for rawLayerSequence in rawLayerSequences:
            # "keras.layers.Conv2D(32, " -> "Conv2D"
            k = rawLayerSequence.split("(")[0].split(".")[-1].strip()
            if k not in layers:
                # One unrecognised layer rejects the whole model.
                isValid = False
                break
            modelLayers.append(k)
        print(modelLayers)
        # Models with fewer than four recognised layers are not saved.
        if isValid and len(modelLayers) >= 4:
            print(model)
            model_to_pickle(modelLayers, path)
            
def model_to_pickle(model, path):
    """
    Function to convert the model to a pickle object

    The object is written to a new file inside ``path`` whose name is a random
    32-character hex string followed by ".pkl".

    :param model: the model to be converted
    :param path: path to save the data (a directory; it is not created here)
    :return: None
    :rtype: None
    """
    # uuid4().hex is already a 32-character string, so no slicing or str() needed.
    fname = uuid4().hex + ".pkl"
    fpath = os.path.join(path, fname)
    # The `with` block closes the file on exit; no explicit close() required.
    with open(fpath, "wb") as f:
        pickle.dump(model, f)

In [19]:
# Uses `code` loaded from test_code.txt in an earlier cell.
# NOTE(review): '/' is passed through to model_to_pickle as the output directory,
# so .pkl files are written to the filesystem root. Consider a dedicated,
# writable output directory instead.
getLayerSequence2(code,'/')

model 
['Conv2D', 'Conv2D']
model5 
['Conv2D', 'Conv2D', 'MaxPooling2D', 'Dropout', 'Dense']
model5
