# Configurations and Loading Smart Contract

In [117]:
# Directories used when optimizing several Solidity files in one run
FILES_DIR = "sol"  # Every "*.sol" file found here is loaded as input
OUTPUT_DIR = "optimized"  # Created on demand by the loading cell; presumably receives the optimized sources — TODO confirm

### Main Options
# Best-fit background: https://www.geeksforgeeks.org/best-fit-allocation-in-operating-system/
VARIABLE_PACKING = True  # On its own this only sorts the variables by byte size, from big to small.
VARIABLE_PACKING_BEST_FIT = True  # Needs variable packing, which is switched on automatically when this is True. (Decreases 16 gas from test SC)
DEFAULT_VALUE = True  # Drop explicit initializers equal to the type default (false, 0, empty string, zero address, empty bytes).
INCREMENT_OPERATOR = True  # Rewrite standalone "x++" as "++x".
DECREMENT_OPERATOR = True  # Rewrite standalone "x--" as "--x".
PRECOMPUTE_VARIABLE_VALUE = True  # Unreference variables and precompute them if not used for increment/decrement or functions.
BOOLEAN_IF_CHECK = True  # Simplify redundant boolean comparisons (e.g. "== true") inside if conditions.
INDEX_EVENT_VARIABLE = True  # Add "indexed" to event parameters, up to 3 per event. NOTE(review): events without any indexed parameter are currently left untouched — confirm this is intended.

### Miscellaneous Options (May increase gas cost if set to True)
PRECOMPUTE_VALUE = False  # Lazy check on every " = " and see if it can be evaluated with Python. Address is ignored.
INDEX_EVENT_STRING_BYTES = False  # Allow string and bytes variables to be indexed; forces INDEX_EVENT_VARIABLE on. (Increases 12 gas from test SC)

In [118]:
# Load every Solidity source file found in FILES_DIR
import os

# Make sure the input and output directories exist, reporting each one that had to be created.
for directory in (FILES_DIR, OUTPUT_DIR):
  if not os.path.exists(directory):
    os.makedirs(directory)
    print("Created", directory, "directory.")

# Maps file name without the ".sol" extension -> full source text
file_contents = {}

for entry in os.listdir(FILES_DIR):
  stem, extension = os.path.splitext(entry)
  if extension == ".sol":
    # The context manager closes the handle even when read() raises.
    with open(os.path.join(FILES_DIR, entry), "r") as source_file:
      file_contents[stem] = source_file.read()
    print("Read file", stem)

Created optimized directory.
Read file 9
Read file 10
Read file 6
Read file 7
Read file 8


In [111]:
# Test Only: replace the loaded sources with one hard-coded file from FILES_DIR.
# NOTE(review): this discards everything the loading cell collected and raises if the
# file does not exist, so skip this cell for a normal run.
file_contents = {}
with open(os.path.join(FILES_DIR, "4_optimized_optimized.sol"), "r") as f:
  # The context manager closes the handle even when read() raises.
  file_contents["test"] = f.read()

In [81]:
# For optimizing smart contract without using file directory
# Falls back to the inline sample contract below when nothing was loaded (None or empty dict).
if not file_contents:
  file_contents = {}
  file_contents["1"] = """
  // SPDX-License-Identifier: GPL-3.0
  pragma solidity ^0.4.24;

  contract something {
    uint public coolade;
    address public checker;
    address public owner;
    mapping(address => bool) public whitelist;
    int public more = 0;
    int more12 = more7;  // This should remain more7
    int more13 = more12;  // This should be more7
    int256 public more3=23;
    int8 more2 =0;
    uint16 more4;
    uint8 more5=0;
    uint[4] coolade2;
    int[][2] coolade3;
    int[2] coolade4;
    address[] fishe;
    string[] arr;
    bool bool1 = false;
    bool bool2 = true;
    string str1 = "";
    string str2 = "aa";
    string str3 = '';
    address address1 = 0x0000000000000000000000000000000000000000;
    address address2 = 0x0000000000000000000000000000000000000123;
    bytes4 bytesVar1 = "";
    bytes4 bytesVar2 = "abcd";
    bytes4 bytesVar3 = 0;
    bytes4 public bytesVar4 = '';
    int bool3;
    int more6 = more3++;
    int more10 = more9 - 1;  // -1
    int more7 = 2 + 3;  // 5
    int more8 = more7 + 1;  // 6
    int more9 = (more8 - 2) * 2;  // 8
    int more11 = more6 / 2;

    int asdasd1 = 1;
    int asdasd2 = 2;
    uint32 asdasd3 = more8;  // Optimizer will not precalculate this
    uint16 asdasd4 = 4;

    event SetPrice1(uint256 price, address a);
    event SetPrice2(bytes32 price, string b);
    event SetPrice3(int256 price, string indexed c);
    event SetPrice4(uint256 indexed price, address indexed a);  // no change

    constructor() public {
      owner = msg.sender;
    }

    modifier onlyOwner() {
      require(msg.sender == owner);
      _;
    }

    function setChecker(address _checker) public onlyOwner {
      checker = _checker;
      for (int i = 0; i<10; i++) {
        more++;
      }

      //for (int i=0;i<5;i++) {  // <- Should give a space between code operations or it won't work.
      //  more ++;  // <- Increment should connect with the variable name or it won't work.
      //}

      bool3 = ++more;  // Okay to be replaced (optimal)
      bool3 = more++;  // Must not be replaced 1
      bool3=more++;  // Must not be replaced 2
      bool3 = more3 + more++;  // Must not be replaced 3
      bool3=more3+more++;  // Must not be replaced 4

      bool3 = 0;
      bool3=0;

      if (bool3 == 0) {
      }

      if (bool1 == true) {
      }

      if (true == true && false) {
      }

      if (true || false == true && false) {}

      if (true || false == (true)) {}

      if (true == true) {}

      if ((true == true) == (true == true) == (true == true == true)) {}

      if (true == true == true == true) {}

      if ((true == true) == (true == true) == (true == true == true) == (true == true == true == true)) {}

      if (true == true == true == true == true == true == true) {}

      if ((true == true) == (true == false) == (true == false == true) == (true == false == true == true)) {}

      if (false == true == true == false == true == false == true) {}

      if (((((true)))) == (((true))) == (true) == true) {}
      if (((((true)))) == (((true))) == (true) == false) {}
      if (((((true)))) == (((true))) == (false) == true) {}
      if (((((true)))) == (((true))) == (false) == false) {}
      if (false == false == false == false == false) {}
      if (false == false == false == false == false == false) {}

    }

    function approve(address _wallet) public onlyOwner {
      whitelist[_wallet] = true;
    }
  }
  """
  # Deployment: 681450 vs 654435  (original vs optimized)  [27015 diff (~3.96%)]
  # approve: 46238 vs 46238  (original vs optimized)  [0 diff]
  # setChecker: 79516 vs 64159  (original vs optimized)  [15357 diff (~19.31%)]

#print(file_contents)

{'1': '\n  // SPDX-License-Identifier: GPL-3.0\n  pragma solidity ^0.4.24;\n\n  contract something {\n    uint public coolade;\n    address public checker;\n    address public owner;\n    mapping(address => bool) public whitelist;\n    int public more = 0;\n    int more12 = more7;  // This should remain more7\n    int more13 = more12;  // This should be more7\n    int256 public more3=23;\n    int8 more2 =0;\n    uint16 more4;\n    uint8 more5=0;\n    uint[4] coolade2;\n    int[][2] coolade3;\n    int[2] coolade4;\n    address[] fishe;\n    string[] arr;\n    bool bool1 = false;\n    bool bool2 = true;\n    string str1 = "";\n    string str2 = "aa";\n    string str3 = \'\';\n    address address1 = 0x0000000000000000000000000000000000000000;\n    address address2 = 0x0000000000000000000000000000000000000123;\n    bytes4 bytesVar1 = "";\n    bytes4 bytesVar2 = "abcd";\n    bytes4 bytesVar3 = 0;\n    bytes4 public bytesVar4 = \'\';\n    int bool3;\n    int more6 = more3++;\n    int more10 

# Initializing Optimizer

In [107]:
### Defining functions and utilities
import re

### Options behavior
# A dependent option forces the option it builds on; all other combinations are left as configured.
# Best-fit packing is a refinement of variable packing, so it switches packing on.
if VARIABLE_PACKING_BEST_FIT == True:
  VARIABLE_PACKING = True
# String/bytes indexing is only consulted inside the event-indexing pass, so that pass has to be enabled.
if INDEX_EVENT_STRING_BYTES == True:
  INDEX_EVENT_VARIABLE = True

# Get contract version
#version = re.search('pragma solidity \^?(.*);', file_content).group(1)
# Get contract names
#name = re.findall('contract (.*) {', file_content)

# Utility functions
def swapPositionList(lst, pos1, pos2):  # Not used
  """Exchange the elements at pos1 and pos2 of lst in place and return the same list."""
  # Read both elements before writing so a bad index fails without a partial swap
  moved_to_first = lst[pos2]
  moved_to_second = lst[pos1]
  lst[pos1] = moved_to_first
  lst[pos2] = moved_to_second
  return lst

def swapPositionDict(dct, key1, key2):  # Not used
  """Exchange the values stored under key1 and key2 of dct in place and return the same dict."""
  # Read both values before writing so a missing key fails without a partial swap
  moved_to_first = dct[key2]
  moved_to_second = dct[key1]
  dct[key1] = moved_to_first
  dct[key2] = moved_to_second
  return dct

def bitToByte(bit):
  """Convert a size in bits to bytes with true division (an int input yields a float)."""
  bits_per_byte = 8
  return bit / bits_per_byte

def bestFit(processSize):
  """Distribute variables over 32-byte slots with the best-fit strategy.

  processSize maps a byte size to the list of line dicts of that size. Every
  line dict is stamped in place with a "size" key. One 32-byte slot is made
  available per variable; each variable, in input order, goes into the slot
  with the least free space that can still hold it (lowest slot id on ties).
  A variable larger than 32 bytes fits nowhere and is left out.

  Returns (allocation, changed): allocation maps slot id -> line dicts in
  placement order; changed is True when the flattened allocation differs from
  the flattened input (reordered, or an entry was left out).
  """
  slot_capacity = 32  # Maximum possible partition

  # Flatten the size groups into one list, stamping every entry with its size
  entries = []
  for size, group in processSize.items():
    for entry in group:
      entry["size"] = size
      entries.append(entry)

  # Generate one empty slot per entry
  free_space = [slot_capacity] * len(entries)

  # Slot id -> entries placed in that slot
  allocation = {}

  for entry in entries:
    needed = entry["size"]
    candidates = [slot for slot, free in enumerate(free_space) if free >= needed]
    if not candidates:
      continue  # Nothing can hold this entry

    # min() keeps the first of equally full slots, i.e. the lowest slot id
    best_slot = min(candidates, key=free_space.__getitem__)
    allocation.setdefault(best_slot, []).append(entry)
    free_space[best_slot] -= needed

  print("blockSize:", free_space)
  print("processSize:", processSize)
  print("allocation:", allocation)

  # Check whether best fit changed anything compared with the input order
  placed = [entry for members in allocation.values() for entry in members]
  changed = entries != placed

  print(entries)
  print(placed)
  print("is", changed)

  return allocation, changed

def add_variable_island(byte_size, dict_line):
  """Record a variable line in the current "island" of adjacent variable lines.

  An island is a dict stored in the global list all_vars. It maps each byte
  size to the line dicts of that size, plus a "metadata" entry holding
  start_index, total_line, byte_sizes and variables (name -> initial value).

  byte_size: size key under which the line is filed.
  dict_line: dict with "index" (line number), "value" (line text), "var_name"
    and "var_value". Lines matching the function / modifier / constructor /
    event patterns in the global tags are ignored.

  Updates the globals all_vars, current_island_var and last_index_var.
  """
  global last_index_var
  global current_island_var
  global all_vars

  # Ignore if it's a function / modifier / similar types
  ignore_case = (101, 102, 103, 105)
  if any(re.search(tags[tag], dict_line["value"]) is not None for tag in ignore_case):
    return

  # Start a new island when none exists yet (otherwise a variable on line 0 would
  # index into an empty list) or when this line is not directly below the last one.
  if current_island_var < 0 or last_index_var + 1 < dict_line["index"]:
    current_island_var += 1
    all_vars.append({
      "metadata": {
        "start_index": dict_line["index"],
        "total_line": 0,
        "byte_sizes": set(),
        "variables": {},
      }
    })

  # Add to the island unless this line was already added (a line can match several type tags)
  if last_index_var != dict_line["index"]:
    island = all_vars[current_island_var]
    metadata = island["metadata"]
    island.setdefault(byte_size, []).append(dict_line)
    metadata["total_line"] += 1
    metadata["byte_sizes"].add(byte_size)
    if dict_line["var_name"] is not None:
      metadata["variables"][dict_line["var_name"]] = dict_line["var_value"]
    last_index_var = dict_line["index"]

def get_var_name_value(code_line, default_value = "0"):
  """Extract the initial value and the name of the variable declared on code_line.

  code_line: one line of Solidity source holding a variable declaration.
  default_value: value reported when the line contains no "=".

  Returns (value, name): value is the text between "=" and ";" (or
  default_value), name is the last space-separated word before "=" / ";".
  Doesn't work on mapping.
  """
  # Inspect the line that was passed in, not whatever the caller's globals currently hold
  if "=" not in code_line:
    value = default_value
    declaration = re.sub("( +)?;(.*$)?", "", code_line)  # Drop " ;..." and keep the declaration
  else:
    value = re.sub("^(.*?)=( +)?|( +)?;(.*$)?", "", code_line)  # Delete "...= " and " ;..."
    declaration = re.sub("( +)?=(.*$)?", "", code_line)  # Delete " =..."

  # Variable name should be on the right-most of the declaration
  name = declaration.split(" ")[-1]
  return value, name

# Regex pattern per line tag. The hundreds digit of the key is the category:
# 0: Symbol opening/closing, 1: major opener, 2: default value issue, 3: data types, 4: visibility, 5: operator, 6: error handling
# Remember the ordering. First come, first serve
# Patterns containing regex escapes are raw strings so Python does not treat "\(" etc. as (invalid) string escapes.
tags = {0: "{",
        1: "}",
        100: "contract ",
        101: " function ",
        102: " modifier ",
        103: " constructor",
        104: r" if( +)?\(",  # Then checks for inefficient boolean
        105: r" event(.*?)\(",  # Then checks for indexed variables in event
        201: "bool(.*?)( ?)=( ?)false",  # Default Values (bool)
        202: "(u?)int(.*?)( ?)=( ?)0",  # Default Values (int)
        203: "string(.*?)( ?)=( ?)(('')|(\"\"))",  # Default Values (string)
        204: "address(.*?)( ?)=( ?)0x0000000000000000000000000000000000000000",  # Default Values (address)
        205: "bytes(.*?)( ?)=( ?)((0(x(0+))?)|('')|(\"\"))",  # Default Values (bytes)
        301: r" bool((\[([0-9]+)?\])+)? ",
        302: r" int([0-9]+)?((\[([0-9]+)?\])+)? ",  # old: " int(.*?) "
        303: r" uint([0-9]+)?((\[([0-9]+)?\])+)? ",
        304: r" bytes([0-9]+)?((\[([0-9]+)?\])+)? ",
        305: r" address((\[([0-9]+)?\])+)? ",
        306: r" mapping\(",  # Variable length
        307: r" string((\[([0-9]+)?\])+)? ",  # Variable length
        401: " public ",
        402: " private ",
        403: " external ",
        501: r" for( |\()?",
        502: r"\+\+( |\)|;)", # Should not include for "= x++" or math operations or conditionals as it may break SC functionality  # \+\+( |\)|;|<|>|=|\||\&|\+|\-|\*|/)
        503: r"--( |\)|;)",
        504: "( +)?=( +)?",  # For precompute
        601: r" require\(",
        602: r" revert\(",
        603: r" assert\(",  # No error message
        }  # token: tag

# Optimization Process

In [120]:
### Main process
### Variables for logging
log_text = "Log result:\n"

for file_index, file_name in enumerate(file_contents):
  file_content = file_contents[file_name]

  log_text += "File " + str(file_name) + ":\n"
  log_total_variable_packing = 0
  log_total_default_value = 0
  log_total_increment_operator = 0
  log_total_decrement_operator = 0
  log_total_precompute_variable = 0
  log_total_boolean_if_check = 0
  log_total_index_event_variable = 0


  ### Process:
  # Separate the source code into individual lines -> list
  # Scan and mark each line based on its characteristics -> dict
  # Optimize source code
  # Combine all source code lines back

  ### Variable unpacking
  # Scan for all variable islands
  # Sort each variable islands from big to small byte size

  ### Separate the source code into individual lines -> list
  code_lines = file_content.split("\n")
  code_metadata = {}

  ### Variables for detecting issues
  all_vars = []  # Stores all islands of vars. Content: dict of byte sizes group
  current_island_var = -1
  last_index_var = -1  # To detect when the islands cut off.  if detected var > last_index_var+1
  sorted_var = []


  ### Scan and mark each line based on its characteristics -> dict
  for index, value in enumerate(code_lines):
    metadata = [value]
    for token in tags:

      # https://pynative.com/python-regex-pattern-matching/
      # Check each code line for tag tokens
      if re.search(tags[token], value) != None:
        metadata += [token]
        _line_data = {"index": index, "value": value, "token": token}

        ## Default value fix
        if DEFAULT_VALUE == True:
          if token == 201:
            print(index, "index, default value bool issue found!!", value)

            fixed = re.sub("( ?)=( ?)false", "", value)
            #print("Fixed to:", fixed)
            value = fixed
            metadata[0] = fixed
            code_lines[index] = fixed
            log_text += "Default value fix: " + fixed + "\n"
            log_total_default_value += 1
          elif token == 202:
            print(index, "index, default value int issue found!!", value)

            fixed = re.sub("( ?)=( ?)0", "", value)
            #print("Fixed to:", fixed)
            value = fixed
            metadata[0] = fixed
            #_line_data["value"] = fixed
            code_lines[index] = fixed
            log_text += "Default value fix: " + fixed + "\n"
            log_total_default_value += 1
          elif token == 203:
            print(index, "index, default value string issue found!!", value)

            fixed = re.sub("( ?)=( ?)(('')|(\"\"))", "", value)
            #print("Fixed to:", fixed)
            value = fixed
            metadata[0] = fixed
            code_lines[index] = fixed
            log_text += "Default value fix: " + fixed + "\n"
            log_total_default_value += 1
          elif token == 204:
            print(index, "index, default value address issue found!!", value)

            fixed = re.sub("( ?)=( ?)0x0000000000000000000000000000000000000000", "", value)
            fixed = re.sub("( ?)=( ?)address\(0\)", "", fixed)
            #print("Fixed to:", fixed)
            value = fixed
            metadata[0] = fixed
            code_lines[index] = fixed
            log_text += "Default value fix: " + fixed + "\n"
            log_total_default_value += 1
          elif token == 205:
            print(index, "index, default value bytes issue found!!", value)

            fixed = re.sub("( ?)=( ?)((0(x(0+))?)|('')|(\"\"))", "", value)
            #print("Fixed to:", fixed)
            value = fixed
            metadata[0] = fixed
            code_lines[index] = fixed
            log_text += "Default value fix: " + fixed + "\n"
            log_total_default_value += 1

        ## Unoptimize operator use
        if INCREMENT_OPERATOR == True:
          if token == 502:  # x++
            print(index, "index, potentional unoptimal x++ use detected!", value)

            # Get variable name
            left_wall_var = " |\(|;"  # " |\(|\)|;|<|>|=|\||\&|\+|\-|\*|/"  # " |\(|\)|;"
            prohibited_chars = (">", "<", "=", "|", "&", "+", "-", "*", "/")
            line_split = value.split("++")

            for i in range(len(line_split)-1):
              # Cut from the right edge until hits the left wall
              var_splits = re.split(left_wall_var, line_split[i])
              left_var_name = var_splits[-2]
              var_name = var_splits[-1]

              #print("left_var_name:", left_var_name, "| var_name", var_name)

              # Check variable name and its left to not contain prohibited chars
              if (len(left_var_name) != 0 and any(x in left_var_name for x in prohibited_chars)) or any(x in var_name for x in prohibited_chars):
                #print("Prohibited char detected! Continued to next line.")
                pass

              elif var_name == "":
                #print("Empty variable found")
                pass

              else:
                fixed = value.replace(var_name+"++", "++"+var_name)

                value = fixed
                metadata[0] = fixed
                code_lines[index] = fixed
                #print("Fixed to:", fixed)
                log_text += "Increment operator fix: " + fixed + "\n"
                log_total_increment_operator += 1

        if DECREMENT_OPERATOR == True:
          if token == 503:  # x--
            print(index, "index, potentional unoptimal x-- use detected!", value)

            # Get variable name
            left_wall_var = " |\(|;"  # " |\(|\)|;|<|>|=|\||\&|\+|\-|\*|/"  # " |\(|\)|;"
            prohibited_chars = (">", "<", "=", "|", "&", "+", "-", "*", "/")
            line_split = value.split("--")

            for i in range(len(line_split)-1):
              # Cut from the right edge until hits the left wall
              var_splits = re.split(left_wall_var, line_split[i])
              left_var_name = var_splits[-2]
              var_name = var_splits[-1]

              #print("left_var_name:", left_var_name, "| var_name", var_name)

              # Check variable name and its left to not contain prohibited chars
              if (len(left_var_name) != 0 and any(x in left_var_name for x in prohibited_chars)) or any(x in var_name for x in prohibited_chars):
                #print("Prohibited char detected! Continued to next line.")
                pass

              elif var_name == "":
                #print("Empty variable found")
                pass

              else:
                fixed = value.replace(var_name+"--", "--"+var_name)

                value = fixed
                metadata[0] = fixed
                code_lines[index] = fixed
                #print("Fixed to:", fixed)
                log_text += "Decrement operator fix: " + fixed + "\n"
                log_total_decrement_operator += 1

        ## If check boolean
        if BOOLEAN_IF_CHECK == True:
          if token == 104:
            print(index, "index, if check detected", value)

            boolean_clean = False  # If True, no other inefficiencies found
            fix_count = 0

            while boolean_clean == False:
              boolean_clean = True

              # Check inefficient bool check for the right side
              statement = re.search(r'( +)?==( +)?(\(+)?( +)?true( +)?\)+', value)  # " == true )"
              if statement != None:
                statement = statement.group(0)

                # Check for brackets. Left bracket < Right bracket
                if statement.count('(') < statement.count(')'):

                  delta = statement.count(')') - statement.count('(')
                  fixed = value.replace(statement, ")"*delta)

                  #print("Fixed to:", fixed)
                  value = fixed
                  metadata[0] = fixed
                  code_lines[index] = fixed

                  boolean_clean = False
                  fix_count += 1

              # Check inefficient bool check for the left side
              statement = re.search(r'\(+( +)?true( +)?(\)+)?( +)?==( +)?', value)  # "( true == "
              if statement != None:
                statement = statement.group(0)

                # Check for brackets. Left bracket > Right bracket
                if statement.count('(') > statement.count(')'):

                  # Check how many brackets currently on the left
                  delta = statement.count('(') - statement.count(')')
                  fixed = value.replace(statement, "("*delta)

                  #print("Fixed to:", fixed)
                  value = fixed
                  metadata[0] = fixed
                  code_lines[index] = fixed

                  boolean_clean = False
                  fix_count += 1

              # Check inefficient bool check for == true ==
              statement = re.search(r'(\(+)?( +)?==( +)?(\)+)?( +)?true( +)?(\(+)?( +)?==( +)?(\)+)?', value)  # " == true == "
              if statement != None:
                statement = statement.group(0)

                # Check for brackets. Left bracket == Right bracket
                if statement.count('(') == statement.count(')'):

                  fixed = value.replace(statement, " == ")

                  #print("Fixed to:", fixed)
                  value = fixed
                  metadata[0] = fixed
                  code_lines[index] = fixed

                  boolean_clean = False
                  fix_count += 1

              # Check inefficient bool check for " == false == " should not happen.

              # Check inefficient bool check for true == true  (out of research scope)
              statement = re.search(r'\(+( +)?true( +)?(\)+)?(( +)?==( +)?(\(+)?( +)?true( +)?\)+)+\)+', value)  # " ( true [== true ]) "
              if statement != None:
                statement = statement.group(0)

                # Check for brackets. Left bracket == Right bracket
                if statement.count('(') == statement.count(')'):

                  fixed = value.replace(statement, "(true)")

                  #print("Fixed to:", fixed)
                  value = fixed
                  metadata[0] = fixed
                  code_lines[index] = fixed

                  boolean_clean = False
                  fix_count += 1

              # Check inefficient bool check for false == false  (out of research scope)
              statement = re.search(r'\(+( +)?false( +)?(\)+)?(( +)?==( +)?(\(+)?( +)?false( +)?(\)+)?)+\)+', value)  # " ( false [== false ]) "
              if statement != None:
                statement = statement.group(0)

                # Check for brackets. Left bracket == Right bracket
                if statement.count('(') == statement.count(')'):
                  equal_count = statement.count('==')

                  if equal_count % 2 == 0:
                    fixed = value.replace(statement, "(false)")
                  else:
                    fixed = value.replace(statement, "(true)")

                  #print("Fixed to:", fixed)
                  value = fixed
                  metadata[0] = fixed
                  code_lines[index] = fixed

                  boolean_clean = False
                  fix_count += 1

            if fix_count > 0:
              log_text += "Boolean if check fix: " + value + "\n"
              log_total_boolean_if_check += 1

        ## Index variables in event
        if INDEX_EVENT_VARIABLE == True:
          if token == 105:
            print(index, "index, event detected", value)

            # Check all variables in event - assume that the event is one line of code
            clean = re.sub("^(.*?)event(.*?)\(( +)?|( +)?\)( +)?;(.*$)?", "", value)  # Delete "...event x( " and " ) ;..."
            clean = re.split(", +?", clean)  # Split ",[ ]"
            indexed = sum("indexed" in s for s in clean)  # Count how many vars have been indexed
            fix_count = 0

            # Add "indexed" to variables if not 3 variables are indexed and less than total variables. First come, first serve mechanic.
            if (indexed < 3) and (indexed < len(clean)) and (indexed != 0):
              fixed = value
              for idx, var in enumerate(clean):
                if "indexed" not in var:
                  # Check data type. Skip if string or bytes
                  if INDEX_EVENT_STRING_BYTES == False:
                    var_type = var.split(" ", 1)
                    if "string" in var_type or "bytes" in var_type:
                      continue

                  # Add "indexed" keyword
                  split = var.rsplit(" ", 1)  # Split last occurance of whitespace only
                  clean[idx] = split[0] + " indexed " + split[1]

                  fixed = fixed.replace(var, split[0] + " indexed " + split[1])

                  fix_count += 1
                  indexed += 1
                  if indexed == 3:
                    break

              #print("fixed to:", fixed)
              value = fixed
              metadata[0] = fixed
              code_lines[index] = fixed
              if fix_count > 0:
                log_text += "Event variable fix: " + fixed + "\n"
                log_total_index_event_variable += 1

        ## Lazy precompute operation
        if PRECOMPUTE_VALUE == True:
          if token == 504:  # " = "
            print(index, "index, assign operator detected", value)

            if value.count("=") < 2:
              fixed = value

              # Clean the code line
              clean = re.sub("^(.*?)=( +)?|( +)?;(.*$)?", "", fixed)  # Delete "...= " and " ;..."

              # Evaluate per small splits (for cases of function arguments)
              splits = re.split('\(|\)|=|<|>|%|&|\||,', clean)
              splits = list(filter(None, splits))  # Delete empty elements

              for j in splits:
                try:
                  evaluated = str(eval(j))
                  fixed = fixed.replace(j, evaluated)
                except:
                  #print("Fail 1")
                  pass

              # Evaluate as a whole (for general case)
              try:
                fixed = str(eval(fixed))
              except:
                #print("Fail 2")
                pass

              #print("Result:", fixed)
              value = fixed
              metadata[0] = fixed
              code_lines[index] = fixed

        ## Scan variable islands and initial variable values
        #### Check if variable references an unknown variable value. If detected, don't assign var_name and var_value
        if token == 301:  # bool
          print(index, "index, bool detected")

          # Get variable name and value
          _line_data["var_value"], _line_data["var_name"] = get_var_name_value(_line_data["value"], "false")

          # Check byte size
          size = re.search(r' bool(.*?) ', value).group(1)
          if size == "":
            byte_size = 1
          elif size.isdecimal() == False:
            byte_size = 33   # Variable size (array) is set to 33 by default because they can take up more than one page
          add_variable_island(byte_size, _line_data)  # Added the token variable to know what kind of var this is.

        elif token == 302:  # int
          print(index, "index, int detected")

          # Get variable name and value
          _line_data["var_value"], _line_data["var_name"] = get_var_name_value(_line_data["value"], "0")

          # Check byte size
          size = re.search(r' int(.*?) ', value).group(1)
          if size == "":
            size = 256
          elif size.isdecimal() == False:
            # Check int bit size
            _temp_split = size.split("[")
            if _temp_split[0].isdecimal() == False:
              size = 256
            else:
              size = 264
          byte_size = int(size)//8
          add_variable_island(byte_size, _line_data)

        elif token == 303:  # uint
          print(index, "index, uint detected")

          # Get variable name and value
          _line_data["var_value"], _line_data["var_name"] = get_var_name_value(_line_data["value"], "0")

          # Check byte size
          size = re.search(r' uint(.*?) ', value).group(1)
          if size == "":
            size = 256
          elif size.isdecimal() == False:
            # Check int bit size
            _temp_split = size.split("[")
            if _temp_split[0].isdecimal() == False:
              size = 256
            else:
              size = 264
          byte_size = int(size)//8
          add_variable_island(byte_size, _line_data)

        elif token == 304:  # bytes
          print(index, "index, bytes detected")

          # Get variable name and value
          _line_data["var_value"], _line_data["var_name"] = get_var_name_value(_line_data["value"], "0x0")

          # Check byte size
          byte_size = re.search(r' bytes(.*?) ', value).group(1)
          if byte_size.isdecimal() == False:
            _temp_split = byte_size.split("[")
            if _temp_split[0] == "32":
              byte_size = 32
            else:
              byte_size = 33
          add_variable_island(int(byte_size), _line_data)

        elif token == 305:  # address
          print(index, "index, address detected on index")

          # Get variable name and value
          _line_data["var_value"], _line_data["var_name"] = get_var_name_value(_line_data["value"], "0x0000000000000000000000000000000000000000")

          # Check byte size
          size = re.search(r' address(.*?) ', value).group(1)
          if size == "":
            byte_size = 20
          elif size.isdecimal() == False:
            byte_size = 33
          add_variable_island(byte_size, _line_data)

        elif token == 306:  # mapping
          print(index, "index, mapping detected")

          # Get variable name and value
          _line_data["var_value"], _line_data["var_name"] = None, None

          add_variable_island(33, _line_data)  # Note: This is very variable, so take this with heavy grain of salt on its efficiency

        elif token == 307:  # string
          print(index, "index, string detected")

          # Get variable name and value
          _line_data["var_value"], _line_data["var_name"] = get_var_name_value(_line_data["value"], "''")

          add_variable_island(32.5, _line_data)  # Observed that setting this to 32 bytes is more gas efficient than 33

    code_metadata[index] = metadata

  ### Optimize source code
  #print("")
  # Variable optimization
  #print("Island count:", len(all_vars))
  for index, value in enumerate(all_vars):
    # Precompute variables
    if PRECOMPUTE_VARIABLE_VALUE == True:
      #print("Precompute vars!")

      # Fix all variable values first
      # Step 0. Remove from the variables metadata every name that is the operand of
      # "++" / "--" inside some initializer of this group, so that the following steps
      # do not substitute or precompute it.
      del_variables = []
      for i in value["metadata"]["variables"]:
        var_value = value["metadata"]["variables"][i]

        # Tokenize on brackets, spaces, commas and operators. "+" and "-" are NOT
        # separators here, so "x++" / "--x" survive as a single token.
        splits = re.split(r'\(|\)| |/|\*|=|<|>|%|&|\||,', var_value)
        splits = list(filter(None, splits))  # Delete empty elements

        for j in splits:
          if any(char in j for char in ("++", "--")):
            # Strip the operator to recover the bare operand name
            clean = re.sub(r"\+\+|--", "", j)
            del_variables.append(clean)

      for i in del_variables:
        # Test membership on the dict itself: the operand may not be a tracked variable
        # of this group, and the same name can be collected more than once.
        if i in value["metadata"]["variables"]:
          del value["metadata"]["variables"][i]

      # Step 1. Substitute all possible variables
      # Values that use "++" or "--" are expected to be gone from the variables metadata
      # by now (handled while processing the variable names or by the step before).
      # Substitution is done on whole identifiers only: a plain str.replace would also
      # rewrite the name where it is merely part of a longer identifier ("a" inside "ab").
      # Method: Substitute them as it learns on the go. (This is more aligned with how
      # Solidity variable initialization works)
      if True:
        # Names already visited in this group, mapped to their substituted values
        var_container = {}
        for i in value["metadata"]["variables"]:
          var_value = value["metadata"]["variables"][i]
          var_container[i] = value["metadata"]["variables"][i]

          splits = re.split(r'\(|\)| |\+|-|/|\*|=|<|>|%|&|\||,', var_value)
          splits = list(filter(None, splits))  # Delete empty elements

          # Substitution
          for val in splits:
            if val in var_container:
              # Match the token only when it is not glued to other identifier characters
              token_pattern = re.compile(r'(?<![A-Za-z0-9_$])' + re.escape(val) + r'(?![A-Za-z0-9_$])')
              known_value = var_container[val]
              stored_value = value["metadata"]["variables"][val]

              # Callables are used as replacement so the inserted text is taken literally
              var_value = token_pattern.sub(lambda _m: known_value, var_value)
              var_container[i] = token_pattern.sub(lambda _m: known_value, var_container[i])
              value["metadata"]["variables"][i] = token_pattern.sub(lambda _m: stored_value, value["metadata"]["variables"][i])

              log_text += "Precompute variable fix: " + val + " => " + value["metadata"]["variables"][i] + "\n"
              log_total_precompute_variable += 1

      # Method: Exhaustive search through all variables in a group (risky approach as referenced uninitialized variable can be assigned)
      # Currently disabled by the constant condition above.
      # NOTE(review): variables that reference each other in a cycle look like they would keep this loop running - confirm before enabling.
      else:
        for i in value["metadata"]["variables"]:
          var_value = value["metadata"]["variables"][i]
          has_fix = True

          # Repeat until a pass over the tokens substitutes nothing
          while has_fix == True:
            has_fix = False
            splits = re.split(r'\(|\)| |\+|-|/|\*|=|<|>|%|&|\||,', var_value)
            splits = list(filter(None, splits))  # Delete empty elements

            # Substitution
            for val in splits:
              if val in value["metadata"]["variables"]:
                token_pattern = re.compile(r'(?<![A-Za-z0-9_$])' + re.escape(val) + r'(?![A-Za-z0-9_$])')
                stored_value = value["metadata"]["variables"][val]

                var_value = token_pattern.sub(lambda _m: stored_value, var_value)
                value["metadata"]["variables"][i] = token_pattern.sub(lambda _m: stored_value, value["metadata"]["variables"][i])
                has_fix = True

      # Step 2. Evaluate all literals
      # Only runs when the PRECOMPUTE_VALUE option is on: every variable value of the
      # group is handed to Python's eval() on a best-effort basis.
      # SECURITY NOTE: the evaluated text comes from the loaded source files. It is
      # evaluated in an empty namespace (no builtins, no notebook variables) so that
      # identifiers such as "int", "type", "i" or "index" raise NameError instead of
      # resolving to Python objects that would then be pasted into the contract.
      # This is still not a sandbox - do not run it on untrusted files.
      # NOTE(review): Python "/" yields a float ("10 / 2" -> "5.0") - confirm that is acceptable.
      if PRECOMPUTE_VALUE == True:
        for i in value["metadata"]["variables"]:
          var_value = value["metadata"]["variables"][i]

          # Skip values containing "x", quotes or boolean literals
          # (presumably hex/address literals, strings and booleans)
          if all(char not in var_value for char in ("x", "'", '"', "true", "false")):
            # Evaluate per small splits (for cases of function arguments)
            splits = re.split(r'\(|\)|=|<|>|%|&|\||,', var_value)
            splits = list(filter(None, splits))  # Delete empty elements

            for j in splits:
              try:
                evaluated = str(eval(j, {"__builtins__": {}}, {}))
                var_value = var_value.replace(j, evaluated)
              except Exception:
                # Not a Python-evaluable fragment: leave it untouched
                pass
            value["metadata"]["variables"][i] = var_value

            # Evaluate as a whole (for general case)
            try:
              value["metadata"]["variables"][i] = str(eval(var_value, {"__builtins__": {}}, {}))
            except Exception:
              # Keep the partially evaluated text when the whole value cannot be evaluated
              pass

      # Step 3. Assign precomputed values into the variables
      for i in value["metadata"]["byte_sizes"]:  # Select all byte sizes
        for idx, val in enumerate(value[i]):  # Select all elements in byte size list
          # Replace if variable value is not None, exists in variables metadata, and is found in the source line
          if (val["var_value"] != None) and (val["var_name"] in value["metadata"]["variables"]) and (val["var_value"] in val["value"]):
            new_value = value["metadata"]["variables"][val["var_name"]]

            # Only touch the text after the first "=": replacing over the whole line
            # would also rewrite the old value where it happens to occur inside the
            # type or the variable name (e.g. "a" inside "total", "8" inside "uint8").
            head, assign, tail = val["value"].partition("=")
            if assign and val["var_value"] in tail:
              value[i][idx]["value"] = head + assign + tail.replace(val["var_value"], new_value, 1)
            else:
              # No assignment found to anchor on: keep the previous whole-line behavior
              value[i][idx]["value"] = val["value"].replace(val["var_value"], new_value)

            # Put them in code_lines (this step is needed if variable packing is set to False)
            code_lines[value[i][idx]["index"]] = value[i][idx]["value"]

    # Variable packing mechanic
    if VARIABLE_PACKING == True:
      # Byte sizes present in this group, largest first: the larger sizes are laid out
      # first so that the smaller ones can fill in the remaining empty slots.
      sizes_descending = sorted(value["metadata"]["byte_sizes"], reverse=True)

      if VARIABLE_PACKING_BEST_FIT == True:
        # Split the group by byte size; only sizes <= 31 go through the best fit algorithm
        sorted_var_big = {size: value[size] for size in sizes_descending if not size <= 31}
        sorted_var_small = {size: value[size] for size in sizes_descending if size <= 31}

        optimized_var_small, best_fit_occured = bestFit(sorted_var_small)

        # Log for changes
        if best_fit_occured == True:
          log_text += f"Best fit Variable packing fix in group: {index}\n"
          log_total_variable_packing += 1

        # Big sizes keep their place in front, the best-fit result follows
        sorted_var_big.update(optimized_var_small)
        sorted_var = [entry for size in sorted_var_big for entry in sorted_var_big[size]]

      else:
        # Plain ordering: flatten the per-size lists from the biggest size to the smallest
        sorted_var = [entry for size in sizes_descending for entry in value[size]]

      # Paste the reordered declarations back, starting at the group's first line
      for offset, entry in enumerate(sorted_var):
        code_lines[value["metadata"]["start_index"] + offset] = entry["value"]

  print("")

  ### Combine all source code lines back
  code_optimized = '\n'.join(code_lines)
  print("code_optimized:", code_optimized)

  # Append the counters of this file to the log.
  # The best fit total is left out on purpose: it can be inaccurate because the best
  # fit algorithm could swap variables for the same optimization.
  log_summary_rows = (
    ("Total variable groups: ", len(all_vars)),
    ("Total default value fix: ", log_total_default_value),
    ("Total increment operation fix: ", log_total_increment_operator),
    ("Total decrement operation fix: ", log_total_decrement_operator),
    ("Total precomputed variable fix: ", log_total_precompute_variable),
    ("Total boolean if check fix: ", log_total_boolean_if_check),
    ("Total indexed event variable fix: ", log_total_index_event_variable),
  )
  for log_label, log_count in log_summary_rows:
    log_text += log_label + str(log_count) + "\n"
  log_text += "===================================================\n\n"

  # Output file
  output_path = os.path.join(OUTPUT_DIR, file_name + "_optimized.sol")
  with open(output_path, "w") as text_file:
    text_file.write(code_optimized)

# Show the accumulated optimization log, then persist it as log.txt
print(log_text)
with open("log.txt", "w") as log_file:
  log_file.write(log_text)

11 index, address detected on index
19 index, address detected on index
27 index, address detected on index
28 index, address detected on index
64 index, uint detected
89 index, bytes detected
100 index, address detected on index
101 index, bytes detected
102 index, string detected
119 index, address detected on index
120 index, bytes detected
121 index, uint detected
133 index, address detected on index
134 index, bytes detected
135 index, uint detected
136 index, string detected
139 index, bytes detected
149 index, bytes detected
160 index, address detected on index
161 index, bytes detected
162 index, string detected
164 index, bytes detected
174 index, bytes detected
185 index, address detected on index
186 index, bytes detected
187 index, string detected
189 index, bytes detected
200 index, address detected on index
201 index, bool detected
202 index, bytes detected
203 index, string detected
205 index, if check detected         if (success) {
206 index, if check detected         

## Zipping Output (Optional)

In [121]:
# Bundle the output folder into optimized.zip (only if an archive is needed);
# the archive path is the cell's displayed result
import shutil
shutil.make_archive(base_name="optimized", format="zip", root_dir=OUTPUT_DIR)

'/content/optimized.zip'