# Project env init

In [None]:
# Clone the smartbugs-curated repository; the .sol files processed below come from it.
!git clone https://github.com/smartbugs/smartbugs-curated

In [None]:
# Work from the dataset directory so the relative paths used in later cells resolve.
%cd smartbugs-curated/dataset/

In [None]:
!pip install --upgrade pip

In [None]:
# NOTE(review): tensorflow is not used by any cell visible here; presumably needed later — confirm.
!pip install tensorflow

In [None]:
# py-solc-x provides the solcx module used below to install and run solc.
!pip install py-solc-x

In [None]:
import solcx
# Show which solc versions solcx reports as installable.
print(solcx.get_installable_solc_versions())

# Generate dataset

In [None]:
# Parse smart contract + generate image

import re
from pathlib import Path
#from web3 import Web3
from solcx import compile_source


def find_pragma(contract):
  """Return the first numeric Solidity version named in the contract's pragma.

  The first ``pragma solidity ...;`` directive is located (case-insensitively)
  and the first ``X.Y.Z`` or ``X.Y`` number inside it is returned. Range
  operators such as ``^`` or ``>=`` are not interpreted; for a constraint like
  ``>=0.4.22 <0.6.0`` the result is simply ``'0.4.22'``.

  Args:
    contract: Solidity source text.

  Returns:
    The version as a string, e.g. ``'0.4.24'``.

  Raises:
    ValueError: If there is no pragma directive, or the directive contains no
      numeric version.
  """
  pragma = re.search(r'pragma\s+solidity\s+([^;]+);', contract, re.IGNORECASE)
  if pragma is None:
    raise ValueError('No pragma statement found')

  constraint = pragma.group(1).strip()

  # The three-part alternative is listed first so "0.4.24" is not cut to "0.4".
  version = re.search(r'\d+\.\d+\.\d+|\d+\.\d+', constraint)
  if version is None:
    raise ValueError(f'Could not parse Solidity version from "{constraint}"')

  return version.group()


def compile(contract, solidity_ver):
  """Compile Solidity source text with a specific solc version.

  Calls ``solcx.install_solc`` and then ``solcx.set_solc_version`` with
  ``solidity_ver`` before compiling, requesting only the ``bin`` output.

  Note: this function shadows the builtin ``compile`` within this notebook.

  Args:
    contract: Solidity source text.
    solidity_ver: Version string passed to solcx, e.g. ``'0.4.24'``.

  Returns:
    Whatever ``compile_source`` returns; the caller in this file iterates it
    as a mapping from contract identifier to a dict holding a ``'bin'`` entry.
  """
  # The requested compiler must be installed before it can be selected.
  solcx.install_solc(solidity_ver)
  solcx.set_solc_version(solidity_ver)

  return compile_source(contract, output_values=['bin'])



In [None]:
from PIL import Image
import math

def bytecode2img(bytecode, target_size=(256, 256)):
    """Render hex-encoded bytecode as a grayscale image.

    Every byte becomes one 8-bit pixel. The bytes are laid out row by row on a
    roughly square grid (the last row is zero-padded), and that grid is then
    stretched to ``target_size`` with nearest-neighbour resampling. The
    stretch does not preserve the grid's aspect ratio.

    Args:
        bytecode: Hex string, such as the ``bin`` output of solc.
        target_size: ``(width, height)`` of the returned image.

    Returns:
        A mode ``'L'`` image of size ``target_size``.

    Raises:
        ValueError: If ``bytecode`` is not valid hex, or decodes to zero bytes
            (previously the empty case surfaced as a ZeroDivisionError).
    """
    data = bytearray.fromhex(bytecode)
    if not data:
        # A zero-length payload would give width == 0 and divide by zero below.
        raise ValueError('Cannot build an image from empty bytecode')

    # Choose a near-square grid: width = floor(sqrt(n)), height = ceil(n / width).
    length = len(data)
    width = int(math.sqrt(length))
    height = math.ceil(length / width)

    # Zero-pad (black pixels) so the data fills the whole width x height grid.
    data.extend(bytes(width * height - length))

    img = Image.frombytes('L', (width, height), bytes(data))

    # resize() already yields exactly target_size, so no further crop is needed.
    return img.resize(target_size, Image.NEAREST)

In [None]:
import os
def gen_img(file):
  """Compile a Solidity file and save one bytecode image per contract.

  The file is read, its pragma version is detected with ``find_pragma``, and
  the source is compiled with ``compile``. Each compiled contract that has
  bytecode is rendered with ``bytecode2img`` and written as
  ``<ContractName>.png`` into a sibling directory named after the file's
  folder plus ``_IMG`` (e.g. ``reentrancy/x.sol`` -> ``reentrancy_IMG/``).

  Args:
    file: Path to a ``.sol`` file; ``~`` is expanded.

  Raises:
    ValueError: Propagated from ``find_pragma`` when no version can be found.
    Exception: Anything raised by compilation or image saving propagates.
  """
  path = Path(file).expanduser()

  # errors='ignore' drops undecodable bytes instead of failing on odd encodings.
  with open(path, 'r', encoding='utf-8', errors='ignore') as f:
    source = f.read()

  solidity_ver = find_pragma(source)
  compiled = compile(source, solidity_ver)

  # Same for every contract in the file, so compute it once.
  output_dir = os.path.join(path.parent.parent, path.parent.name + '_IMG')

  for contract_id, artifacts in compiled.items():
    bytecode = artifacts['bin']

    # Interfaces and abstract contracts compile to empty bytecode; skip them so
    # one such contract does not abort the remaining contracts in the file.
    if not bytecode.strip():
      continue

    img = bytecode2img(bytecode)

    # Create the directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Keys look like "<source>:<ContractName>"; the name becomes the file name.
    # NOTE(review): contracts with the same name in different files of one
    # folder overwrite each other's image — confirm whether that is intended.
    img_path = os.path.join(output_dir, contract_id.split(':')[1] + '.png')
    img.save(img_path)

In [None]:
# Try the pipeline on a single contract file.
gen_img('./denial_of_service/auction.sol')

In [None]:
# Second single-file run, on a contract from the reentrancy folder.
gen_img('./reentrancy/0x01f8c4e3fa3edeb29e514cba738d87ce8c091d3f.sol')

In [None]:
import os

def process_sol_files(root_dir):
  """Run ``gen_img`` on every ``.sol`` file found under ``root_dir``.

  The tree is walked recursively. Processing is best-effort: a failure on one
  file is printed and the walk continues with the next file.

  Args:
    root_dir: Directory to search.
  """
  for folder, _subdirs, names in os.walk(root_dir):
    sol_paths = (os.path.join(folder, name) for name in names if name.endswith('.sol'))
    for sol_path in sol_paths:
      try:
        gen_img(sol_path)
        print(f'Processed: {sol_path}')
      except Exception as err:
        # Report and move on so one bad contract does not stop the batch.
        print(f'Error processing {sol_path}: {err}')

In [None]:
# Convert every .sol file found under the current working directory.
dataset_dir = './'
process_sol_files(dataset_dir)

In [None]:
def remove_png_files(directory=None):
  """Delete the ``.png`` files located directly inside a directory.

  Sub-directories are not searched. Each removal is reported on stdout; a
  failure to remove one file is printed and does not stop the others.

  Args:
    directory: Folder to clean. ``None`` (the default) means the current
      working directory, which is the original behaviour. Passing a path
      allows cleaning the ``*_IMG`` output folders as well.
  """
  names = os.listdir() if directory is None else os.listdir(directory)

  for filename in names:
    if not filename.endswith('.png'):
      continue

    # Keep the bare name when cleaning the working directory so behaviour and
    # messages match the no-argument call exactly.
    target = filename if directory is None else os.path.join(directory, filename)
    try:
      os.remove(target)
      print(f'Removed: {filename}')
    except OSError as e:
      print(f'Error removing {filename}: {e}')

# Delete .png files sitting directly in the current working directory.
remove_png_files()