In [6]:
import pandas as pd
import random

# Function to generate a random string for code snippets, function calls, graphs, and opcodes
def random_string(prefix, length=10):
    """Build a pseudo-random identifier of the form ``<prefix>_<letters>``.

    Args:
        prefix: Value placed before the underscore separator.
        length: How many random lowercase ASCII letters follow the
            underscore (default 10).

    Returns:
        str: ``prefix``, an underscore, then ``length`` letters drawn with
        replacement from ``a``-``z`` using the global ``random`` state.
    """
    alphabet = 'abcdefghijklmnopqrstuvwxyz'
    letters = random.choices(alphabet, k=length)
    suffix = ''.join(letters)
    return f"{prefix}_{suffix}"

# Generate dataset
NUM_CONTRACTS = 100  # total number of synthetic contracts to generate
SEED = 42            # fixed seed so the dataset can be regenerated exactly

# Seed the global `random` generator that random_string() and
# random.randint() draw from, so repeated runs produce the same rows.
random.seed(SEED)

# Balanced labels derived from the row count: the first half is secure (0)
# and the remainder is insecure (1); the shuffle below mixes them.
num_secure = NUM_CONTRACTS // 2

data = {
    'Contract ID': range(1, NUM_CONTRACTS + 1),
    'Code Snippet': [random_string('contract', 20) for _ in range(NUM_CONTRACTS)],
    'Function Call Patterns': [
        f"{{call{random.randint(1, 50)}, call{random.randint(51, 100)}}}"
        for _ in range(NUM_CONTRACTS)
    ],
    'Control Flow Graph': [random_string('graph', 10) for _ in range(NUM_CONTRACTS)],
    'Opcode Sequence': [random_string('opcodes', 15) for _ in range(NUM_CONTRACTS)],
    'Label': [0] * num_secure + [1] * (NUM_CONTRACTS - num_secure),
}

# Create DataFrame
df = pd.DataFrame(data)

# Shuffle the rows to mix secure and insecure contracts; random_state keeps
# the shuffled order identical from run to run.
df = df.sample(frac=1, random_state=SEED).reset_index(drop=True)

# Display the first 5 rows of the dataset
print(df.head())

# Save to CSV file (index=False omits the DataFrame index column)
df.to_csv('smart_contract_dataset.csv', index=False)


   Contract ID                   Code Snippet Function Call Patterns  \
0           75  contract_nomymkwuetidtyituiad       {call32, call51}   
1            1  contract_tlpjcxbhhlrinyrmwrnw       {call40, call56}   
2           15  contract_awiaoloircyvwfwqeewk       {call16, call71}   
3           97  contract_tpfdushtjhabhbagxfvd       {call15, call89}   
4           50  contract_gsrtucyuqotuuqxuqvpw       {call20, call93}   

  Control Flow Graph          Opcode Sequence  Label  
0   graph_radnzkgzyk  opcodes_hlwfvfuffcgkmmb      1  
1   graph_rahjavoncb  opcodes_spdzpbslgdphzmp      0  
2   graph_vnrqxvfezq  opcodes_zrlprhinfozypkn      0  
3   graph_qvcqyyrxtp  opcodes_wucwlwtfiwxzbir      1  
4   graph_tovjujkwsc  opcodes_bflqajezynnijvm      0  


In [2]:
import pandas as pd
import random

# Function to generate a random string for code snippets, function calls, graphs, and opcodes
def random_string(prefix, length=10):
  """Return ``prefix`` followed by an underscore and random lowercase letters.

  Args:
    prefix: Value placed in front of the underscore.
    length: Count of letters to append after the underscore (default 10).

  Returns:
    str: ``"<prefix>_<letters>"`` where the letters are sampled with
    replacement from ``a``-``z`` via the global ``random`` module state.
  """
  lowercase = 'abcdefghijklmnopqrstuvwxyz'
  tail = ''.join(random.choices(lowercase, k=length))
  return f"{prefix}_{tail}"

# Generate dataset with 3000 rows
NUM_CONTRACTS = 3000  # total number of synthetic contracts to generate
SEED = 42             # fixed seed so the dataset can be regenerated exactly

# Seed the global `random` generator that random_string() and
# random.randint() draw from, so repeated runs produce the same rows.
random.seed(SEED)

# Balanced labels derived from the row count: the first half is secure (0)
# and the remainder is insecure (1); the shuffle below mixes them.
num_secure = NUM_CONTRACTS // 2

data = {
  'Contract ID': range(1, NUM_CONTRACTS + 1),
  'Code Snippet': [random_string('contract', 20) for _ in range(NUM_CONTRACTS)],
  'Function Call Patterns': [
    f"{{call{random.randint(1, 50)}, call{random.randint(51, 100)}}}"
    for _ in range(NUM_CONTRACTS)
  ],
  'Control Flow Graph': [random_string('graph', 10) for _ in range(NUM_CONTRACTS)],
  'Opcode Sequence': [random_string('opcodes', 15) for _ in range(NUM_CONTRACTS)],
  'Label': [0] * num_secure + [1] * (NUM_CONTRACTS - num_secure),
}

# Create DataFrame
df = pd.DataFrame(data)

# Shuffle the rows to mix secure and insecure contracts; random_state keeps
# the shuffled order identical from run to run.
df = df.sample(frac=1, random_state=SEED).reset_index(drop=True)

# Display the first 5 rows of the dataset (optional)
display(df.head())

# Save to CSV file (index=False omits the DataFrame index column)
df.to_csv('smart_contract_dataset.csv', index=False)


In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# Size of the synthetic dataset
num_rows = 3000

# Fix NumPy's global seed so the random columns below are repeatable
np.random.seed(42)

# Sequential IDs 1..num_rows and a placeholder snippet naming each contract
contract_ids = np.arange(1, num_rows + 1)
code_snippets = []
for cid in contract_ids:
    code_snippets.append(f'contract Contract{cid} {{ ... }}')

# Each row pairs one call index from 1-9 with one from 10-19; the two draws
# are made in this order for every row to keep the random stream unchanged.
function_call_patterns = []
for _ in range(num_rows):
    first_call = np.random.randint(1, 10)
    second_call = np.random.randint(10, 20)
    function_call_patterns.append(f'{{call{first_call}, call{second_call}}}')

# Graph tags, drawn from 1-9
control_flow_graphs = []
for _ in range(num_rows):
    graph_no = np.random.randint(1, 10)
    control_flow_graphs.append(f'graph{graph_no}')

# Opcode tags, drawn from 1-9
opcode_sequences = []
for _ in range(num_rows):
    opcode_no = np.random.randint(1, 10)
    opcode_sequences.append(f'opcodes{opcode_no}')

# 0 for secure, 1 for insecure
labels = np.random.randint(0, 2, size=num_rows)

# Assemble the columns in their output order and build the DataFrame
data = dict([
    ('Contract ID', contract_ids),
    ('Code Snippet', code_snippets),
    ('Function Call Patterns', function_call_patterns),
    ('Control Flow Graph', control_flow_graphs),
    ('Opcode Sequence', opcode_sequences),
    ('Label', labels),
])
df = pd.DataFrame(data)

# Write the dataset to disk without the index column
df.to_csv('test_!.csv', index=False)




# # Split the dataset
# train, temp = train_test_split(df, test_size=0.4, random_state=42)
# val, test = train_test_split(temp, test_size=0.5, random_state=42)

# # Save the datasets to CSV files
# train.to_csv('/mnt/data/train.csv', index=False)
# val.to_csv('/mnt/data/validation.csv', index=False)
# test.to_csv('/mnt/data/test.csv', index=False)

# # Display the first few rows of each set
# print("Training Set:\n", train.head())
# print("Validation Set:\n", val.head())
# print("Test Set:\n", test.head())
