## Import Libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler

## Load the dataset

In [2]:
# Read the smart-contract CSV (path is relative to the working directory) into a DataFrame.
file_path = 'smart_contract_dataset.csv'
data = pd.read_csv(filepath_or_buffer=file_path)

## Display the first few rows of the dataset


In [3]:
# Preview the first five rows to check which columns were loaded.
print("Dataset:")
display(data.head(n=5))

Dataset:


Unnamed: 0,Contract ID,Code Snippet,Function Call Patterns,Control Flow Graph,Opcode Sequence,Label
0,321,contract_kxcvnkkdehikzffrstdd,"{call22, call91}",graph_hwustzmbve,opcodes_crjjzspsaggqsoh,0
1,2831,contract_inrtorokbfgtbmqqrfjk,"{call25, call58}",graph_vaaulbmlrr,opcodes_uwnaspfahpzelcf,1
2,1565,contract_cassdopqvzbtfxnaawec,"{call27, call94}",graph_ludlnykwbz,opcodes_girbhvbardiqbhf,1
3,1863,contract_bslpgainjtjpswfnslkv,"{call17, call91}",graph_ulhotfijks,opcodes_dynbpiobgphbkaf,1
4,1280,contract_dokyhqhzzuhnfstfainq,"{call3, call74}",graph_dedlfupxux,opcodes_fvvcwjixiwbejte,0


## Feature Extraction


In [4]:
# One-hot encode the three categorical descriptor columns. As the rendered
# output shows, every distinct value becomes its own True/False indicator column.
data_encoded = pd.get_dummies(
    data=data,
    columns=[
        'Function Call Patterns',
        'Control Flow Graph',
        'Opcode Sequence',
    ],
)
data_encoded

Unnamed: 0,Contract ID,Code Snippet,Label,"Function Call Patterns_{call1, call100}","Function Call Patterns_{call1, call52}","Function Call Patterns_{call1, call54}","Function Call Patterns_{call1, call55}","Function Call Patterns_{call1, call56}","Function Call Patterns_{call1, call57}","Function Call Patterns_{call1, call58}",...,Opcode Sequence_opcodes_zwvdwimlvzkibhx,Opcode Sequence_opcodes_zxxhkrpniuzyyeu,Opcode Sequence_opcodes_zyivqctefxfusfz,Opcode Sequence_opcodes_zyosnhjdyarpdoh,Opcode Sequence_opcodes_zywkznheeiyuhxq,Opcode Sequence_opcodes_zzclfagimalodtv,Opcode Sequence_opcodes_zzlffubtmzuhbxv,Opcode Sequence_opcodes_zzpnruuouiepivw,Opcode Sequence_opcodes_zzqqicnltjtofjm,Opcode Sequence_opcodes_zzqypjmlwncfamr
0,321,contract_kxcvnkkdehikzffrstdd,0,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,2831,contract_inrtorokbfgtbmqqrfjk,1,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,1565,contract_cassdopqvzbtfxnaawec,1,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,1863,contract_bslpgainjtjpswfnslkv,1,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,1280,contract_dokyhqhzzuhnfstfainq,0,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2995,143,contract_ohllqyxaphadwqnnaeoo,0,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2996,781,contract_apabweduinuwfbygfzaf,0,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2997,576,contract_gzbsclffnzmprbihlsmn,0,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2998,324,contract_btmukxgkqvgzhrynbxgk,0,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


## Extract features and labels


In [5]:
# Feature matrix: everything except 'Contract ID', 'Code Snippet' and the target.
X = data_encoded.drop(['Contract ID', 'Code Snippet', 'Label'], axis=1)
# Target vector: the 'Label' column.
y = data_encoded.loc[:, 'Label']

# Show both objects for a quick visual check.
display(X, y)

Unnamed: 0,"Function Call Patterns_{call1, call100}","Function Call Patterns_{call1, call52}","Function Call Patterns_{call1, call54}","Function Call Patterns_{call1, call55}","Function Call Patterns_{call1, call56}","Function Call Patterns_{call1, call57}","Function Call Patterns_{call1, call58}","Function Call Patterns_{call1, call59}","Function Call Patterns_{call1, call61}","Function Call Patterns_{call1, call62}",...,Opcode Sequence_opcodes_zwvdwimlvzkibhx,Opcode Sequence_opcodes_zxxhkrpniuzyyeu,Opcode Sequence_opcodes_zyivqctefxfusfz,Opcode Sequence_opcodes_zyosnhjdyarpdoh,Opcode Sequence_opcodes_zywkznheeiyuhxq,Opcode Sequence_opcodes_zzclfagimalodtv,Opcode Sequence_opcodes_zzlffubtmzuhbxv,Opcode Sequence_opcodes_zzpnruuouiepivw,Opcode Sequence_opcodes_zzqqicnltjtofjm,Opcode Sequence_opcodes_zzqypjmlwncfamr
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2995,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2996,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2997,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2998,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


0       0
1       1
2       1
3       1
4       0
       ..
2995    0
2996    0
2997    0
2998    0
2999    0
Name: Label, Length: 3000, dtype: int64

## Split the dataset into training, validation, and testing sets


In [6]:
# 70 % of the rows go to training; the held-out 30 % is then halved into
# validation and test sets (15 % each). Both splits use the same fixed seed.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y,
    test_size=0.3,
    random_state=42,
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp,
    test_size=0.5,
    random_state=42,
)


## Scale the features


In [7]:
# Standardise the features. The scaler is fitted on the training rows only and
# the same fitted scaler is then applied to the validation and test rows.
# Note: the split variables are overwritten, so re-running this cell would
# scale already-scaled data.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

## Hyperparameter tuning for Random Forest


In [8]:
# Grid-search a random forest over tree count, depth and split/leaf sizes,
# scoring each combination with 3-fold cross-validation on the training set.
rf_params = dict(
    n_estimators=[100, 200],
    max_depth=[None, 10, 20],
    min_samples_split=[2, 5],
    min_samples_leaf=[1, 2],
)

rf_model = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=rf_params,
    cv=3,
    n_jobs=-1,
)
rf_model.fit(X_train, y_train)
print("Best Random Forest Params:", rf_model.best_params_)

Best Random Forest Params: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 200}


## Hyperparameter tuning for SVM


In [9]:
# Grid-search an SVM over regularisation strength, kernel type and gamma
# setting, scoring each combination with 3-fold cross-validation.
svm_params = dict(
    C=[0.1, 1, 10],
    kernel=['linear', 'rbf'],
    gamma=['scale', 'auto'],
)

svm_model = GridSearchCV(
    estimator=SVC(random_state=42),
    param_grid=svm_params,
    cv=3,
    n_jobs=-1,
)
svm_model.fit(X_train, y_train)
print("Best SVM Params:", svm_model.best_params_)

Best SVM Params: {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}


## Gradient Boosting Classifier


In [10]:
# Fit a gradient-boosting classifier. Only the seed is set here; unlike the
# random forest and SVM above, no hyperparameter grid is searched.
gb_model = GradientBoostingClassifier(random_state=42)
gb_model.fit(X=X_train, y=y_train)

## Evaluate the models using validation set


In [11]:
# Predict the validation split with each fitted model (same order as before:
# random forest, SVM, gradient boosting).
rf_val_pred, svm_val_pred, gb_val_pred = (
    fitted.predict(X_val) for fitted in (rf_model, svm_model, gb_model)
)

# Report plain accuracy for each model on the validation labels.
print("Random Forest Validation Accuracy:", accuracy_score(y_val, rf_val_pred))
print("SVM Validation Accuracy:", accuracy_score(y_val, svm_val_pred))
print("Gradient Boosting Validation Accuracy:", accuracy_score(y_val, gb_val_pred))

Random Forest Validation Accuracy: 0.47555555555555556
SVM Validation Accuracy: 0.5311111111111111
Gradient Boosting Validation Accuracy: 0.47555555555555556


## Select the best model based on validation performance


In [12]:
# Pick the model with the highest validation accuracy.
# max() returns the first of several equal maxima, so listing the candidates as
# gradient boosting, SVM, random forest resolves ties in that order of preference:
# the random forest wins only when strictly best, and the SVM beats gradient
# boosting only when strictly better.
best_model = max(
    (
        (accuracy_score(y_val, gb_val_pred), gb_model),
        (accuracy_score(y_val, svm_val_pred), svm_model),
        (accuracy_score(y_val, rf_val_pred), rf_model),
    ),
    key=lambda scored: scored[0],
)[1]


## Evaluate the best model using the test set


In [13]:
# Report held-out performance of the selected model: overall accuracy followed
# by the per-class precision / recall / F1 table.
test_pred = best_model.predict(X_test)
print("Test Accuracy:", accuracy_score(y_true=y_test, y_pred=test_pred))
print("Classification Report:")
print(classification_report(y_true=y_test, y_pred=test_pred))


Test Accuracy: 0.5266666666666666
Classification Report:
              precision    recall  f1-score   support

           0       0.53      0.76      0.63       233
           1       0.52      0.27      0.36       217

    accuracy                           0.53       450
   macro avg       0.52      0.52      0.49       450
weighted avg       0.52      0.53      0.50       450



## Static Analysis (Example: Identify reentrancy, integer overflow, and access control issues)


In [14]:
# Static Analysis (Example: Identify reentrancy, integer overflow, and access control issues)
def static_analysis(code_snippet):
    """Return the vulnerability categories whose keyword occurs in `code_snippet`.

    The check is a plain, case-sensitive substring test against three fixed
    lower-case phrases. Categories are returned in the fixed order
    'Reentrancy', 'Integer Overflow', 'Access Control'; the list is empty when
    none of the phrases is present.
    """
    keyword_to_category = (
        ('reentrancy', 'Reentrancy'),
        ('integer overflow', 'Integer Overflow'),
        ('access control', 'Access Control'),
    )
    return [
        category
        for keyword, category in keyword_to_category
        if keyword in code_snippet
    ]

# Run the keyword scan on every 'Code Snippet' value; this adds a
# 'Vulnerabilities' column (one list per row) to `data` in place.
data['Vulnerabilities'] = data['Code Snippet'].apply(static_analysis)
print("Static Analysis Results:")
# Show only the contract ID and its findings for the first rows.
display(data[['Contract ID', 'Vulnerabilities']].head())

Static Analysis Results:


Unnamed: 0,Contract ID,Vulnerabilities
0,321,[]
1,2831,[]
2,1565,[]
3,1863,[]
4,1280,[]


## Dynamic Analysis (Pseudo code as actual execution requires blockchain environment)


In [15]:
# Dynamic Analysis (Pseudo code as actual execution requires blockchain environment)
def dynamic_analysis(contract_id):
    """Stub for execution-based (dynamic) analysis of a single contract.

    Nothing is analysed yet: `contract_id` is ignored and a new, empty list is
    returned on every call.
    """
    # Placeholder: a real implementation would execute the smart contract in a
    # controlled environment, observe its behaviour and record any weaknesses.
    findings = []
    return findings

# Store the (currently always empty) dynamic-analysis result for every
# contract in a new 'Dynamic Vulnerabilities' column of `data`.
data['Dynamic Vulnerabilities'] = data['Contract ID'].apply(dynamic_analysis)
print("Dynamic Analysis Results:")
# Show only the contract ID and its dynamic findings for the first rows.
display(data[['Contract ID', 'Dynamic Vulnerabilities']].head())

Dynamic Analysis Results:


Unnamed: 0,Contract ID,Dynamic Vulnerabilities
0,321,[]
1,2831,[]
2,1565,[]
3,1863,[]
4,1280,[]


## Automated Scanning (Example: Real-time vulnerability identification)


In [16]:
# Automated Scanning (Example: Real-time vulnerability identification)
def automated_scanning(code_snippet):
    """Scan one snippet and return the list of detected vulnerability names.

    Currently a thin wrapper: the result is exactly what `static_analysis`
    returns for the same snippet.
    """
    # Placeholder for automated scanning using blockchain development tools
    return static_analysis(code_snippet)

# Scan every 'Code Snippet' value and keep the findings in a new
# 'Automated Scan Results' column of `data`.
data['Automated Scan Results'] = data['Code Snippet'].apply(automated_scanning)
print("Automated Scanning Results:")
# Plain-text print of the first rows (the cells above use display() instead).
print(data[['Contract ID', 'Automated Scan Results']].head())

Automated Scanning Results:
   Contract ID Automated Scan Results
0          321                     []
1         2831                     []
2         1565                     []
3         1863                     []
4         1280                     []


## Generate detailed reports for developers


In [17]:
# Generate detailed reports for developers
def generate_report(contract_id, vulnerabilities):
    """Build the two-line text report for one contract.

    Line 1 is ``Contract ID: <id>``; line 2 is ``Vulnerabilities: `` followed
    by the vulnerability names joined with ", " (nothing after the colon and
    space when the iterable is empty).
    """
    report_lines = (
        f"Contract ID: {contract_id}",
        f"Vulnerabilities: {', '.join(vulnerabilities)}",
    )
    return "\n".join(report_lines)

# Build one report string per row from that row's contract ID and scan
# findings (axis=1 passes each row to the lambda) and store it in 'Reports'.
data['Reports'] = data.apply(lambda row: generate_report(row['Contract ID'], row['Automated Scan Results']), axis=1)
print("Generated Reports:")
# Plain-text print of the first rows; the embedded newline shows up as "\n".
print(data[['Contract ID', 'Reports']].head())

Generated Reports:
   Contract ID                               Reports
0          321   Contract ID: 321\nVulnerabilities: 
1         2831  Contract ID: 2831\nVulnerabilities: 
2         1565  Contract ID: 1565\nVulnerabilities: 
3         1863  Contract ID: 1863\nVulnerabilities: 
4         1280  Contract ID: 1280\nVulnerabilities: 


## Save the detailed reports to a file


In [18]:
# Write one row per contract (ID plus report text) to CSV, leaving out the
# DataFrame index.
data.to_csv('smart_contract_reports.csv', columns=['Contract ID', 'Reports'], index=False)
print("Reports saved to 'smart_contract_reports.csv'")


Reports saved to 'smart_contract_reports.csv'


The report file shows that the automated scan did not identify any vulnerabilities for the listed smart contracts: each report contains the contract ID followed by an empty vulnerability list. There are several possible reasons:

- **No vulnerabilities detected:** the static analysis function did not find any of its predefined keywords (reentrancy, integer overflow, access control) in the code snippets.
- **Incomplete static analysis function:** the function may need to be enhanced to detect a broader range of vulnerabilities, or to identify the existing ones more accurately.
- **Data issue:** the input data may not contain what the analysis expects. In the dataset preview above, the 'Code Snippet' values look like opaque identifiers (for example `contract_kxcvnkkdehikzffrstdd`) rather than contract source code, in which case no keyword can ever match.

In [19]:
import pandas as pd
import joblib

# Enhanced Static Analysis Function
def static_analysis(code_snippet):
    """Return the vulnerability categories whose marker keywords occur in a snippet.

    Matching is case-insensitive: the snippet is lower-cased once and every
    marker is written in lower case. (The access-control marker used to be
    spelled ``'onlyOwner'`` while being searched for in lower-cased text, so
    it could never match.)

    Args:
        code_snippet: Text to scan. Non-string values (for example the NaN
            that pandas uses for a missing cell) are treated as "nothing
            found" instead of raising.

    Returns:
        list[str]: Category names in the fixed order 'Reentrancy',
        'Integer Overflow', 'Access Control'; empty when nothing matches.
    """
    if not isinstance(code_snippet, str):
        return []

    text = code_snippet.lower()
    # (category, markers) pairs. Markers are compared against lower-cased
    # text, so every marker must itself be lower case.
    rules = (
        ('Reentrancy', ('reentrancy',)),
        ('Integer Overflow', ('integer overflow', 'uint256')),
        ('Access Control', ('access control', 'onlyowner')),
    )
    # Add more rules as necessary
    return [
        category
        for category, markers in rules
        if any(marker in text for marker in markers)
    ]

# Apply Static Analysis
# Overwrites the 'Vulnerabilities' column created earlier with the results of
# the redefined static_analysis above.
data['Vulnerabilities'] = data['Code Snippet'].apply(static_analysis)

# Automated Scanning using Static Analysis
def automated_scanning(code_snippet):
    """Return the findings of `static_analysis` for `code_snippet`, unchanged."""
    findings = static_analysis(code_snippet)
    return findings

# Re-scan every snippet with the redefined functions, replacing the earlier
# 'Automated Scan Results' column.
data['Automated Scan Results'] = data['Code Snippet'].apply(automated_scanning)

# Generate Detailed Reports
def generate_report(contract_id, vulnerabilities):
    """Format a contract's findings as a two-line text report.

    The first line carries the contract ID, the second the vulnerability names
    separated by ", " (left blank after the label when there are none).
    """
    joined_names = ', '.join(vulnerabilities)
    return "Contract ID: {}\nVulnerabilities: {}".format(contract_id, joined_names)

# Rebuild each contract's report text from its ID and the fresh scan findings.
data['Reports'] = data.apply(
    lambda row: generate_report(row['Contract ID'], row['Automated Scan Results']),
    axis=1,
)

# Save Detailed Reports to a File
# A new file name is used, so the CSV written by the earlier cell is kept.
report_file_path = 'smart_contract_reports_2.csv'
data.to_csv(report_file_path, columns=['Contract ID', 'Reports'], index=False)

print("Reports saved to", report_file_path)


Reports saved to smart_contract_reports_2.csv


## Save the Models Using joblib

In [20]:
from joblib import dump

# Persist each fitted estimator to its own joblib file in the working
# directory. rf_model and svm_model are the GridSearchCV objects, so the whole
# search object is what gets saved for those two.
dump(value=rf_model, filename='rf_model.joblib')
dump(value=svm_model, filename='svm_model.joblib')

# Last expression of the cell: its return value is what the notebook echoes.
dump(value=gb_model, filename='gb_model.joblib')


['gb_model.joblib']

## Store the Model File Paths in an HDF5 File

In [22]:
import joblib
import h5py

# Paths for saving individual models
rf_path = 'rf_model.joblib'
svm_path = 'svm_model.joblib'
gb_path = 'gb_model.joblib'

# Save the models as .joblib files
# (these are the same file names the previous cell wrote, so the files are
# simply written again)
joblib.dump(rf_model, rf_path)
joblib.dump(svm_model, svm_path)
joblib.dump(gb_model, gb_path)

# Create an HDF5 file and store the paths of the models
# NOTE: only the UTF-8 encoded path strings go into 'models.h5'. The
# serialized models stay in the separate .joblib files, so the HDF5 file is
# only useful together with them.
with h5py.File('models.h5', 'w') as h5file:
    h5file.create_dataset('rf_model_path', data=rf_path.encode('utf-8'))
    h5file.create_dataset('svm_model_path', data=svm_path.encode('utf-8'))
    h5file.create_dataset('gb_model_path', data=gb_path.encode('utf-8'))


## Load the Models from the HDF5 File

In [24]:
import joblib
import h5py

# Load the models back via the paths recorded in 'models.h5'.
# (This cell previously repeated the saving code verbatim and never loaded
# anything, despite its heading.)

def _stored_path(h5file, dataset_name):
    """Return the model path kept in scalar dataset `dataset_name` as text."""
    raw = h5file[dataset_name][()]
    # The paths were written as UTF-8 encoded bytes, so decode them back;
    # fall back to str() in case the value is already text.
    return raw.decode('utf-8') if isinstance(raw, bytes) else str(raw)

# Open read-only so the stored paths cannot be modified by accident.
with h5py.File('models.h5', 'r') as h5file:
    rf_path = _stored_path(h5file, 'rf_model_path')
    svm_path = _stored_path(h5file, 'svm_model_path')
    gb_path = _stored_path(h5file, 'gb_model_path')

# NOTE: joblib.load unpickles the file, which can execute arbitrary code.
# Only load model files that you created yourself.
rf_model = joblib.load(rf_path)
svm_model = joblib.load(svm_path)
gb_model = joblib.load(gb_path)


In [26]:
from joblib import dump, load
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier

# Round-trip the saved estimators: read each joblib file back into the kernel,
# rebinding rf_model / svm_model / gb_model to the loaded objects.
rf_model = load(filename='rf_model.joblib')
svm_model = load(filename='svm_model.joblib')
gb_model = load(filename='gb_model.joblib')

# Write the loaded objects out again under new '*_new.joblib' names, so the
# original files are left in place.
dump(value=rf_model, filename='rf_model_new.joblib')
dump(value=svm_model, filename='svm_model_new.joblib')
dump(value=gb_model, filename='gb_model_new.joblib')


['gb_model_new.joblib']

In [1]:
import pandas as pd
import joblib
from joblib import dump
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder

# Save models using joblib
def save_models_and_labels(rf_model, svm_model, gb_model, label_encoders):
    """Persist the three models and the label encoders to the working directory.

    Each model is written to its own fixed-name joblib file
    ('rf_model.joblib', 'svm_model.joblib', 'gb_model.joblib'), and
    `label_encoders` is written to 'label_encoders.pkl'. Existing files with
    those names are overwritten. Returns None.
    """
    model_files = (
        (rf_model, 'rf_model.joblib'),
        (svm_model, 'svm_model.joblib'),
        (gb_model, 'gb_model.joblib'),
    )
    for estimator, filename in model_files:
        dump(estimator, filename)

    # The encoders are written through an explicitly opened binary file handle.
    with open('label_encoders.pkl', 'wb') as encoder_file:
        joblib.dump(label_encoders, encoder_file)

# Example usage
# NOTE(review): the recorded output of this cell is
# "NameError: name 'rf_model' is not defined". None of the four arguments is
# created in this cell, so they must already exist in the kernel (fitted
# models plus a `label_encoders` dict) before this line can run.
save_models_and_labels(rf_model, svm_model, gb_model, label_encoders)


NameError: name 'rf_model' is not defined

In [2]:
import joblib
from joblib import load

# Load models and label encoders
def load_models_and_labels():
    """Load the three saved models and the label encoders from disk.

    Reads 'rf_model.joblib', 'svm_model.joblib', 'gb_model.joblib' and
    'label_encoders.pkl' from the working directory and returns them as the
    tuple ``(rf_model, svm_model, gb_model, label_encoders)``.

    NOTE: joblib loading unpickles the files, so only use files you trust.
    """
    rf_model, svm_model, gb_model = (
        load(filename)
        for filename in ('rf_model.joblib', 'svm_model.joblib', 'gb_model.joblib')
    )

    with open('label_encoders.pkl', 'rb') as encoder_file:
        label_encoders = joblib.load(encoder_file)

    return rf_model, svm_model, gb_model, label_encoders

# Example usage
# NOTE(review): the recorded output of this cell is a FileNotFoundError for
# 'label_encoders.pkl'. That file is only written by save_models_and_labels,
# whose example call above failed, so it has to be created before this runs.
rf_model, svm_model, gb_model, label_encoders = load_models_and_labels()


FileNotFoundError: [Errno 2] No such file or directory: 'label_encoders.pkl'

In [None]:
import pandas as pd

def collect_input(label_encoders):
    """Prompt for one contract's four descriptors and return them encoded.

    Args:
        label_encoders: Mapping from column name ('Code Snippet',
            'Function Call Patterns', 'Control Flow Graph', 'Opcode Sequence')
            to an object whose ``transform([value])`` returns a one-element
            sequence holding the encoded value.
            # presumably scikit-learn LabelEncoder instances, which reject
            # values they were not fitted on - confirm against the caller.

    Returns:
        pandas.DataFrame: A single row with one encoded column per descriptor,
        in the order listed above.
    """
    prompts = (
        ('Code Snippet', "Enter Code Snippet: "),
        ('Function Call Patterns', "Enter Function Call Patterns: "),
        ('Control Flow Graph', "Enter Control Flow Graph: "),
        ('Opcode Sequence', "Enter Opcode Sequence: "),
    )

    # Ask all four questions first, so every prompt is shown before any
    # encoding is attempted.
    answers = {column: input(prompt) for column, prompt in prompts}

    # Encode the inputs
    encoded_row = {
        column: label_encoders[column].transform([answer])[0]
        for column, answer in answers.items()
    }

    return pd.DataFrame([encoded_row])

def make_predictions(user_input_df, rf_model, svm_model, gb_model):
    """Run the same input through all three models.

    Each model's ``predict`` is called once with `user_input_df`, in the order
    random forest, SVM, gradient boosting.

    Returns:
        tuple: ``(rf_predictions, svm_predictions, gb_predictions)``.
    """
    return tuple(
        model.predict(user_input_df)
        for model in (rf_model, svm_model, gb_model)
    )

# Example usage
# NOTE(review): this cell has no execution count, i.e. it was never run. It
# needs `label_encoders`, `rf_model`, `svm_model` and `gb_model` to exist in
# the kernel already.
# NOTE(review): the models saved earlier in this notebook were fitted on
# one-hot encoded, scaled features, whereas collect_input returns four
# label-encoded columns - confirm that the feature layouts match before
# relying on these predictions.
user_input_df = collect_input(label_encoders)
rf_predictions, svm_predictions, gb_predictions = make_predictions(user_input_df, rf_model, svm_model, gb_model)

# Print the first (and only) prediction of each model.
print("\nPredictions:")
print(f"RandomForest: {rf_predictions[0]}")
print(f"SVM: {svm_predictions[0]}")
print(f"GradientBoosting: {gb_predictions[0]}")


### Testing Values

In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import LabelEncoder

# Input data as a dictionary
# Two hand-copied rows (they match the first two rows of the dataset preview).
# Note that the name `data` is a plain dict here, not a DataFrame.
data = {
    'Contract ID': [321, 2831],
    'Code Snippet': ['contract_kxcvnkkdehikzffrstdd', 'contract_inrtorokbfgtbmqqrfjk'],
    'Function Call Patterns': ['{call22, call91}', '{call25, call58}'],
    'Control Flow Graph': ['graph_hwustzmbve', 'graph_vaaulbmlrr'],
    'Opcode Sequence': ['opcodes_crjjzspsaggqsoh', 'opcodes_uwnaspfahpzelcf'],
    'Label': [0, 1]
}

# Load the data into a pandas DataFrame
df = pd.DataFrame(data)

# Preprocessing: Encoding categorical features
# One LabelEncoder per text column, fitted on that column; the column in `df`
# is replaced in place by its integer codes. The fitted encoders are kept in
# `label_encoders` so new inputs can be encoded the same way later.
label_encoders = {}
for column in ['Code Snippet', 'Function Call Patterns', 'Control Flow Graph', 'Opcode Sequence']:
    label_encoders[column] = LabelEncoder()
    df[column] = label_encoders[column].fit_transform(df[column])

# Features and labels
X = df.drop(columns=['Contract ID', 'Label'])
y = df['Label']

# Load your pre-trained models (for demonstration, we're using new models here)
# No model is actually loaded: three fresh estimators are created without a
# fixed random_state.
rf_model = RandomForestClassifier()
svm_model = SVC(probability=True)
gb_model = GradientBoostingClassifier()

# Train the models (in practice, you'd load pre-trained models)
# Training uses only the two rows defined above.
rf_model.fit(X, y)
svm_model.fit(X, y)
gb_model.fit(X, y)

# Make predictions using each model
# These predictions are for the same two rows the models were just fitted on,
# so they say nothing about performance on unseen contracts.
rf_predictions = rf_model.predict(X)
svm_predictions = svm_model.predict(X)
gb_predictions = gb_model.predict(X)

# Combine predictions
predictions = pd.DataFrame({
    'RandomForest': rf_predictions,
    'SVM': svm_predictions,
    'GradientBoosting': gb_predictions
})

# Output the predictions
print("Predictions:\n", predictions)

# Sample input for prediction (you can modify these values as needed)
# The values are encoded with the encoders fitted above; the ones used here
# are those of the first training row.
sample_input = pd.DataFrame({
    'Code Snippet': label_encoders['Code Snippet'].transform(['contract_kxcvnkkdehikzffrstdd']),
    'Function Call Patterns': label_encoders['Function Call Patterns'].transform(['{call22, call91}']),
    'Control Flow Graph': label_encoders['Control Flow Graph'].transform(['graph_hwustzmbve']),
    'Opcode Sequence': label_encoders['Opcode Sequence'].transform(['opcodes_crjjzspsaggqsoh'])
})

# Make predictions on the sample input
sample_rf_prediction = rf_model.predict(sample_input)
sample_svm_prediction = svm_model.predict(sample_input)
sample_gb_prediction = gb_model.predict(sample_input)

# Each result holds a single prediction; print it per model.
print("\nSample Input Predictions:")
print(f"RandomForest: {sample_rf_prediction[0]}")
print(f"SVM: {sample_svm_prediction[0]}")
print(f"GradientBoosting: {sample_gb_prediction[0]}")


Predictions:
    RandomForest  SVM  GradientBoosting
0             0    0                 0
1             1    1                 1

Sample Input Predictions:
RandomForest: 0
SVM: 0
GradientBoosting: 0


In [3]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import LabelEncoder

# NOTE(review): everything down to the "Load your pre-trained models" comment
# is commented out, so this cell creates neither `X`, `y` nor
# `label_encoders`. They must already exist in the kernel - presumably from
# the "Testing Values" cell above; confirm before running on a fresh kernel.

# Sample data for label encoding
# data = {
#     'Code Snippet': ['contract_kxcvnkkdehikzffrstdd', 'contract_inrtorokbfgtbmqqrfjk'],
#     'Function Call Patterns': ['{call22, call91}', '{call25, call58}'],
#     'Control Flow Graph': ['graph_hwustzmbve', 'graph_vaaulbmlrr'],
#     'Opcode Sequence': ['opcodes_crjjzspsaggqsoh', 'opcodes_uwnaspfahpzelcf']
# }

# # Create label encoders
# label_encoders = {}
# for column in data:
#     label_encoders[column] = LabelEncoder()
#     label_encoders[column].fit(data[column])

# # Create the DataFrame
# df = pd.DataFrame(data)
# df['Label'] = [0, 1]  # Dummy labels for training

# # Features and labels
# X = df.drop(columns=['Label'])
# y = df['Label']

# Load your pre-trained models (for demonstration, we're using new models here)
# No model is actually loaded: three fresh estimators are created without a
# fixed random_state.
rf_model = RandomForestClassifier()
svm_model = SVC(probability=True)
gb_model = GradientBoostingClassifier()

# Train the models (in practice, you'd load pre-trained models)
# `X` and `y` are taken from the kernel's existing state (see note at the top).
rf_model.fit(X, y)
svm_model.fit(X, y)
gb_model.fit(X, y)

# Collect user input
def collect_input():
    """Prompt for one contract's four descriptors and return them encoded.

    Uses the module-level `label_encoders` mapping (column name -> fitted
    encoder). All four prompts are shown before any value is encoded.

    Returns:
        pandas.DataFrame: One row with the columns 'Code Snippet',
        'Function Call Patterns', 'Control Flow Graph', 'Opcode Sequence'.
    """
    questions = (
        ('Code Snippet', "Enter Code Snippet: "),
        ('Function Call Patterns', "Enter Function Call Patterns: "),
        ('Control Flow Graph', "Enter Control Flow Graph: "),
        ('Opcode Sequence', "Enter Opcode Sequence: "),
    )
    raw_answers = [(column, input(prompt)) for column, prompt in questions]

    # Encode the inputs
    encoded_row = {}
    for column, answer in raw_answers:
        encoded_row[column] = label_encoders[column].transform([answer])[0]

    return pd.DataFrame([encoded_row])

# Make predictions on the user input
# collect_input() blocks on four interactive prompts, so this cell cannot run
# unattended.
user_input_df = collect_input()

# Each model predicts the single encoded row.
rf_predictions = rf_model.predict(user_input_df)
svm_predictions = svm_model.predict(user_input_df)
gb_predictions = gb_model.predict(user_input_df)

# Print the first (and only) prediction of each model.
print("\nPredictions:")
print(f"RandomForest: {rf_predictions[0]}")
print(f"SVM: {svm_predictions[0]}")
print(f"GradientBoosting: {gb_predictions[0]}")



Predictions:
RandomForest: 1
SVM: 1
GradientBoosting: 1


In [2]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import LabelEncoder

# Load data from CSV file
csv_file_path = 'smart_contract_dataset.csv'  # Replace with your actual file path
df = pd.read_csv(csv_file_path)

# Create label encoders for each feature
# Each text column is replaced in place by integer codes from its own
# LabelEncoder; the fitted encoders are kept so user input can be encoded the
# same way further down.
label_encoders = {}
for column in ['Code Snippet', 'Function Call Patterns', 'Control Flow Graph', 'Opcode Sequence']:
    label_encoders[column] = LabelEncoder()
    df[column] = label_encoders[column].fit_transform(df[column])

# Features and labels
X = df.drop(columns=['Label','Contract ID'])
y = df['Label']

# Load your pre-trained models (for demonstration, we're using new models here)
# No model is actually loaded: three fresh estimators are created without a
# fixed random_state.
rf_model = RandomForestClassifier()
svm_model = SVC(probability=True)
gb_model = GradientBoostingClassifier()

# Train the models (in practice, you'd load pre-trained models)
# All rows of the CSV are used for fitting; nothing is held out here.
rf_model.fit(X, y)
svm_model.fit(X, y)
gb_model.fit(X, y)

# Collect user input
def collect_input():
    """Ask for the four contract descriptors and return them as one encoded row.

    The answers are encoded with the fitted encoders in the module-level
    `label_encoders` dict. Every prompt is answered before encoding starts.

    Returns:
        pandas.DataFrame: One row with the columns 'Code Snippet',
        'Function Call Patterns', 'Control Flow Graph', 'Opcode Sequence'.
    """
    columns = ['Code Snippet', 'Function Call Patterns', 'Control Flow Graph', 'Opcode Sequence']
    # A list literal is evaluated left to right, so the prompts appear in
    # column order.
    answers = [
        input("Enter Code Snippet: "),
        input("Enter Function Call Patterns: "),
        input("Enter Control Flow Graph: "),
        input("Enter Opcode Sequence: "),
    ]

    # Encode the inputs using the fitted label encoders
    encoded_row = {
        column: label_encoders[column].transform([answer])[0]
        for column, answer in zip(columns, answers)
    }

    return pd.DataFrame([encoded_row])

# Make predictions on the user input
# collect_input() blocks on four interactive prompts, so this cell cannot run
# unattended.
# NOTE(review): the encoders were fitted on the CSV's values only; presumably
# an answer that does not occur in the CSV makes transform() raise - confirm
# against the scikit-learn LabelEncoder documentation.
user_input_df = collect_input()

# Each model predicts the single encoded row.
rf_predictions = rf_model.predict(user_input_df)
svm_predictions = svm_model.predict(user_input_df)
gb_predictions = gb_model.predict(user_input_df)

# Print the first (and only) prediction of each model.
print("\nPredictions:")
print(f"RandomForest: {rf_predictions[0]}")
print(f"SVM: {svm_predictions[0]}")
print(f"GradientBoosting: {gb_predictions[0]}")



Predictions:
RandomForest: 0
SVM: 0
GradientBoosting: 0
