In [15]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import CountVectorizer

# Load the labelled dataset; this cell reads its 'Address' and 'Label' columns.
data = pd.read_csv('Test_Ethereum_Raw.csv')

# Encode the target as integer class codes. The same encoder is reused later
# (inverse_transform) to map predictions back to the original label values.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(data['Label'])

# Split the raw address strings *before* vectorizing so the vocabulary is learned
# from training rows only and the held-out rows stay genuinely unseen.
addresses_train, addresses_test, y_train, y_test = train_test_split(
    data['Address'], y, test_size=0.3, random_state=42
)

# Character-level bag of n-grams: single characters and adjacent pairs.
# ngram_range can be widened to capture longer character patterns.
# Note: CountVectorizer lowercases its input by default, so letter case in the
# addresses does not contribute to the features.
vectorizer = CountVectorizer(analyzer='char', ngram_range=(1, 2))
X_train = vectorizer.fit_transform(addresses_train)
X_test = vectorizer.transform(addresses_test)

# Full feature matrix, kept under its original name in case later cells use it.
X = vectorizer.transform(data['Address'])

# Train the classifier; max_iter is raised from the default to give the solver
# more room to converge on the sparse count features.
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Report performance on the held-out 30% so the model's quality is visible
# before it is used to label new data.
print(f"Held-out accuracy: {model.score(X_test, y_test):.3f}")

In [13]:
import random

# Load the CSV. header=None tells pandas the file has no header row, so the
# columns are labelled 0, 1, ... and the first line is treated as data.
df = pd.read_csv('RawTestData.csv', header=None)

# Show the first rows so the reader can confirm the file parsed as expected.
print("Original Data:")
print(df.head())

# Shuffle the values of the first column. A dedicated, seeded Random instance
# makes the permutation reproducible across kernel restarts without touching
# the global random state. Only column 0 is permuted; any other columns keep
# their original row order.
randomized_values = df[0].tolist()
random.Random(42).shuffle(randomized_values)
df[0] = randomized_values

# Write the result without index or header row. header=False is the documented
# boolean form of "no header".
df.to_csv('RandomizedRawTestData.csv', index=False, header=False)

print("Modified Data:")
print(df.head())


Original Data:
                                            0
0  0x8BfA61FF095Ce5882f70beb99d79D4e85F364ae7
1  0x00CBF506B6C1a81E7dCAaD2EB1a002504cb1547B
2  0x6A25400fBd9c9EFd5d6CD85eFDfF0D799Aa38327
3  0x6Ea4Fc9a9e9574763Ccc36340E5fb91037CB2899
4  0x76aa1CDb0EA3DcaDA436AAbF4a80C466653C28f9
Modified Data:
                                                   0
0  0x9ea3003b8baa0b9e7ce3fc21c87191d0755585dafffb...
1  0x1897b75c07fce87753d396d02e19cbb2b2bedf1465c1...
2         0x7edeC34414375172b417533dE03DA6Ce038884bD
3         0xFA0D70f6A0BEaF0F40D193b91568c1F2aA059D57
4         0x3614fAF3e25246888801C27Cb0B1307b3FFA3cd9


In [17]:
# Predict labels for a new, headerless CSV of addresses using the already
# fitted `vectorizer`, `model` and `label_encoder`.

# Single source of truth for the output location, so the file that is written
# and the message that is printed can never disagree.
predictions_path = 'RawPredictions.csv'

new_data = pd.read_csv('RandomizedRawTestData.csv', header=None)

# transform (not fit_transform): reuse the vocabulary learned during training
# so the feature columns line up with what the model expects.
new_X = vectorizer.transform(new_data[0])

new_predictions = model.predict(new_X)

# Map the integer class codes back to the original label values and append
# them as a new column next to the addresses.
new_data['Predicted Label'] = label_encoder.inverse_transform(new_predictions)

# Save without index or header row. header=False is the documented boolean
# form of "no header".
new_data.to_csv(predictions_path, index=False, header=False)

print(f"Predictions have been saved to '{predictions_path}'.")

Predictions have been saved to 'RawPredictions.csv'.
