In [183]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

train_data = pd.read_csv('new_train_data.csv')

# split the data
X_train, X_val, y_train, y_val = train_test_split(train_data[['node1', 'node2']], train_data['label'], test_size=0.2, random_state=0)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)

# random forest classifier
model = RandomForestClassifier(n_estimators=1258, random_state=0)
model.fit(X_train_scaled, y_train)

# predict
val_preds = model.predict(X_val_scaled)

# calculate accuracy
accuracy = accuracy_score(y_val, val_preds)
print(f"Validation accuracy: {accuracy:.4f}")

test_data = pd.read_csv('new_test_data.csv')

test_data_scaled = scaler.transform(test_data[['node1', 'node2']])

predictions = model.predict(test_data_scaled)

# add a new column in the test_data
test_data['label'] = predictions

Validation accuracy: 0.8621


In [184]:
# Calculate precision, recall, and F1-score
precision = precision_score(test_data['label'], predictions)
recall = recall_score(test_data['label'], predictions)
f1 = f1_score(test_data['label'], predictions)

print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")

Precision: 1.0000
Recall: 1.0000
F1-score: 1.0000


In [185]:
ans = test_data.drop(['node1', 'node2'], axis=1)
ans = ans.rename(columns={'label': 'ans'})

ans.to_csv('predicted_labels.csv', index=False)