In [9]:
import pandas as pd
import numpy as np
import random
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score

In [2]:
def generate_data(num_samples, seed=None, device_ids=None):
    """Build a synthetic routing dataset with one row per simulated transfer.

    Every row holds randomly drawn features plus an ``optimal_route`` label
    computed from them by a fixed rule: the label is the source device id
    shifted by 1, 2, 3 or 4 (modulo the number of devices), chosen by the
    first condition that holds -- ``network_load > 70``, ``data_size > 250``,
    ``latency > 10``, or none of them. The remaining columns
    (``dest_device``, ``previous_route``, ``bandwidth``, ``error_rate``,
    ``hop_count``) do not take part in the label rule.

    Parameters
    ----------
    num_samples : int
        Number of rows to generate.
    seed : int, optional
        When given, all draws come from a private ``random.Random(seed)``,
        so the result is reproducible and the global ``random`` state is not
        touched. When omitted, the module-level ``random`` generator is used.
    device_ids : iterable of int, optional
        Device ids to sample from. Defaults to the values of the module-level
        ``devices`` mapping, looked up when the function is called.

    Returns
    -------
    pandas.DataFrame
        ``num_samples`` rows with the columns ``src_device``, ``dest_device``,
        ``data_size``, ``network_load``, ``previous_route``, ``bandwidth``,
        ``latency``, ``error_rate``, ``hop_count`` and ``optimal_route``.
    """
    columns = ['src_device', 'dest_device', 'data_size', 'network_load',
               'previous_route', 'bandwidth', 'latency', 'error_rate',
               'hop_count', 'optimal_route']

    # A dedicated generator keeps seeded runs isolated from global state;
    # without a seed the module-level functions are used unchanged.
    rng = random if seed is None else random.Random(seed)

    # The candidate list is loop-invariant, so build it once instead of
    # three times per generated row.
    if device_ids is None:
        device_ids = devices.values()
    device_ids = list(device_ids)
    num_devices = len(device_ids)

    rows = []
    for _ in range(num_samples):
        # The draw order below is significant: it fixes which random numbers
        # land in which column for a given generator state.
        src_device = rng.choice(device_ids)
        dest_device = rng.choice(device_ids)
        data_size = rng.randint(50, 500)
        network_load = rng.randint(10, 90)
        previous_route = rng.choice(device_ids)
        bandwidth = rng.randint(100, 1000)
        latency = rng.randint(1, 20)
        error_rate = rng.uniform(0, 0.05)
        hop_count = rng.randint(1, 5)

        # First matching condition decides how far the label is shifted
        # from the source device id.
        if network_load > 70:
            shift = 1
        elif data_size > 250:
            shift = 2
        elif latency > 10:
            shift = 3
        else:
            shift = 4
        optimal_route = (src_device + shift) % num_devices

        rows.append([src_device, dest_device, data_size, network_load,
                     previous_route, bandwidth, latency, error_rate,
                     hop_count, optimal_route])
    return pd.DataFrame(rows, columns=columns)

In [3]:
# Simulated IoT network: fifteen devices keyed 'device_0' .. 'device_14',
# each mapped to its integer id (0 .. 14).
devices = {f'device_{device_id}': device_id for device_id in range(15)}

In [4]:
# Seed the `random` module before sampling so that Restart & Run All
# regenerates the same datasets (the classifier below already pins its own
# random_state). Both frames are drawn one after the other from the same
# stream, so they contain different samples.
random.seed(42)

# Generating training data
training_data = generate_data(5000)

# Generating test data
test_data = generate_data(5000)


In [5]:
# Split each generated frame into its feature matrix (every column except
# the label) and its label vector (the 'optimal_route' column).
X_train, y_train = (
    training_data.drop(columns='optimal_route'),
    training_data['optimal_route'],
)
X_test, y_test = (
    test_data.drop(columns='optimal_route'),
    test_data['optimal_route'],
)

In [6]:
# Random forest of 100 trees with a fixed random_state, fitted on the
# training features and labels.
model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)
model.fit(X=X_train, y=y_train)

In [12]:
# Predict a route for every row of the held-out test features.
predictions = model.predict(X=X_test)

# Fraction of test rows whose predicted route equals the generated label.
accuracy = accuracy_score(y_true=y_test, y_pred=predictions)
print('Accuracy on test set:', accuracy)

Accuracy on test set: 0.8632


In [11]:
# k-fold cross-validation on the training set (10 folds here as an example).
k = 10
accuracy_scores = cross_val_score(
    estimator=model,
    X=X_train,
    y=y_train,
    cv=k,
    scoring='accuracy',
)

# Report the accuracy of each fold, numbering folds from 1.
for fold_number, fold_accuracy in enumerate(accuracy_scores, start=1):
    print(f"Fold {fold_number}: {fold_accuracy:.4f}")

# Mean accuracy across all folds.
average_accuracy = accuracy_scores.mean()
print('\nAverage Accuracy:', average_accuracy)

Fold 1: 0.8560
Fold 2: 0.8620
Fold 3: 0.8700
Fold 4: 0.8580
Fold 5: 0.8640
Fold 6: 0.8660
Fold 7: 0.8600
Fold 8: 0.8720
Fold 9: 0.8580
Fold 10: 0.8640
Average Accuracy: 0.8620
