In [None]:
!pip3 install pandas
!pip3 install matplotlib

In [2]:
# Imports
import csv
import datetime
import os
import random
import threading
import time

In [3]:
# Server class
class Server:
    """Shared resource that admits at most ``limit`` workers at a time.

    Attributes:
        limit: maximum number of simultaneously connected workers.
        current_workers: number of workers currently connected.
        lock: mutex guarding ``current_workers``.
        stop_event: set by ``stop()``; worker threads poll it to know when to exit.
        state: dict of per-worker status values written by workers and the lifecycle loop.
        all_states: list of ``state`` snapshots appended by the lifecycle loop.
    """

    def __init__(self, limit):
        """Create an idle server that accepts up to ``limit`` concurrent workers."""
        self.state = {}
        self.all_states = []
        self.stop_event = threading.Event()
        self.lock = threading.Lock()
        self.current_workers = 0
        self.limit = limit

    def connect(self):
        """Try to take a connection slot.

        Returns:
            True if a slot was free (the worker count is incremented),
            False if the server is already at its limit.
        """
        with self.lock:
            has_capacity = self.current_workers < self.limit
            if has_capacity:
                self.current_workers += 1
            return has_capacity

    def disconnect(self):
        """Release one connection slot."""
        with self.lock:
            self.current_workers = self.current_workers - 1

    def stop(self):
        """Signal every thread watching ``stop_event`` that the simulation is over."""
        self.stop_event.set()

# Worker class
class Worker(threading.Thread):
    """Thread that repeatedly connects to a server, works, disconnects and backs off.

    Args:
        server: object exposing ``connect()``, ``disconnect()``, ``stop_event``
            (a ``threading.Event``) and a ``state`` dict.
        priority: label stored on the worker ('high', 'normal', 'low', ...).
            It is only recorded here; it does not alter the worker's logic.
        identifier: value used to build this worker's ``w{id}_backoff`` state key.
        backoff_time: initial wait (seconds) after a refused connection.
        max_backoff_time: upper bound for the doubled backoff. ``None`` (default)
            caps the backoff at ``backoff_time`` itself, which keeps the wait
            constant exactly as before; pass a larger value to get real
            exponential backoff.
    """

    def __init__(self, server, priority='normal', identifier=-1, backoff_time=2, max_backoff_time=None):
        super().__init__()
        self.server = server                      # server instance for connecting/disconnecting
        self.priority = priority                  # priority level of worker
        self.backoff_time = backoff_time          # current backoff time
        self.init_backoff_time = backoff_time     # value the backoff is reset to after a success
        self.max_backoff_time = max_backoff_time  # optional cap for exponential growth
        self.total_work_time = 0                  # total seconds of work across all connections
        self.work_time = 0                        # seconds of work in the current connection
        self.backoff_count = 0                    # number of refused connection attempts
        self.connection_attempts = 0              # total number of connection attempts
        self.connection_probability = 0           # 1 - (backoff_count / connection_attempts), filled in by the CSV writer
        self.id = identifier

    def run(self):
        """Connect/work/disconnect loop; runs until the server's stop event is set."""
        stop_event = self.server.stop_event
        while not stop_event.is_set():
            connected = self.server.connect()
            self.connection_attempts += 1
            if not connected:
                # Connection refused: count it and wait before retrying.
                self.backoff_count += 1
                self.handle_backoff()
                continue

            self.backoff_time = self.init_backoff_time       # reset backoff after a success
            self.server.state[f"w{self.id}_backoff"] = -1    # -1 marks "currently connected"
            try:
                while not stop_event.is_set():
                    time.sleep(1)                 # one second of simulated work
                    self.work_time += 1
                    if random.random() < 0.25:    # 25% chance per second to finish
                        break
            finally:
                # Always release the slot and book the work, even on an error.
                self.server.disconnect()
                self.total_work_time += self.work_time
                self.work_time = 0
            # Cool down so the worker does not immediately reconnect and starve
            # the others. Waiting on the event (instead of sleeping) lets the
            # thread exit promptly once the server is stopped.
            stop_event.wait(random.uniform(1, 3))

    def handle_backoff(self):
        """Publish the pending backoff, wait it out, then compute the next backoff."""
        self.server.state[f"w{self.id}_backoff"] = int(self.backoff_time) + 1
        # Interruptible wait: returns early when the simulation is stopped.
        self.server.stop_event.wait(self.backoff_time)
        cap = self.init_backoff_time if self.max_backoff_time is None else self.max_backoff_time
        # Double the backoff, bounded by the cap. With the default cap (the
        # initial backoff) the value stays constant.
        self.backoff_time = min(self.backoff_time * 2, cap)

    def get_total_work_time(self):
        """Return the total number of seconds this worker spent working."""
        return self.total_work_time

# Function for lifecycle of server. For example, when we time.sleep(300), we get 5 minutes of server life
def server_lifecycle(server, workers, lifecycle=300):
    """Drive the server for ``lifecycle`` one-second ticks, then shut everything down.

    On every tick each positive value in ``server.state`` is decremented by one
    (a countdown of remaining backoff), and a snapshot of the state is appended
    to ``server.all_states``. Zero and negative values are left untouched.

    Args:
        server: object with ``state`` (dict), ``all_states`` (list) and ``stop()``.
        workers: iterable of started threads to join after the server is stopped.
        lifecycle: number of one-second ticks to run.
    """
    for _ in range(lifecycle):
        time.sleep(1)
        # Iterate over a snapshot of the keys: worker threads write to
        # server.state concurrently, and a key added mid-iteration would raise
        # "dictionary changed size during iteration".
        for key in list(server.state):
            if server.state[key] > 0:
                server.state[key] -= 1
        server.all_states.append(server.state.copy())

    server.stop()           # signal the workers that the simulation is over
    for worker in workers:  # wait for every worker thread to finish
        worker.join()


In [4]:
# Build the worker pool, register it on the server and run one full simulation
def simulate(server, worker_count=20, server_life=30, backoff_times=(1, 2, 4)):
    """Run one simulation and return the finished workers.

    Workers are assigned priorities round-robin by index: ``i % 3 == 0`` is
    'high', ``1`` is 'normal', ``2`` is 'low'. Each gets the matching entry of
    ``backoff_times`` as its initial backoff.

    Args:
        server: the Server instance the workers compete for.
        worker_count: number of worker threads to create.
        server_life: simulation length in seconds (ticks of the lifecycle loop).
        backoff_times: indexable of three initial backoffs, ordered
            (high, normal, low).

    Returns:
        The list of Worker threads, all of which have been joined.
    """
    # (priority label, numeric code stored in server.state), indexed by i % 3
    tiers = (('high', -2), ('normal', -1), ('low', 0))

    workers = []
    for i in range(worker_count):
        priority, priority_code = tiers[i % 3]
        workers.append(Worker(server, priority=priority, identifier=i, backoff_time=backoff_times[i % 3]))
        # Register the worker's state *before* any thread is started so this
        # initialisation can never overwrite a value a running worker wrote.
        server.state[f"w{i}_backoff"] = 0
        server.state[f"w{i}_priority"] = priority_code

    for worker in workers:
        worker.start()

    # Run the lifecycle loop in its own thread and block until it has stopped
    # the server and joined every worker.
    lifecycle_thread = threading.Thread(target=server_lifecycle, args=(server, workers, server_life))
    lifecycle_thread.start()
    lifecycle_thread.join()
    return workers


In [5]:
def write_to_csv(workers, subdir=None, confirm_filename=False):
    """Write one CSV row of metrics per worker and return the file path.

    The file is named ``workers_<YYYYmmddHHMMSS>.csv``. Each worker's
    ``connection_probability`` attribute is (re)computed here as
    ``1 - backoff_count / connection_attempts`` (0 when there were no
    attempts) and stored back on the worker before it is written.

    Args:
        workers: sequence of objects exposing ``priority``, ``total_work_time``,
            ``backoff_count``, ``connection_attempts`` and ``init_backoff_time``.
        subdir: optional directory for the file; created if it does not exist.
            When falsy the file is written to the current working directory.
        confirm_filename: when True, print the path that was written.

    Returns:
        The path of the CSV file that was written.
    """
    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    csv_file = f"workers_{timestamp}.csv"
    if subdir:
        # Create the target directory on demand instead of failing in open().
        os.makedirs(subdir, exist_ok=True)
        csv_file = os.path.join(subdir, csv_file)

    header = ["id", "priority", "total_work_time", "backoff_count", "connection_attempts", "connection_probability", "backoff_time"]
    with open(csv_file, mode='w', newline='') as file:
        writer = csv.DictWriter(file, fieldnames=header)
        writer.writeheader()
        for i, worker in enumerate(workers):
            if worker.connection_attempts != 0:
                worker.connection_probability = 1 - (worker.backoff_count / worker.connection_attempts)
            else:
                worker.connection_probability = 0  # no attempts: avoid division by zero

            writer.writerow({
                "id": i,  # position in the workers list
                "priority": worker.priority,
                "total_work_time": worker.total_work_time,
                "backoff_count": worker.backoff_count,
                "connection_attempts": worker.connection_attempts,
                "connection_probability": worker.connection_probability,
                "backoff_time": worker.init_backoff_time,
            })

    if confirm_filename:
        print(f"Worker information written to {csv_file}")
    return csv_file


In [None]:
import random

# Data-generation cell: each run draws random initial backoffs for the three
# priority tiers, simulates one server lifetime and writes the workers' metrics
# to a timestamped CSV.
CONN_LIMIT = 5
SIM_RUNS = 0 # 0 means the loop below never runs; presumably 1500 was used to generate the dataset -- TODO confirm
worker_count = 20
server_life = 30
for i in range(SIM_RUNS):
    # One random initial backoff (seconds) per priority tier for this run
    HIGH_BACKOFF = random.uniform(0.1, 2)
    MED_BACKOFF = random.uniform(.8, 4)
    LOW_BACKOFF = random.uniform(3, 7)
    print(f"Simulation {i}: Worker count: {worker_count}, High: {HIGH_BACKOFF}, Med: {MED_BACKOFF}, Low: {LOW_BACKOFF}")

    # A new Server per run, so the connection count and state start empty
    server = Server(limit=CONN_LIMIT)
    workers = simulate(server, worker_count=worker_count, server_life=server_life, backoff_times=[HIGH_BACKOFF, MED_BACKOFF, LOW_BACKOFF])
    # No subdir is passed, so the CSV is written to the current working directory
    write_to_csv(workers)

# NOTE: printed unconditionally, including when SIM_RUNS is 0 and nothing was written
print("Simulation complete. Worker information written to CSV files.")


In [7]:
import os
import pandas as pd

In [None]:
# create data for neural network. 
# input features: high_target_connection_prob, normal_target_connection_prob, low_target_connection_prob, limit, high_num_workers, normal_num_workers, low_num_workers
# output: high_backoff, normal_backoff, low_backoff
def create_data(df):
    """Reduce one simulation's worker table to a single training sample.

    Args:
        df: DataFrame with at least the columns ``priority``,
            ``connection_probability`` and ``backoff_time``.

    Returns:
        ``(X, Y)`` where ``X`` is the mean connection probability and ``Y`` the
        mean backoff time, each as a three-element list ordered
        [high, normal, low]. A priority with no rows yields NaN in its slot.
    """
    priorities = ('high', 'normal', 'low')

    def mean_per_priority(column):
        # Mean of `column` over the rows of each priority, in fixed order.
        return [df.loc[df['priority'] == priority, column].mean() for priority in priorities]

    X = mean_per_priority('connection_probability')  # inputs: observed connection probabilities
    Y = mean_per_priority('backoff_time')            # targets: backoff times that produced them
    return X, Y

# Directory holding the simulation CSVs (a sub-directory of the working directory)
current_dir = os.path.join(os.getcwd(), 'csmaca_traditional_data_out')

# List the CSV files in that directory. Sorted because os.listdir() order is
# arbitrary, and the row order feeds the later train/test split.
csv_files = sorted(file for file in os.listdir(current_dir) if file.endswith('.csv'))

# Columns a file must have to be usable; 'backoff_time' is needed by create_data
required_columns = {'priority', 'total_work_time', 'backoff_count', 'connection_attempts', 'connection_probability', 'backoff_time'}

# Collect one record per file and build the DataFrame once at the end
# (DataFrame.append was removed in pandas 2.0 and was quadratic anyway).
compiled_rows = []
for csv_file in csv_files:
    # The names in csv_files are relative to current_dir, so join before reading
    df = pd.read_csv(os.path.join(current_dir, csv_file))
    # skip files that don't conform to the data format
    if not required_columns.issubset(df.columns):
        print(f"Skipping {csv_file} as it does not conform to the data format.")
        continue

    X, Y = create_data(df)
    compiled_rows.append({'CSV File': csv_file, 'X': X, 'Y': Y})

# Explicit columns keep the frame well-formed even when no file qualified
compiled_df = pd.DataFrame(compiled_rows, columns=['CSV File', 'X', 'Y'])

# Show a summary rather than dumping every row
print(f"Compiled {len(compiled_df)} simulation runs")
print(compiled_df.head())

In [None]:
# train neural network with the compiled data
import datetime
import torch
import torch.nn as nn
import torch.optim as optim
%pip install scikit-learn
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# Each row of compiled_df carries one list of inputs (X) and one list of targets (Y);
# tolist() turns the two columns into lists of lists that scikit-learn accepts.
# Wider feature set considered earlier (not what the 'X' column currently holds):
#X = [high_target_connection_prob, normal_target_connection_prob, low_target_connection_prob, limit, high_num_workers, normal_num_workers, low_num_workers]
#Y = [high_backoff, normal_backoff, low_backoff]
X = compiled_df['X'].tolist()
Y = compiled_df['Y'].tolist()

# Scale features and targets with separate min-max scalers; both fitted scalers
# are kept as globals so predictions can be mapped back with inverse_transform.
# NOTE(review): the scalers are fitted on the full dataset before the
# train/test split below, so test rows influence the scaling -- confirm this
# is acceptable.
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()
X_scaled = scaler_X.fit_transform(X)
# Debug output: scaled inputs, raw targets, then scaled targets
print("////////////////")
print(X_scaled)
print("////////////////")
print(Y)
y_scaled = scaler_y.fit_transform(Y)
print(y_scaled)

# Convert to PyTorch tensors (float32 to match the default nn.Linear parameter dtype)
X_tensor = torch.tensor(X_scaled, dtype=torch.float32)
y_tensor = torch.tensor(y_scaled, dtype=torch.float32)

# Split data into training (80%) and testing (20%) sets; fixed random_state makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X_tensor, y_tensor, test_size=0.2, random_state=42)

# Define the neural network
class Net(nn.Module):
    """Fully connected regression network: four ReLU hidden layers and a linear output.

    Args:
        dropout_rate: probability for the ``dropout`` module. The module is
            created but not applied in ``forward``.
        in_features: number of input features (default 3).
        hidden_features: width of each hidden layer (default 64).
        out_features: number of regression outputs (default 3).

    The layer attribute names (``fc1`` .. ``fc4``, ``fc_out``) are fixed, so
    state dicts saved with the default sizes stay loadable.
    """

    def __init__(self, dropout_rate=0.2, in_features=3, hidden_features=64, out_features=3):
        super().__init__()
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.fc2 = nn.Linear(hidden_features, hidden_features)
        self.fc3 = nn.Linear(hidden_features, hidden_features)
        self.fc4 = nn.Linear(hidden_features, hidden_features)
        self.fc_out = nn.Linear(hidden_features, out_features)
        # Kept as an attribute for backward compatibility; not applied in forward().
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        """Map a ``(batch, in_features)`` tensor to ``(batch, out_features)`` predictions."""
        for layer in (self.fc1, self.fc2, self.fc3, self.fc4):
            x = torch.relu(layer(x))
        return self.fc_out(x)  # linear output for regression

# Training configuration
TORCH_SEED = 42     # seeds weight initialisation so a re-run reproduces the same model
NUM_EPOCHS = 200    # full-batch gradient steps
LOG_EVERY = 100     # print the loss every LOG_EVERY epochs

# Initialize the network (seed first: nn.Linear draws its initial weights from torch's RNG)
torch.manual_seed(TORCH_SEED)
net = Net()

# Loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

# save loss_data for plotting
loss_data = []

# Training loop: one full-batch step per epoch
net.train()
for epoch in range(NUM_EPOCHS):
    optimizer.zero_grad()   # Zero the gradient buffers
    output = net(X_train)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()

    loss_data.append(loss.item())
    if (epoch + 1) % LOG_EVERY == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}')

# plot loss_data
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
ax.plot(loss_data)
ax.set(title='Loss Data', xlabel='Epoch', ylabel='Loss')
plt.show()

# Evaluate the model on the held-out split (eval mode, no gradient tracking)
net.eval()
with torch.no_grad():
    predictions = net(X_test)
    test_loss = criterion(predictions, y_test)
print(f'Test Loss: {test_loss.item()}')

# Predictions are in the scaled target space; map them back to backoff times
predictions_unscaled = scaler_y.inverse_transform(predictions.detach().numpy())

# save model to disk timestamped
timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
model_file = f"model_{timestamp}.pth"
torch.save(net.state_dict(), model_file)
print(f"Model saved to {model_file}")


In [None]:
# load model from disk
model = Net()
model.load_state_dict(torch.load(model_file))
model.eval()

# Target connection probabilities, ordered [high, normal, low]
new_probs = [0.3, 0.2, 0.05]
print(f"Querying model with probabilities: {new_probs}")

# Apply the same input scaling that was fitted on the training data
new_probs_scaled = scaler_X.transform([new_probs])
new_probs_tensor = torch.tensor(new_probs_scaled, dtype=torch.float32)
# Query the model that was just reloaded from disk (previously the in-memory
# `net` was called, so the saved file was never actually exercised).
with torch.no_grad():
    new_prediction = model(new_probs_tensor)
# The network predicts in scaled target space; invert to get backoff times
new_prediction_unscaled = scaler_y.inverse_transform(new_prediction.detach().numpy())
print("Answer (unscaled):")
print(new_prediction_unscaled)

# Re-apply the target scaling to show the same answer in the scaled space
new_prediction_scaled = scaler_y.transform(new_prediction_unscaled)
print("Answer (scaled):")
print(new_prediction_scaled)


In [None]:
# now simulate with the above expectations

# handy for testing
#hardwired_backoffs = [2.3136058, 5.0742, 7.02123]
#print(f"To achieve the following connection probabilities: {[0.3, 0.2, 0.05]}, the backoff times should be: {hardwired_backoffs}")

# Predicted backoff times, ordered [high, normal, low]; computed once for all runs
predicted_backoffs = new_prediction_unscaled[0].tolist()
print(f"To achieve the following connection probabilities: {new_probs}, the backoff times should be: {predicted_backoffs}")

# make directory for csv files
dir_path = 'best_quantum_values'
os.makedirs(dir_path, exist_ok=True)

# Each run lasts server_life seconds, so 1000 runs of 30 s take over 8 hours
NUM_SIMULATIONS = 1000
for i in range(NUM_SIMULATIONS):
    print(f"TEST SIMULATION {i}")
    server = Server(limit=5)
    workers = simulate(server, worker_count=20, server_life=30, backoff_times=predicted_backoffs)
    csv_file = write_to_csv(workers, subdir=dir_path)
    df = pd.read_csv(csv_file)
    # Delete the CSV file if we don't want to pollute the repo
    #os.remove(csv_file)
    # Report the achieved mean connection probability for each priority tier
    for priority in ('high', 'normal', 'low'):
        mean_probability = df.loc[df['priority'] == priority, 'connection_probability'].mean()
        print(f"Average connection probability for {priority} priority workers: {mean_probability}")