In [1]:
# Import necessary libraries
import threading
import random
import pandas as pd

# Source file and the field layout of each ", "-separated record
filename = "coursegrades.txt"
columns = ["Year", "Course Name", "Grade", "University"]

# Parse every line of the file into a list of its fields
with open(filename, "r") as file:
    data = [line.strip().split(", ") for line in file]

# Build the DataFrame, then replace the textual Year and Grade columns
# with their numeric equivalents
dataset = pd.DataFrame(data, columns=columns)
dataset = dataset.assign(
    Year=pd.to_numeric(dataset["Year"]),
    Grade=pd.to_numeric(dataset["Grade"]),
)

# Working copy that the worker threads rebind as they append rows
shared_dataset = dataset.copy()

# Mutual-exclusion lock guarding updates to shared_dataset
lock = threading.Lock()

# Function to add entries to the shared dataset
def add_entries(thread_id, num_entries=5):
    """Append ``num_entries`` randomly generated rows to ``shared_dataset``.

    Each new row takes its year from 2022-2024, its grade from 60-100
    (both ends inclusive), and its course name and university from the
    distinct values found in the module-level ``dataset``. Every append
    rebinds the global ``shared_dataset`` while holding the module-level
    ``lock``.

    Args:
        thread_id: Identifier supplied by the caller; not used by this
            function.
        num_entries: Number of rows to append.
    """
    global shared_dataset
    # The pools of candidate values do not change while the loop runs,
    # so scan the dataset for them once instead of on every iteration.
    course_names = dataset["Course Name"].unique()
    universities = dataset["University"].unique()
    for _ in range(num_entries):
        # Generate a random new entry
        new_entry = {
            "Year": random.choice([2022, 2023, 2024]),
            "Course Name": random.choice(course_names),
            "Grade": random.randint(60, 100),
            "University": random.choice(universities),
        }
        # Build the one-row frame before taking the lock so the critical
        # section covers only the read-concat-rebind of shared_dataset.
        new_row = pd.DataFrame([new_entry])
        with lock:
            shared_dataset = pd.concat([shared_dataset, new_row], ignore_index=True)

# Function to simulate concurrent data addition
def simulate_concurrent_addition(num_threads=10, entries_per_thread=5):
    """Run ``num_threads`` threads that each call ``add_entries``.

    Every thread receives its index as the thread id together with
    ``entries_per_thread``. The call returns once all threads have finished.
    """
    workers = [
        threading.Thread(target=add_entries, args=(worker_id, entries_per_thread))
        for worker_id in range(num_threads)
    ]

    for worker in workers:
        worker.start()

    # Block until every worker has completed
    for worker in workers:
        worker.join()

# Function to verify the consistency of the dataset
def verify_consistency():
    """Summarise the current ``shared_dataset``.

    Returns:
        A ``(total_entries, distribution)`` tuple: the number of rows, and
        the ``value_counts()`` of the "Course Name" column.
    """
    return len(shared_dataset), shared_dataset["Course Name"].value_counts()

# Function to test the lock's effectiveness
def test_without_lock(num_threads=10, entries_per_thread=5):
    """Append rows to ``shared_dataset`` from several threads without locking.

    Starts ``num_threads`` threads; each appends ``entries_per_thread``
    randomly generated rows by rebinding the global ``shared_dataset``
    with no synchronization, then waits for all of them to finish.
    Because the read-concat-rebind sequence is unguarded, concurrent
    threads can overwrite each other's result and rows may be lost.

    The module-level ``lock`` is deliberately left untouched: the unlocked
    worker below never references it, and replacing it with ``None`` would
    break every later ``with lock:`` in the locked code path.

    Args:
        num_threads: Number of worker threads to start.
        entries_per_thread: Number of rows each worker appends.
    """

    def add_entries_without_lock(thread_id, num_entries=5):
        """Append ``num_entries`` random rows with no lock; ``thread_id`` is unused."""
        global shared_dataset
        # Candidate values are loop-invariant, so compute them once.
        course_names = dataset["Course Name"].unique()
        universities = dataset["University"].unique()
        for _ in range(num_entries):
            new_entry = {
                "Year": random.choice([2022, 2023, 2024]),
                "Course Name": random.choice(course_names),
                "Grade": random.randint(60, 100),
                "University": random.choice(universities),
            }
            # Directly modify the dataset without lock
            shared_dataset = pd.concat([shared_dataset, pd.DataFrame([new_entry])], ignore_index=True)

    # Simulate without lock
    threads = []
    for i in range(num_threads):
        thread = threading.Thread(target=add_entries_without_lock, args=(i, entries_per_thread))
        threads.append(thread)
        thread.start()

    # Wait for all threads to complete
    for thread in threads:
        thread.join()

# Main execution
if __name__ == "__main__":
    print("Initial dataset size:", len(shared_dataset))

    # Each scenario: heading to print, function that performs the additions,
    # and whether shared_dataset is reset to the original data beforehand.
    scenarios = (
        ("After addition with lock:", simulate_concurrent_addition, False),
        ("\nAfter addition without lock:", test_without_lock, True),
    )

    for heading, run_scenario, reset_first in scenarios:
        if reset_first:
            shared_dataset = dataset.copy()  # Reset the dataset
        run_scenario(num_threads=10, entries_per_thread=5)

        # Summarise the result of this scenario
        total_entries, distribution = verify_consistency()
        print(heading)
        print("Total entries:", total_entries)
        print("Distribution per course:")
        print(distribution)

Initial dataset size: 1000
After addition with lock:
Total entries: 1050
Distribution per course:
Course Name
Computer Vision            199
Cyber Security             176
Data Structures            173
Software Engineering       168
Machine Learning           167
Artificial Intelligence    167
Name: count, dtype: int64

After addition without lock:
Total entries: 1028
Distribution per course:
Course Name
Computer Vision            197
Cyber Security             174
Software Engineering       168
Data Structures            165
Machine Learning           163
Artificial Intelligence    161
Name: count, dtype: int64
