# LLM Authorship Attribution

**Description:** This script can be used with openrouter.ai to generate C code samples from various LLMs.

**Instructions:** In STEP 0, all required dependencies are installed. In STEP 1, we define 300 programming task templates, which together can generate around 2.1 billion unique questions. In STEP 2, helper functions are introduced. Finally, in STEP 3, the dataset is actually generated.

### STEP 0 - Install all necessary dependencies

In [None]:
#STEP 0: Install all necessary Linux packages (this process may take some time)
print("[*] Installing dependencies...")
!apt-get install -y  \
    build-essential cmake pkg-config git \
    libeigen3-dev libfftw3-dev libgmp-dev \
    libblas-dev liblapack-dev libopenblas-dev \
    libcurl4-openssl-dev libarchive-dev \
    libsdl2-dev libsdl2-image-dev libsdl1.2-dev \
    libbluetooth-dev libcjson-dev libjansson-dev \
    libjson-c-dev libgd-dev libglib2.0-dev \
    libgsl-dev libusb-1.0-0-dev libudev-dev \
    libxml2-dev libncurses5-dev libncursesw5-dev \
    libpoppler-cpp-dev libtiff-dev libpng-dev \
    libjpeg-dev libxslt1-dev libqrencode-dev \
    libreadline-dev libssl-dev libpcap-dev \
    libpthread-stubs0-dev libhdf5-dev \
    liblzma-dev libzstd-dev zlib1g-dev \
    libtar-dev\
    libxlsxwriter-dev libyaml-dev \
    libopencv-dev \
    libtesseract-dev libleptonica-dev \
    python3-numpy python3-pip \
    wget p7zip-full unzip csvtool 7zip libsqlite3-dev libmysqlclient-dev libpq-dev libportaudio2 portaudio19-dev freeglut3-dev >/dev/null 2>&1
!pip install lizard openai > /dev/null 2>&1
!pip install --upgrade transformers > /dev/null 2>&1
print("[*] Installation: DONE")
# Test the installation with a simple trick (compiles and links with common libs)
!echo "int main() { return 0; }" > test.c && gcc test.c \
-lm -lfftw3 -lsqlite3 -lcrypto -lmysqlclient -lpq -lssl \
-lportaudio -lpcap -lqrencode -lSDL2 -lglut -lGLU -lGL -lcurl \
-lgmp -lblas -llapack -lopenblas -larchive -lbluetooth -lcjson -ljansson \
-ljson-c -lgd -lglib-2.0 -lgsl -lusb-1.0 -ludev -lxml2 -lncurses -lpoppler-cpp \
-ltiff -lpng -ljpeg -lxslt -lreadline -lpthread  -llzma -lzstd -lz \
-lyaml -lopencv_core -lopencv_imgproc -lopencv_highgui -ltesseract -llept \
 -lxlsxwriter \
-o test 2>/dev/null && echo "[*] Compilation Test: SUCCESS" || echo "[*] Compilation Test: FAILED"
!rm test & rm test.c 2>/dev/null

print("[*] Loading packages...")
import random, string,re,os,json,tempfile,subprocess,threading,hashlib,lizard
from openai import OpenAI  # client used with openrouter.ai
# import openai  # original OpenAI
import pandas as pd
from collections import Counter
from tqdm import tqdm
from threading import Lock
import matplotlib.pyplot as plt
from transformers import BertTokenizer
import numpy as np
from collections import OrderedDict
from concurrent.futures import ThreadPoolExecutor, as_completed
print("[*] Load: DONE")

[*] Installing dependencies...
[*] Installation: DONE
[*] Compilation Test: SUCCESS
[*] Loading packages...
[*] Load: DONE


### STEP 1 - 300 programming task templates


In [None]:
# Lock used to keep JSON writes thread-safe.
dataset_lock = Lock()

# The 32 C keywords used for analysis, in their original order.
C_KEYWORDS = (
    "auto break case char const continue default do "
    "double else enum extern float for goto if "
    "int long register return short signed sizeof static "
    "struct switch typedef union unsigned void volatile while"
).split()

c_dynamic_prompts = [
  # General Programming
  "Write a C program to sort an array of {size} integers using bubble sort.",
  "Write a C program to reverse a linked list of {length} nodes.",
  "Make a C program to implement a binary search tree from an array of {size} positive integers. If the element is not found, return -1.",
  "Create a C program to check if the string '{string}' is a palindrome.",
  "Write a C function to count vowels and consonants in the string: '{string}'.",
  "Write a C program to convert the decimal number {number} to binary and print the binary form to the console.",
  "Create a C program that reads '{filename}' and counts the words.",
  "Write a C program to implement a queue with a maximum size of {size}. It starts removing the oldest elements when the size limit is reached.",
  "Write a C program that implements a multi-threaded web server on port {port}.",
  "Generate a C program that solves a {n}x{n} Sudoku puzzle.",
  "Write a C program that implements merge sort on an array of {size} integers.",
  "Write a C program that checks if {year} is a leap year.",
  "Write a C program that reads a CSV file and fixes the formatting issues such that all of the cell contents are delimited by \" characters.",
  "Create a C program to simulate a digital clock for {duration} seconds.",
  "Write a C program that implements a stack with a capacity of {size}. It starts removing items that had their expiry time reached. This expiry can be set when adding an item. It is {duration} minutes by default.",
  "Make a C function that finds the largest and smallest elements in a floating point array of size {size}.",
  "Write a C program to solve the Tower of Hanoi problem with {disks} disks.",
  "Write a C program to implement a hash table with size {size}.",
  "Write a C function that gets a list of integers and calculates the sum of all missing numbers between the minimum and maximum values in the list. For example: [1, 2, 4, 6] -> 3 + 5 = 8.",
  "Write a C program to detect memory leaks in malloc/free usage of a compiled C application.",
  "Build a C program that counts character frequency in '{string}'.",
  "Create a C program that calculates BMI using weight={weight}kg and height={height}cm.",
  "Write a C program to convert infix expression '{expression}' to postfix.",
  "Build a C program that simulates DNS lookup for domain '{domain}'.",
  "Create a C program to roll a dice {n} times and track statistics.",
  "Make a C program that tracks {n} employees using structs.",
  "Write a C program that reads a file '{filename}' and inserts a white space character after all lowercase letters that are followed by an uppercase letter. For example: 'helloWorld' -> 'hello World'.",
  "Write a C program that receives two dates and returns the number of days between them.",
  "Write a C program to implement a min-heap of size {size}.",
  "Create a C program to compress text '{string}' using Huffman coding.",
  "Write a C program that gets a file name as a parameters and changes all camel case words to snake case in it.",
  "Build a C program that generates and solves a {size}x{size} maze.",
  "Write a C program to simulate memory allocation using {strategy} fit.",
  "Given a list of scores, return a list with the grades of the students. The grading system is as follows: 90-100 -> A, 80-89 -> B, 70-79 -> C, 60-69 -> D, below 60 -> F. Write the program in C.",
  "Write a C program that implements the LRU algorithm for {pages} page requests.",
  "Make a C program that calculates average rainfall over {months} months using the mm values from forecasts.",
  "Create a C program to simulate a blockchain storing {n} transactions.",
  "Write a C program that implements producer-consumer with {n} items using semaphores.",
  "Write a C program to simulate packet retransmission with packet loss rate {loss_rate}.",
  "Create a C program to demonstrate pointer arithmetic on array of {size} elements.",
  "Make a C program that emulates a simple shell for {n} commands.",
  "Create a C program to simulate an elevator serving {floors} floors.",
  "Write a C program that implements binary search on array of {size} integers.",
  "Implement the Stalin sort algorithm in C, where elements are repeatedly removed from the front and discarded if they are smaller than the last element.",
  "Build a C program to parse DNS packets from file '{filename}'.",
  "Write a C program to encrypt the message '{message}' using XOR with key {key}.",
  "Generate a C program that implements AVL tree insertion for {n} elements.",
  "Make a C program that uses function pointers to execute {n} operations.",
  "Write a C program to simulate bank transactions with {accounts} accounts.",
  "Create a C program to simulate voting for {candidates} candidates.",
  "Write a C program that sorts a list of lists (2D array) in lexicographical order: for each pair of sublists, compare the first element; if they are equal, compare the second element, and continue this way until a difference is found or the end of the list is reached.",
  "Build a C program to detect primes in array: {array}.",
  "Write a C function that gets a string that has letters and numbers in it, and returns a list that contains only the numbers in it with the digits added together to make the longest possible numbers. For example: 'aaa11g5vv9999l' -> [11, 5, 9999].",
  "Write a C program that parses JSON-like string '{json_data}'.",
  "Create a C program that performs {traversal} traversal on a binary tree.",
  "Write a C program that generates a random phone number and checks a custom database whether it has been generated before or not. If not then it saves it to the list. Make sure it works with multiple threads.",
  "Generate a C program that checks if brackets in '{expression}' are balanced.",
  "Write a C program that writes the huffman encoded version of the string '{string}' to a file.",
  "Write a C program to simulate dining philosophers with {n} philosophers.",
  "Create a C program that computes GPA from grades: {grades}.",
  "Write a C program that receives a dictionary with the prices of groceries and a shopping list and returns the total price of the shopping list. Make sure that if an item is scanned multiple times the item itself will appear as 'milkx2' instead of 'milk, milk'.",
  "Write a C program to convert {number} to Roman numerals.",
  "Build a C program that inserts words into a trie: {words}.",
  "Write a C program to compare sorting algorithms on array of size {size}.",
  "Generate a C program that creates a histogram from data: {data}.",
  "Write a C function that when given a string that contains any number of lower or uppercase letters, returns the missing letters in the alphabet. If the string is empty, return all letters.",
  "Write a C program that implements a doubly linked list with {nodes} nodes.",
  "Create a C program to simulate Round Robin scheduling with {n} processes.",
  "Write a C function that receives an integer and returns another integer where the same digits have been randomly shuffled.",
  "Create a C program to convert {value} from Celsius to Fahrenheit.",
  "Build a C program to perform polynomial addition for {n} terms.",
  "Write a C function that receives two integer lists and returns a 1 for each element that differs and a 0 for each element that is the same.",
  "Write a C function that verifies if an email address is valid.",
  "Write a C function that verifies if a URL points to some resource within the current domain: '{url}'.",
  "Write a C program that analyzes the text '{text}' and estimates reading level.",
  "Create a C program to remove duplicates from array: {array}.",
  "Write a C program to simulate hotel booking for {rooms} rooms.",
  "Generate a C program to classify IP: '{ip_address}'.",
  "Write a C program that generates CAPTCHA of length {length}.",
  "Write a C function that given two lists of integers between 0 and 9, returns the sum of the two numbers represented by the lists. The first list is the most significant digit, so [1, 2, 3] + [4, 5, 6] = 579.",
  "Create a C program for a number guessing game between 1 and {max}.",
  "Build a C program that manages a to-do list with {tasks} tasks stored in file.",
  "Write a C program to compress '{string}' using RLE encoding.",
  "Make a C program that checks if '{expression}' has balanced parentheses.",
  "Write a C program that calculates the determinant of a {n}x{n} matrix.",
  "Create a C program to simulate paging with {frames} frames and {pages} pages.",
  "Generate a C program to analyze log file '{log_file}' for unique visitors.",
  "Write a C program that simulates elevator control for {people} people with priorities.",
  "Create a C program to compute (base^{exp}) mod {mod} efficiently.",
  "Write a C program to generate and verify CRC32 checksum for '{data}'.",
  "Build a C program to simulate a queue system with {customers} customers and average wait time of {wait_time} minutes",
  "Write a C program that generates the Mandelbrot set in ASCII art up to {iterations} iterations.",
  "Write a C program to simulate a Markov chain with {states} states and {steps} steps.",
  "Write a C program that simulates a Brownian motion for {particles} particles.",
  "Write a C function that receives a number n as a parameter and returns the identity matrix of size n x n. If the number is negative, return the reversed identity matrix of the same (-n) x (-n) size.",
  "Create a C program that converts the name of a persion into phone dial format. For example: 'John Doe' -> '5636 363'.",
  "Implement the quicksort algorithm in C to sort an array containing any number of positive integers, where all values are less than {limit}.",
  "Build a C program to simulate the spread of a virus in a population using SIR model equations.",
  "Write a C program that approximates the solution to the Traveling Salesman Problem for {n} cities using a greedy heuristic.",
  "Generate a C program to render a fractal tree in ASCII art with {depth} recursion depth.",
  "Make a C program to analyze prime gaps between primes up to {limit}.",
  "Write a C program that given a set of poker hands, evaluates their strength and ranks them against eachother.",
  "Create a C program that computes the discrete Fourier transform (DFT) of a sequence of {n} values.",
  "Write a C program that prints the ASCII value of each character in a string.",
  "Make a C program to count the number of words in user input.",
  "Write a C program that displays the current system time.",
  "Create a C program to read integers from a text file '{filename}' and print their sum.",
  "Create a C program to read 32 bit integers from a binary file '{filename}' and print their sum (which may be larger than 32 bits).",
  "Make a C function that finds the maximum and minimum in an array entered by the user.",
  "Make a C program that prints the environment variables to recreate the command that the user ran.",
  "Write an application in the C programming language, that reads from a binary file and receives an encoding as a parameter, then decodes the file contents and prints them to the console. It should support ASCII, HEX, and Base64",
  "Create a C program to perform HTTP GET request to '{url}' and print the response.",
  "Write a C program that simulates a dice roll {n} times and displays statistics.",
  "Build a C program that reverses the contents of a text file.",
  "Make a C program to parse CSV data from '{filename}' and print each record.",
  "Write a C program that plays a number guessing game with the user.",
  "Build a C program to emulate a basic shell (supporting cd, ls, exit).",
  "Create a C program to list all running processes on Linux. Make an optional parameter to filter by name using fuzzy search.",
  "Write a C program to parse and validate an IPv4 address. Also specify if it is public or private.",
  "Make a C program that implements a basic URL shortener using MD5.",
  "Build a C program to implement a HTTP server on port {port} that reads files from the current directory and serves them. It has to support html, css, javascript and common image formats such as png, jpg, and gif.",
  "Write a C program that uses pthreads to parallelize matrix multiplication of any size.",
  "Make a C program that enables communication between two processes using pipes and make a chat application with it.",
  "Build a C program that monitors changes in a directory (like inotify).",
  "Write a C program to detect and report memory leaks in its own process.",
  "Create a class to implement a custom memory allocator in the C programming language.",
  "Write a C function that convert seconds into a human-readable format (e.g., 3661 seconds -> 1 hour, 1 minute, 1 second).",
  "Build a C program that parses ELF headers of executables.",
  "Write a C function that given a python code file, removes all comments and docstrings.",
  "Write a C program that receives a list of executable file paths as arguments, runs them and measures their execution time. It then displays a toplist.",
  "Write a C program that receives a JSON file as an input and created a Croissant document from it.",
  "Write a C function that receives javascript code as an input and returns an obfuscated version of it. Do not use any libraries that do obfuccation, rather implement a custom solution.",
  "Write a C function that calculates the graph permutation of a given graph. The graph is represented as an adjacency list.",
  "Write a C function that given a two numbers, it calulcates the modular inverse of the first number with respect to the second number using the Extended Euclidean Algorithm.",
  "Write a C function that given a string, it returns the longest palindromic substring in it.",
  "Write a C function that given a list of integers, it returns the longest increasing subsequence in it.",
  "Write an application in the C programming language, that when given a list of git repositories, it attempts to clone all of them into a folder while being fault tolerant. It should give a list of all that failed at the end.",
  "Write an application in the C programming language, that receives an input and output file as parameters and convert between YAML and JSON formats depending on what was imported. So example.yaml -> example.json will convert YAML to JSON. If the formats match, return an error!",
  "Write an application in the C programming language, that when given a folder with a LaTeX project inside, it will use pdflatex to compile it into a PDF, removes all unnecessary files and zips it up for publishing.",
  "Write an application in the C programming language, that get a JSON file as an input parameters that contains a single list with strings in it. It feeds all the questions to an LLM using openai standard and saves the output into output.json. Both the api key and the server URL should be added as parameters.",
  "Write a C program that receives a folder as an input and searches all files inside recursively for the word {word} and returns a list of all files that contain it.",
  "Write a C function that finds the least common multiple (LCM) of two numbers {a} and {b}.",
  "Write an application in the C programming language, that solves the farm problem. It receives a list of animals and their counts, and calculates the total number of legs. The animals are: cows (4 legs), chickens (2 legs), pigs (4 legs) and sheep (4 legs). The program receives these by asking the user to enter each one by one.",
  "Write a C function that when given a URL for an ArXiV paper, it prints the paper's DOI URL.",
  "Write a C program that showcases all binary operations on bytes including bitshifts and rotations.",
  "Write a C program that implements a simple command line calculator that supports addition, subtraction, multiplication and division. It also handles operator precedence and parentheses correctly.",
  "Write a C function that when given an xml file path and a tag name, it returns all the text content of that tag in the file. If there are multiple tags, return all their contents separated by an enter.",
  "Write a C program that when given a file removes all {grades} characters from it regardless of casing.",
  "Write a C program that when given a file, it removes all the extra whitespace from the beginning and end of each line, and also removes all empty lines.",
  "Write an application in the C programming language, that when given a docker container name and run parameters, it starts that container {n} number of times and returns their logs into files named after the container name and the run number.",
  "Write an application in the C programming language, that when run automatically resolves all git conflicts in the current repository by taking the version from the current branch. If the file name is {filename}, then take the version of the incoming branch instead. It should also commit the changes and push them to the remote repository.",
  "Write a C function that receives an AWS ARN and returns which service it belongs to. If it is not a valid ARN, return an error.",
  "Write a C function that is capable of converting between celsius, fahrenheit and kelvin. It should take a temperature value and a unit as input and return the converted value in all three units. Return an error if the temperaturs is below absolute zero in the given unit.",
  "Write a C program that generates {length} random colors in hexadecimal format. Make sure that all are unique and not too similar to each other within a 1% threshold.",
  "Create a C program that generates a random UUID and prints it.",

  # Mathematics
  "Create a C program to implement a calculator for {operation} operations.",
  "Generate a C program that checks if {number} is a palindrome.",
  "Write a C program to find the factorial of {number} using recursion.",
  "Create a C program that finds the GCD of {number1} and {number2}.",
  "Calculate the positive or negative dominance of a list by looking at the number of positive and negative numbers. If they match, then the list is neutral. -1 for negative dominance, 0 for neutral and 1 for positive dominance. Write the program in C language.",
  "Write a C program that prints the first {n} Fibonacci numbers.",
  "Make a C program that multiplies two {n}x{n} matrices.",
  "Create a C program to check if {number} is an Armstrong number.",
  "Write a C program to print {rows} rows of Pascal's triangle.",
  "Given a list of positive integers, select the most efficient way of adding them up so that each group equals {weight} and there are the most possible groups. Write the program in C language.",
  "Write an application in the C programming language, that receives a list of parameters such as bullet caliber, bullet weight, barrel length and muzzle velocity, and calculates the kinetic energy of the bullet.",
  "Generate a C program that solves {n} linear equations using Gauss-Jordan method.",
  "Write a C program that uses threads to compute factorial of {number}.",
  "Write a C function to parse and evaluate the mathematical expression '{expression}'.",
  "Make a C program to detect cycle in directed graph with {nodes} nodes.",
  "For a given list [x1, x2, x3, ..., xn] compute the last (decimal) digit of x1 ^ (x2 ^ (x3 ^ (... ^ xn))). E. g., with the input [3, 4, 2], your code should return 1 because 3 ^ (4 ^ 2) = 3 ^ 16 = 43046721. Make sure the application works with high powers as well. Write the program in C Language.",
  "Write a C function that takes a matrix of any nxm size and returns its transpose.",
  "Create a C program that calculates the area of a triangle given its base and height.",
  "Write a C program that computes the Riemann zeta function ζ(s) for s = {s} using a series approximation.",
  "Create a C program to numerically integrate the function f(x) = x^{power} from {a} to {b} using Simpson's rule.",
  "Generate a C program that finds all eigenvalues of a {n}x{n} matrix.",
  "Build a C program to solve ordinary differential equations using Euler's method with {steps} steps.",
  "Build a C program to find all roots of a quadratic equation ax^2 + bx + = 0.",
  "Write a C function that calculates the standard deviation of a list of numbers.",
  "Build a C program to solve the N-Queens problem for N = {n}.",
  "Create a C program that calculates pi to {digits} decimal places using the Leibniz formula.",
  "Write a C program to perform matrix inversion for a {n}x{n} matrix.",
  "Make a C program to simulate genetic algorithms for optimizing the function f(x) = {expression}.",
  "Create a C program to solve a system of nonlinear equations using the Newton-Raphson method.",
  "Write a C program to numerically estimate the value of the definite integral ∫f(x)dx from {a} to {b} using Monte Carlo integration.",
  "Create a C program that implements the FFT (Fast Fourier Transform) on an array of {n} complex numbers.",
  "Write a C program that prints the multiplication table for a number entered by the user.",

  # Finance & Economics
  "Create a C program to calculate compound interest with P={principal}, R={rate}, T={time}.",
  "Write an application in the C programming language, that generates an investment disclosure statement for a given list of assets. It should include the asset name, and whether the investor is a majority or minority shareholder.",
  "Write a C function that when given a list of asset purcases, sells and their current value, calculates the time weighted return of the portfolio.",
  "Write a C function that when given an stock's outstanding shares, current price and number of splits, it calculates the market capitalization of the stock.",
  "Write a C function that when given a list of stock prices, it calculates the average price over the last {n} days.",
  "Write a C function that when given a list of stock prices, it calculates the standard deviation of the prices over the last {n} days.",
  "Write an application in the C programming language, that asks for information such as income, capital gains, dividens and tax percentages, then calculates the tax owed based on the current tax brackets. It also provides a breakdown of the income sources and expenses.",

  # Database & Storage
  "Write an application in the C programming language, that gets an sqlite database as a parameter and recursively lists all the databases and tables in it.",
  "Write a C function that retreives some data from a Redis cache at {url} and returns a specified optional string if there is a cache miss.",
  "Write an application in the C programming language, that connects to a PostgreSQL database with IP {ip} and default port and gets the last two entries from the '{db_table}' table.",
  "Write a C program that connects to a MongoDB database at {url} and retrieves all documents from the '{db_table}' collection.",
  "Write a C program that gets a list of AWS S3 bucket URLs and a role secret token, then downloads all of them to a local folder.",

  # File operations
  "Write a C program that analyzes the entropy of a given file.",
  "Write a C program to remove all duplicate lines from a text file.",
  "Write a C program to analyze word frequency in '{filename}'.",
  "Create a C program that detects and reports palindromic lines in a file.",
  "Create a C program that checks if a file '{filename}' exists.",
  "Create a C program to compress a file '{filename}' using Run-Length Encoding (RLE).",
  "Write an application in the C programming language, that when given a list of paths, validates if all of them are valid, exists and are writeable by the current used. It should reaturn each path where this is not the case.",
  "Write an application in the C programming language, that receives a folder path and a file extension, and deletes all files in that folder with the given extension recursively. Make sure it does not accidentally delete itself and it does not follow symbolic links.",
  "Write a C program that copies the contents of one file to another using the specified command line arguments, and allows for rewriting using regex.",
  "Write a C function that creates a temporary folder for the process to use and returns the full path as a string. Make sure it has high entropy, so no two processes will write to the same folder.",

  # Networking
  "Create a C program to simulate TCP's three-way handshake with socket programming.",
  "Build a C program that finds and replaces all occurrences of '{word}' in a file with '{replacement}'.",
  "Write a C program to analyze network packets from interface '{interface}'.",
  "Create a C program to monitor network bandwidth usage of the current machine.",
  "Create a C program that connects to server IP '{ip_address}' on port {port} and sends the message '{message}'",
  "Write an application in the C programming language, that sends ping packets to a given list of IP addresses and reports the average round-trip time for each address.",
  "Write an application in the C programming language, that when given an IPv4 or IPv6 address, checks how many hops it takes to reach it.",

  # Hardware & Systems
  "Build a C program to monitor CPU temperature (Linux only).",
  "Create a C program that turns on GPIO pin 0 for {number} milliseconds when GPIO pin {shift} is pressed on a Raspberry Pi.",
  "Write a C program to scan for available Bluetooth devices and return them in a list.",
  "Make a C program to detect and list all USB devices connected to the system. if a given device is a thumb drive, it should also show it's capacity.",
  "Build a C program that uses mmap to share memory between two processes.",
  "Create a function that allocates virtual disk space in the RAM and returns a file descriptor for it. The size of the disk should be {size} kilobytes.",
  "Create a C program to monitor CPU and memory usage for {duration} seconds.",
  "Create an application in the C programming language, that sets the system's sound volume to {volume}%. It should work on Windows and Linux and Mac.",

  # Security & Cryptography
  "Build a C program that analyzes the strength of a password based on criteria such as length, number of special characters and randomness. Example password: '{string}'.",
  "Write a C program that generates a random password of length {length} containing uppercase, lowercase, digits and special characters.",
  "Create a C program that encrypts and decrypts '{string}' using a random XOR key.",
  "Create a C program that captures and logs all HTTP traffic on port {port} and saves it to a file.",
  "Write a C program to perform a simple port scan on {ip_address} for the top 15 most common ports",
  "Build a C program that calculates a SHA-256 hash of a file given a path. Make sure it handles large files efficiently.",
  "Make a C program that implements a basic firewall (block/allow IPs) at user level.",
  "Write a C program that simulates a simple DNS resolver for domain '{domain}'.",
  "Build a C program to encrypt the string '{string}' using Caesar cipher (shift = {shift}).",
  "Make a C function that calculates the public key in a finite-field Diffie-Hellman key exchange given a prime, a base and a secret.",
  "Make a C function that calculates the shared key in a finite-field Diffie-Hellman key exchange given a prime, a base and a public key.",
  "Write an example application in C language, that implements oblivious an pseudorandom function for demonstration.",
  "Implement a C function that given some bytes and a key, calculates the HMAC hash of it using SHA256. Do not use a library for the HMAC, implement it manually.",
  "Write a C program that when given a SHA1 hash, attempts to bruteforce it by iteration through all possible combinations of lowercase letters and digits up to length {length}.",
  "Write a C program that receives a url with path inside as an input and starts fuzzing the path from a dictionary. Only use words that are shorter than {length} characters. It returns a list of all paths that returned a 200 OK response.",
  "Write a C program that receives a domain as an input and starts fuzzing the subdomain using a dictionary. Only use words that are shorter than {length} characters. It returns a list of all registered subdomains.",
  "Write a C program that implements a Feistel cipher with a block size of {length} bytes using SHA256 as the round function. It takes a key and a plaintext as input and return the ciphertext. Also implement the decryption function.",
  "Write a C program that implements a simple steganography algorithm that hides a message in an image file by modifying the least significant bits of the pixel values.",
  "Build a C program that encodes and decodes Morse code.",
  "Make a C program to simulate weather (temperature, humidity) over {days} days.",
  "Create a C program that implements a simple genetic algorithm to evolve a population of strings towards a target string '{string}'.",
  "Build a C program that learns to predict the next number in a sequence (very simple perceptron).",
  "Make a C program to sign data using RSA with primes {p}, {q} and SHA-512.",
  "Write a C program to generate L-systems fractal patterns in ASCII.",
  "Make a C program that encrypts a file with XOR using a user-provided key.",
  "Write a C program that when given a TOTP string, it generates a TOTP code for the current time.",
  "Make a C program that chats with the user using pattern-matching responses.",
  "Create a C program that solves Sudoku puzzles using backtracking.",
  "Create a C program to implement the RSA algorithm for key generation, encryption, and decryption with primes {p} and {q}.",

  # Data Structures & Algorithms
  "Build a C program that implements a priority queue using a binary heap.",
  "Create a C program that balances a binary search tree after each insertion.",
  "Write a C program to visualize Dijkstra's algorithm step by step in console.",
  "Make a C program to serialize and deserialize a binary tree to a file.",
  "Write a C program that detects cycles in an undirected graph read from '{filename}'.",

  # Parsing & Compilers
  "Write a C program to parse a simple arithmetic expression and compute its value.",
  "Build a C program that implements a lexical analyzer for identifiers, numbers, and operators.",
  "Create a C program that tokenizes and parses a subset of JSON.",
  "Write a C program to generate an abstract syntax tree for arithmetic expressions.",
  "Make a C program that interprets Brainfuck code from '{filename}'.",
  "Write a C function that parses the AST of a C program and counts the number of keywords in it.",

  # Simulations & Games
  "Create a C program that plays Tic Tac Toe against the user (AI: minimax).",
  "Build a C program to simulate Conway's Game of Life for a grid of size {rows}x{cols}.",
  "Create a C program that animates a bouncing ball in the console.",
  "Write a C program that simulates cellular automata with user-defined rules.",
  "Make a C program that simulates rolling a die with unfair (weighted) probabilities.",
  "Write a C program to simulate and display a traffic light at an intersection.",
  "Implement a small chess program in C, that plays against the user with basic rules.",
  "Build a C program that simulates a basic economy with supply and demand for goods.",
  "Implement a simulation of a game of uno with {n} machine players.",
  "Create a C program that simulates a simple rock paper scissors game between two computers and prints the statistics over time.",
  "Build a C program that simulates a basic ecosystem with predators, prey, and plants.",
  "Create a C program that simulates a basic banking system with accounts, deposits, and withdrawals.",
  "Write a C program that simulates a simple card game (e.g., War) between two players.",
  "Make a C program that simulates a basic ATM with a balance of {balance}. Generate a list of plausible transactions.",
  "Build a C program to simulate a traffic light with durations {time_red}, {time_yellow}, {time_green}.",
  "Make a C program that simulates a simple file system with {files} files. The user can create, delete, and list files, and they can move between folders.",
  "Generate a C program that simulates airline reservation with {seats} seats.",
  "Implement a small battleship game in C language, where the user can place ships on a grid and then attack them. The grid size is {size}x{size}.",

  # Novel and Complex Tasks
  "Create a C program that prints a QR code for the text '{data}'.",
  "Write a C program to generate ASCII art from a grayscale image file.",
  "Build a C program that implements a ray tracer to render a 3D scene in ASCII.",
  "Make a C program that generates a random maze and finds the shortest path through it.",
  "Write a C program that simulates a neural network to classify handwritten digits from the MNIST dataset.",
  "Write a C program that implements a version of the A* pathfinding algorithm on a grid.",
  "Build a C program that simulates a physics engine for 2D objects with gravity and collisions.",
  "Write a C program that simulates a vending machine with customizable items and prices.",
  "Build a C program that visualizes sorting algorithms step by step in the console.",
  "Write a C program that simulates a basic stock market with user-defined companies and prices.",
  "Create a C program that generates a calendar for any given year and highlights holidays of {country}.",
  "Write a C program that implements a chatbot using a predefined set of responses.",
  "Make a C program that generates a random dungeon layout for a text-based RPG.",
  "Write a C program that calculates the shortest route between multiple cities using the Traveling Salesman Problem.",
  "Write a C program that generates a random story by combining predefined characters, settings, and events.",
  "Create a C program that visualizes the Fibonacci sequence as a spiral in ASCII art.",
  "Make a C program that generates a random crossword puzzle from a list of words.",
  "Write an interpreter in the C programming language, for a custom language that supports basic arithmetic operations and variable assignments."
]


# Maps each `{placeholder}` name that may appear in a prompt template to a
# zero-argument callable producing a freshly sampled value for it.
# Every key appears exactly once: a dict literal silently keeps only the LAST
# value of a repeated key, so earlier duplicates would be dead, misleading code.
sample_values = {
    "number": lambda: random.randint(10, 10000),
    "seats": lambda: random.randint(25, 125),
    "limit": lambda: random.randint(100000, 1000000000),
    "weight": lambda: random.randint(25, 150),
    "height": lambda: random.randint(120, 220),
    "value": lambda: random.randint(-20, 280),
    "days": lambda: random.randint(1, 365),
    # Caesar shift: 1..25 (a shift of 26 would map every letter onto itself).
    "shift": lambda: random.randint(1, 25),
    "rows": lambda: random.randint(10, 100),
    "cols": lambda: random.randint(10, 100),
    "size": lambda: random.randint(5, 100),
    "nodes": lambda: random.randint(3, 50),
    "traversal": lambda: random.choice(["inorder", "preorder", "postorder", "levelorder"]),
    "string": lambda: random.choice(["OpenAI2025!", "securePass123", "examplePwd", "Token_ABC987", "Qwerty!@#", "admin2024$", "guest_login", "Pa$$w0rd!", "helloWorld99", "xYz_321_secure", "user_001", "Key#1234", "AI_engine_v2", "accessGranted", "Dev@Mode"]),
    "country": lambda: random.choice([ "United States", "Hungary", "Germany", "France", "India", "China", "Brazil", "Canada", "Japan", "Australia", "South Africa"]),
    "interface": lambda: random.choice(["eth0", "wlan0", "lo"]),
    "length": lambda: random.randint(3, 15),
    "operation": lambda: random.choice(["basic arithmetic", "scientific", "matrix", "statistical analysis", "differential equations", "signal processing", "linear regression", "graph operations", "cryptographic hashing", "boolean logic", "Fourier transform", "complex number arithmetic", "symbolic computation", "vector calculus"]),
    "words": lambda: random.choice(["apple", "banana", "cherry", "date", "elderberry", "fig", "grape", "honeydew", "kiwi", "lemon", "mango", "nectarine", "orange", "papaya", "quince", "raspberry", "strawberry", "tangerine", "ugli", "watermelon"]),
    "replacement": lambda: random.choice(["XXX", "test", "very_good", "date", "REDACTED", "placeholder", "null", "anonymous", "sample_text", "updated_value", "temp123", "confidential", "user_input", "n/a", "final_version"]),
    "array": lambda: [random.randint(1, 100) for _ in range(5)],
    "filename": lambda: random.choice(["data.txt", "input.log", "output.csv", "report.pdf", "image.png", "archive.zip", "config.yaml", "notes.md", "script.py", "results.json", "backup.tar.gz", "readme.txt", "logfile.log", "dataset.xlsx", "summary.docx"]),
    "json_data": lambda: '{"name": "John", "age": 30, "city": "New York"}',
    "word": lambda: random.choice(["error", "success", "user", "connection"]),
    "ip_address": lambda: f"192.168.{random.randint(0, 255)}.{random.randint(0, 255)}",
    "port": lambda: random.choice([80, 443, 21, 22, 8080]),
    "loss_rate": lambda: round(random.uniform(0.0, 0.5), 2),
    "message": lambda: random.choice(["Hello, server!", "Ping", "GET /index.html"]),
    "customers": lambda: random.randint(1, 100),
    "wait_time": lambda: round(random.uniform(1.0, 10.0), 1),
    "balance": lambda: round(random.uniform(100.0, 10000.0), 2),
    "disks": lambda: random.randint(3, 10),
    "year": lambda: random.randint(1900, 2100),
    "domain": lambda: random.choice(["example.com", "openai.com", "myserver.local"]),
    "expression": lambda: random.choice(["(a+b)*c", "x*(y+z)-w", "a+b*(c-d)", "(m-n)/(p+q)", "r^2 + s^2", "log(x) + y", "sqrt(a*b) - c", "(x+y)*(z-w)/v", "a*(b+c*(d-e))", "exp(x) - ln(y)", "(u+v)^2 - w", "abs(x-y) + z", "(a/b) + (c*d)", "sin(theta) * r", "k*(m+n)-sqrt(p)"]),
    "duration": lambda: random.randint(1, 60),
    "principal": lambda: random.randint(1000, 10000),
    "rate": lambda: round(random.uniform(1.0, 10.0), 2),
    "time": lambda: random.randint(1, 10),
    "pages": lambda: random.sample(range(1, 20), k=5),
    "frames": lambda: random.randint(3, 10),
    "grades": lambda: [random.choice(['A', 'B', 'C', 'D', 'F']) for _ in range(5)],
    "text": lambda: "Lorem ipsum dolor sit amet, consectetur adipiscing elit.",
    "ip": lambda: f"10.0.{random.randint(0,255)}.{random.randint(0,255)}",
    "data": lambda: ''.join(random.choices(string.ascii_letters + string.digits, k=random.randint(10, 30))),
    "n": lambda: random.randint(2, 100),
    "mod": lambda: random.randint(10, 1000),
    "exp": lambda: random.randint(2, 20),
    "base": lambda: random.randint(2, 20),
    "p": lambda: random.choice([3, 5, 7, 11, 13]),
    "q": lambda: random.choice([17, 19, 23, 29]),
    "accounts": lambda: random.randint(1, 10),
    "candidates": lambda: random.randint(2, 5),
    "tasks": lambda: random.randint(3, 10),
    "rooms": lambda: random.randint(5, 50),
    "files": lambda: random.randint(1, 10),
    "log_file": lambda: random.choice(["access.log", "server.log", "errors.log"]),
    "db_table": lambda: random.choice(["users", "orders", "products", "transactions", "sessions", "logs", "inventory", "sales"]),
    "people": lambda: random.randint(1, 10),
    "number1": lambda: random.randint(1, 1000),
    "number2": lambda: random.randint(1, 1000),
    "time_red": lambda: random.randint(5, 60),
    "time_yellow": lambda: random.randint(2, 10),
    "time_green": lambda: random.randint(5, 60),
    "url": lambda: f"{random.choice(['http', 'https'])}://{random.choice(['example.com', 'privatesite.com', 'myserver.local', 'intranet.org', 'cluster.org'])}/{random.choice(['index', 'home', 'api', 'status'])}?id={random.randint(1, 1000)}",
    "strategy": lambda: random.choice(["first", "best", "worst"]),
    "s": lambda: random.uniform(1.5, 5.0),  # s for Riemann zeta, typically >1
    "power": lambda: random.randint(2, 10),
    "a": lambda: random.randint(1, 10),
    "b": lambda: random.randint(11, 50),
    "max": lambda: random.randint(10, 100),
    "floors": lambda: random.randint(2, 50),
    "months": lambda: random.randint(1, 24),
    "key": lambda: random.randint(1, 255),
    "states": lambda: random.randint(2, 10),
    "steps": lambda: random.randint(10, 1000),
    "depth": lambda: random.randint(2, 8),
    "volume": lambda: random.randint(0, 100),
}

### STEP 2 - Define helper functions

In [None]:

# Guards the read-modify-write cycle on the dataset file: save_to_dataset()
# holds this lock while it loads, de-duplicates and rewrites the JSON file.
dataset_lock = threading.Lock()

def fill_prompt(template: str, values=None) -> str:
    """Instantiate a prompt template by substituting its ``{placeholder}`` fields.

    Args:
        template: Prompt text that may contain ``{name}`` placeholders.
        values: Optional mapping of placeholder name to a zero-argument
            callable returning the replacement value. Defaults to the
            module-level ``sample_values`` table.

    Returns:
        The template with every known placeholder replaced. All occurrences
        of the same placeholder receive the same sampled value; unknown
        placeholders are left untouched.
    """
    if values is None:
        values = sample_values
    prompt = template
    for key, generator in values.items():
        placeholder = f"{{{key}}}"
        # Draw a sample only when the placeholder is actually present, so
        # unrelated generators are not invoked for every single template.
        if placeholder in prompt:
            prompt = prompt.replace(placeholder, str(generator()))
    return prompt

def extract_code(result: str) -> str:
    """Extract the C source from a model response.

    The patterns are tried in order and the first match wins:

    1. a fence tagged exactly ``c`` / ``C`` (tags that merely start with
       "c", such as ``cpp``, ``c++`` or ``cmake``, are not accepted here);
    2. the malformed ``c``` `` / ``C```C`` fence variants;
    3. any fenced block whose opening line ends with a newline, dropping the
       language tag on that line;
    4. any remaining fenced block.

    Args:
        result: Raw text returned by the model.

    Returns:
        The stripped contents of the first matching fenced block, or the
        stripped input when no fenced block is found.
    """
    patterns = [
        # The negative lookahead stops "```c" from matching the prefix of a
        # longer tag, which would leak fragments such as "pp" into the code.
        r"```c(?![\w+])\s*([\s\S]*?)\s*```",
        r"```C(?![\w+])\s*([\s\S]*?)\s*```",
        r"c```\s*([\s\S]*?)\s*```",
        r"C```C\s*([\s\S]*?)\s*```",
        # Fence with an optional info string on its own line: skip that line.
        r"```[^\n`]*\n([\s\S]*?)\s*```",
        r"```\s*([\s\S]*?)\s*```",
    ]
    for pattern in patterns:
        match = re.search(pattern, result)
        if match:
            return match.group(1).strip()
    return result.strip()

def sha256_of(text: str) -> str:
    """Return the hexadecimal SHA-256 digest of *text* encoded as UTF-8."""
    hasher = hashlib.sha256()
    hasher.update(text.encode("utf-8"))
    return hasher.hexdigest()

def check_compilable(c_code: str, timeout: float = 60.0) -> bool:
    """Report whether *c_code* compiles with ``gcc -c``.

    The source is written into a private temporary directory that is removed
    again on every exit path, including when gcc is missing or times out.

    Args:
        c_code: C source text to compile.
        timeout: Maximum number of seconds gcc may run before the attempt is
            abandoned and treated as a failure.

    Returns:
        True when gcc exits with status 0; False on a compile error, a
        timeout, or when the compiler could not be run at all.
    """
    # NOTE(review): with "-c" gcc stops before linking, so these -l flags are
    # presumably unused here; they are kept so the command line is unchanged.
    linker_flags = [
        "-lm", "-lfftw3", "-lsqlite3", "-lcrypto", "-lmysqlclient", "-lpq", "-lssl",
        "-lportaudio", "-lpcap", "-lqrencode", "-lSDL2", "-lglut", "-lGLU", "-lGL", "-lcurl",
        "-lgmp", "-lblas", "-llapack", "-lopenblas", "-larchive", "-lbluetooth", "-lcjson", "-ljansson",
        "-ljson-c", "-lgd", "-lglib-2.0", "-lgsl", "-lusb-1.0", "-ludev", "-lxml2", "-lncurses", "-lpoppler-cpp",
        "-ltiff", "-lpng", "-ljpeg", "-lxslt", "-lreadline", "-lpthread", "-llzma", "-lzstd", "-lz",
        "-lyaml", "-lopencv_core", "-lopencv_imgproc", "-lopencv_highgui", "-ltesseract", "-llept",
        "-lxlsxwriter"
    ]
    try:
        with tempfile.TemporaryDirectory() as workdir:
            source_path = os.path.join(workdir, "sample.c")
            output_path = os.path.join(workdir, "sample.o")
            # Explicit UTF-8 so non-ASCII characters in generated comments or
            # strings do not depend on the locale's default encoding.
            with open(source_path, "w", encoding="utf-8") as src:
                src.write(c_code)
            cmd = ["gcc", "-c", source_path, "-o", output_path] + linker_flags
            result = subprocess.run(
                cmd,
                # Detach stdin so a source that includes e.g. /dev/stdin
                # cannot block waiting on the notebook's input.
                stdin=subprocess.DEVNULL,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                timeout=timeout,
            )
            return result.returncode == 0
    except (OSError, subprocess.SubprocessError, UnicodeError):
        # Missing compiler, timeout, or unwritable/unencodable source:
        # all are treated as "not compilable".
        return False

def ask_for_c_code(prompt: str, model: str) -> str:
    """Send *prompt* to *model* and return the C code found in the reply.

    Args:
        prompt: The fully instantiated task description.
        model: Model identifier passed to the chat-completions endpoint.

    Returns:
        The code extracted from the first choice by ``extract_code``, or an
        empty string when the response carries no choices or no text
        content, so the caller can treat it as "no code generated".
    """
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )
    # Error payloads and refusals can come back without choices or with a
    # message whose content is None; neither can be fed to the regex search.
    if not response.choices:
        return ""
    content = response.choices[0].message.content
    if not content:
        return ""
    return extract_code(content)

def make_record(model, prompt, c_code):
    """Build the ordered dataset entry for one generated code sample.

    The entry holds the model's short name (the part after the last "/"),
    the prompt, the code itself, its SHA-256 checksum, and its size in
    characters and lines.
    """
    entry = OrderedDict()
    entry["model_name"] = model.rsplit("/", 1)[-1]
    entry["prompt"] = prompt
    entry["c_code"] = c_code
    entry["SHA256_checksum"] = sha256_of(c_code)
    entry["char_count"] = len(c_code)
    entry["num_lines"] = len(c_code.splitlines())
    return entry

def save_to_dataset(record, filename="dataset.json"):
    """Append *record* to the JSON dataset file unless it is already stored.

    The whole operation runs under ``dataset_lock``. A record counts as a
    duplicate when an existing entry has the same model name, prompt and
    code.

    Args:
        record: Mapping with at least the keys ``model_name``, ``prompt``
            and ``c_code``.
        filename: Path of the JSON file holding the list of records.
    """
    with dataset_lock:
        dataset = []
        try:
            with open(filename, "r", encoding="utf-8") as f:
                raw = f.read()
        except FileNotFoundError:
            raw = ""
        if raw.strip():
            try:
                dataset = json.loads(raw)
            except json.JSONDecodeError:
                # Starting over is fine, but overwriting the damaged file
                # would destroy every record collected so far; keep a copy.
                backup = filename + ".corrupt"
                os.replace(filename, backup)
                print(f"[!] {filename} is not valid JSON; moved to {backup}, starting a new dataset.")
        existing = {(d["model_name"], d["prompt"], d["c_code"]) for d in dataset}
        new_key = (record["model_name"], record["prompt"], record["c_code"])
        if new_key in existing:
            print("⚠️ Duplicate found, skipping.")
            return
        dataset.append(record)
        # Write to a sibling file first and swap it in, so an interruption
        # mid-write cannot leave a truncated dataset behind.
        tmp_name = filename + ".tmp"
        with open(tmp_name, "w", encoding="utf-8") as f:
            json.dump(dataset, f, indent=2)
        os.replace(tmp_name, filename)

def run_prompt(prompt, model, max_attempts=3):
    """Request code for *prompt* from *model* and store the first usable answer.

    Up to *max_attempts* requests are made. An answer is rejected when it is
    empty, shorter than 20 lines, or fails the compile check. The first
    accepted answer is saved to the dataset and the function returns; any
    exception raised during an attempt is reported and counts as a failed
    attempt.
    """
    for attempt in range(1, max_attempts + 1):
        try:
            generated = ask_for_c_code(prompt, model)
            if not generated.strip():
                print(f"[!] Attempt {attempt}: No C code generated: RETRY")
            elif len(generated.splitlines()) < 20:
                print(f"[!] Attempt {attempt}: Code under 20 lines: RETRY")
            elif not check_compilable(generated):
                print(f"[!] Attempt {attempt}: Code not compilable: RETRY")
            else:
                # Accepted: persist the sample and stop retrying.
                save_to_dataset(make_record(model, prompt, generated))
                return
        except Exception as e:
            print(f"[!] Attempt {attempt}: Error with prompt '{prompt[:50]}': {e}")
    print(f"[!] All {max_attempts} attempts failed for prompt: {prompt[:50]}")


### STEP 3 - Run the dataset creation

In [None]:
#--------------------CONFIGURATION-------------------------
# The API key is taken from the OPENROUTER_API_KEY environment variable when it
# is set, so the secret does not have to be stored in the notebook itself; the
# placeholder below is only the fallback and can still be edited by hand.
API_KEY = os.environ.get("OPENROUTER_API_KEY", "<YOUR-API-KEY-HERE>")
model_name = "<YOUR-OPENROUTER MODEL NAME HERE>"
MAX_WORKERS = 10     # number of prompts processed concurrently
num_iterations = 1   # how many times the full template list is run
client = OpenAI(base_url="https://openrouter.ai/api/v1", api_key=API_KEY)
#--------------------CONFIGURATION-------------------------


print(f"[*] Running {num_iterations} iterations across {len(c_dynamic_prompts)} prompts...")

with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
    for i in range(num_iterations):
        print(f"\n[*] Iteration {i+1}/{num_iterations}")
        futures = []
        # Each template is filled with fresh random values on every iteration.
        for template in tqdm(c_dynamic_prompts, desc=f"Queuing prompts (Round {i+1})"):
            prompt = fill_prompt(template)
            futures.append(executor.submit(run_prompt, prompt, model_name))
        # Draining the futures only drives the progress bar and waits for the
        # round to finish before the next one is queued.
        for future in tqdm(as_completed(futures), total=len(futures), desc="Processing results"):
            pass  # All handling is inside `run_prompt()`
