In [None]:
import re

class CppCryptoVulnerabilityDetector:
    def __init__(self):
        self.vulnerabilities = []

    def detect_weak_crypto(self, code):
        # Detects weak cryptographic algorithms like MD5, SHA1, DES
        weak_crypto_patterns = [
            r"\bMD5\b",  # Detect MD5 usage
            r"\bSHA1\b",  # Detect SHA1 usage
            r"\bDES\b"  # Detect DES usage
        ]
        for pattern in weak_crypto_patterns:
            matches = re.finditer(pattern, code)
            for match in matches:
                self.vulnerabilities.append(f"Weak cryptographic algorithm ({match.group(0)}) detected at line {self.get_line_number(code, match.start())}")

    def detect_insecure_encryption_mode(self, code):
        # Detects insecure encryption modes like ECB
        ecb_pattern = r"\bECB\b"  # Detect ECB mode usage
        matches = re.finditer(ecb_pattern, code)
        for match in matches:
            self.vulnerabilities.append(f"Insecure encryption mode (ECB) detected at line {self.get_line_number(code, match.start())}")

    def detect_hardcoded_keys(self, code):
        # Detects hardcoded cryptographic keys in variable assignments
        key_pattern = r'\b(?:const\s+char\s*\*|std::string|char\s*\[\])\s*\w+\s*=\s*".*";'
        matches = re.finditer(key_pattern, code)
        for match in matches:
            if "key" in match.group(0).lower():
                self.vulnerabilities.append(f"Hardcoded cryptographic key detected at line {self.get_line_number(code, match.start())}")

    def detect_static_iv(self, code):
        # Detects static or hardcoded Initialization Vectors (IVs)
        iv_pattern = r'\b(?:const\s+char\s*\*|std::string|char\s*\[\])\s*\w+\s*=\s*".*";'
        matches = re.finditer(iv_pattern, code)
        for match in matches:
            if "iv" in match.group(0).lower():
                self.vulnerabilities.append(f"Static or hardcoded Initialization Vector (IV) detected at line {self.get_line_number(code, match.start())}")

    def detect_insecure_password_hashing(self, code):
        # Detects the use of raw cryptographic hash functions for password hashing
        password_hashing_patterns = [
            r"\bSHA256\b",  # Detect SHA-256 usage (insecure if used without salting)
            r"\bSHA512\b"  # Detect SHA-512 usage (insecure if used without salting)
        ]
        for pattern in password_hashing_patterns:
            matches = re.finditer(pattern, code)
            for match in matches:
                # Check if this is used in password-related code (heuristic check)
                if re.search(r'password', code[match.start() - 50:match.end() + 50], re.IGNORECASE):
                    self.vulnerabilities.append(f"Insecure password hashing using ({match.group(0)}) detected at line {self.get_line_number(code, match.start())}. Consider using a key derivation function like bcrypt or Argon2.")

    def get_line_number(self, code, index):
        # Returns the line number in the code for a given index
        return code[:index].count('\n') + 1

    def analyze_code(self, code):
        self.detect_weak_crypto(code)
        self.detect_insecure_encryption_mode(code)
        self.detect_hardcoded_keys(code)
        self.detect_static_iv(code)
        self.detect_insecure_password_hashing(code)

    def report(self):
        if self.vulnerabilities:
            print("Detected Vulnerabilities:")
            for vulnerability in self.vulnerabilities:
                print(vulnerability)
        else:
            print("No vulnerabilities detected.")

def main():
    # Example C++ code to analyze (this would normally be read from a file)
    cpp_code_to_analyze = """
#include <iostream>
#include <openssl/md5.h>
#include <openssl/aes.h>
#include <openssl/des.h>
#include <openssl/sha.h>
#include <string>

void hashPassword(const std::string& password) {
    unsigned char hash[SHA256_DIGEST_LENGTH];
    SHA256((unsigned char*)password.c_str(), password.length(), hash);  // Insecure: Raw SHA-256 used for password hashing
}

void encryptWithHardcodedKey(unsigned char* data) {
    const char* key = "hardcoded_secret_key";  // Vulnerable: Hardcoded key
    const char* iv = "fixed_iv_123456";  // Vulnerable: Hardcoded IV
    AES_KEY aes_key;
    AES_set_encrypt_key((unsigned char*)key, 128, &aes_key);
    AES_cbc_encrypt(data, data, 16, &aes_key, (unsigned char*)iv, AES_ENCRYPT);  // Vulnerable: CBC with a static IV
}

void encryptWithECB(unsigned char* data) {
    AES_KEY aes_key;
    AES_set_encrypt_key((unsigned char*)"another_key_1234", 128, &aes_key);
    AES_ecb_encrypt(data, data, &aes_key, AES_ENCRYPT);  // Vulnerable: Using ECB mode
}

int main() {
    std::string password = "mysecretpassword";
    hashPassword(password);

    unsigned char data[16] = {0};
    encryptWithHardcodedKey(data);
    encryptWithECB(data);

    return 0;
}
"""

    # Analyze the C++ code
    detector = CppCryptoVulnerabilityDetector()
    detector.analyze_code(cpp_code_to_analyze)
    detector.report()

if __name__ == "__main__":
    main()


Detected Vulnerabilities:
Insecure encryption mode (ECB) detected at line 25
Hardcoded cryptographic key detected at line 15
Static or hardcoded Initialization Vector (IV) detected at line 16
Insecure password hashing using (SHA256) detected at line 11. Consider using a key derivation function like bcrypt or Argon2.


In [None]:
import clang.cindex
from google.colab import drive
drive.mount('/content/drive')

file_path = '/content/drive/MyDrive/test/sample.cpp'  # Update this with the correct file path

class CppVulnerabilityAnalyzer:
    def __init__(self, file_path):
        self.file_path = file_path
        self.index = clang.cindex.Index.create()
        self.vulnerabilities = []

    def analyze(self):
        translation_unit = self.index.parse(self.file_path)
        self.traverse_ast(translation_unit.cursor)

    def traverse_ast(self, node, thread_context=False, lock_acquired=False, lock_vars=None, func_defs=None):
        """
        Recursively traverse the AST and check for vulnerabilities.
        """
        # Initialize lock_vars and func_defs if not provided
        if lock_vars is None:
            lock_vars = set()
        if func_defs is None:
            func_defs = set()

        # Debugging: Print node kind, spelling, and line number
        if node.location.file:  # Only print nodes within the file being analyzed
            print(f"Visiting node: {node.kind} ({node.spelling}) at line {node.location.line}")

        # Detect function definitions to establish context
        if node.kind == clang.cindex.CursorKind.FUNCTION_DECL:
            func_defs.add(node.spelling)
            print(f"Function definition detected: {node.spelling} at line {node.location.line}")

        # Detect thread creation functions
        if node.kind == clang.cindex.CursorKind.CALL_EXPR and node.spelling in ["pthread_create", "std::thread"]:
            print(f"Thread-related function detected: {node.spelling} at line {node.location.line}")
            thread_context = True  # Entering a multithreaded context

        # Detect locking mechanisms (locking the mutex)
        if node.kind == clang.cindex.CursorKind.CALL_EXPR and ("lock" in node.spelling.lower() or "mutex" in node.spelling.lower()):
            print(f"Locking mechanism detected: {node.spelling} at line {node.location.line}")
            lock_acquired = True  # A lock has been acquired
            # Register lock variable
            for arg in node.get_arguments():
                lock_vars.add(arg.spelling)

        # Detect potential shared data access without locks
        if thread_context and not lock_acquired:
            self.detect_shared_data_access(node, lock_vars)

        # Detect weak cryptographic function usage
        self.detect_weak_crypto(node)

        # Recursively traverse all child nodes
        for child in node.get_children():
            # Pass down the context information
            self.traverse_ast(child, thread_context, lock_acquired, lock_vars, func_defs)

    def detect_shared_data_access(self, node, lock_vars):
        """
        Detect potential race conditions by checking shared data access without locks.
        """
        if node.kind == clang.cindex.CursorKind.DECL_REF_EXPR:
            # Check if the variable is a shared resource and not a lock variable
            if node.spelling not in lock_vars and node.spelling != "":
                print(f"Shared resource accessed without lock: {node.spelling} at line {node.location.line}")
                self.vulnerabilities.append(f"Potential race condition detected for resource '{node.spelling}' at line {node.location.line}")

    def detect_weak_crypto(self, node):
        """
        Detect the use of weak cryptographic algorithms.
        """
        if node.kind == clang.cindex.CursorKind.CALL_EXPR:
            # Check for weak cryptographic algorithms (MD5, SHA1)
            if node.spelling in ["MD5", "SHA1"]:  # Add more weak algorithms if needed
                print(f"Weak cryptographic algorithm ({node.spelling}) detected at line {node.location.line}")
                self.vulnerabilities.append(f"Weak cryptographic algorithm ({node.spelling}) detected at line {node.location.line}")

    def report(self):
        if self.vulnerabilities:
            print("\nDetected Vulnerabilities:")
            for vulnerability in self.vulnerabilities:
                print(vulnerability)
        else:
            print("\nNo vulnerabilities detected.")

if __name__ == "__main__":
    analyzer = CppVulnerabilityAnalyzer(file_path)
    analyzer.analyze()
    analyzer.report()


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Visiting node: CursorKind.DECL_REF_EXPR (__builtin_nans) at line 1795
Visiting node: CursorKind.UNEXPOSED_EXPR () at line 1795
Visiting node: CursorKind.STRING_LITERAL ("") at line 1795
Visiting node: CursorKind.CXX_METHOD (denorm_min) at line 1798
Visiting node: CursorKind.COMPOUND_STMT () at line 1798
Visiting node: CursorKind.RETURN_STMT () at line 1798
Visiting node: CursorKind.FLOATING_LITERAL () at line 1798
Visiting node: CursorKind.VAR_DECL (is_iec559) at line 1800
Visiting node: CursorKind.BINARY_OPERATOR () at line 1801
Visiting node: CursorKind.BINARY_OPERATOR () at line 1801
Visiting node: CursorKind.UNEXPOSED_EXPR (has_infinity) at line 1801
Visiting node: CursorKind.DECL_REF_EXPR (has_infinity) at line 1801
Visiting node: CursorKind.UNEXPOSED_EXPR (has_quiet_NaN) at line 1801
Visiting node: CursorKind.DECL_REF_EXPR (has_quiet_NaN) at line 1801
Visiting node: CursorKind.BINARY_OPERATOR () at line 1801
Visitin

In [None]:
import clang.cindex
from google.colab import drive
drive.mount('/content/drive')

file_path = '/content/drive/MyDrive/sample.cpp'

# Set up Clang library if needed (usually required for non-standard setups)
# clang.cindex.Config.set_library_file("/path/to/libclang.so")

class CppVulnerabilityAnalyzer:
    def __init__(self, file_path):
        self.file_path = file_path
        self.index = clang.cindex.Index.create()
        self.vulnerabilities = []

    def analyze(self):
        translation_unit = self.index.parse(self.file_path)
        self.traverse_ast(translation_unit.cursor)

    def traverse_ast(self, node):
        # Detect Race Condition
        if node.kind == clang.cindex.CursorKind.CALL_EXPR:
            # Example: Detecting unsafe thread-related functions like pthread_create, without synchronization
            if node.spelling in ["pthread_create", "std::thread"]:
                self.check_race_condition(node)

        # Detect Insufficient Cryptographic Protection
        if node.kind == clang.cindex.CursorKind.CALL_EXPR:
            if node.spelling in ["MD5", "SHA1", "DES"]:
                self.vulnerabilities.append(
                    f"Weak cryptographic algorithm ({node.spelling}) detected at line {node.location.line}"
                )

        # Recursively traverse the AST
        for child in node.get_children():
            self.traverse_ast(child)

    def check_race_condition(self, node):
        """
        Check if shared data is accessed without synchronization.
        """
        parent_function = self.find_parent_function(node)
        if parent_function:
            shared_resources = self.detect_shared_resources(parent_function)
            for resource in shared_resources:
                if not self.is_synchronized(resource, parent_function):
                    self.vulnerabilities.append(
                        f"Potential race condition detected for resource '{resource}' at line {node.location.line}"
                    )

    def find_parent_function(self, node):
        """
        Find the parent function containing the node.
        """
        while node is not None and node.kind != clang.cindex.CursorKind.FUNCTION_DECL:
            node = node.semantic_parent
        return node

    def detect_shared_resources(self, function_node):
        """
        Detect variables that may be shared across threads.
        """
        shared_resources = []
        for child in function_node.get_children():
            if child.kind == clang.cindex.CursorKind.VAR_DECL and "shared" in child.spelling.lower():
                shared_resources.append(child.spelling)
        return shared_resources

    def is_synchronized(self, resource, function_node):
        """
        Check if access to the shared resource is synchronized.
        """
        for child in function_node.get_children():
            if child.kind == clang.cindex.CursorKind.CALL_EXPR:
                # Check for synchronization primitives like mutexes
                if "lock" in child.spelling or "mutex" in child.spelling:
                    return True
        return False

    def report(self):
        if self.vulnerabilities:
            print("Detected Vulnerabilities:")
            for vulnerability in self.vulnerabilities:
                print(vulnerability)
        else:
            print("No vulnerabilities detected.")

if __name__ == "__main__":
    analyzer = CppVulnerabilityAnalyzer(file_path)
    analyzer.analyze()
    analyzer.report()


Mounted at /content/drive
No vulnerabilities detected.


In [None]:
import re

# Sample C++ code
cpp_code = """
#include <iostream>
#include <thread>
#include <mutex>
#include <openssl/md5.h>
#include <openssl/sha.h>

// Shared resource with potential race condition
int shared_data = 0;
std::mutex mtx;

void unsafe_thread_func() {
    shared_data++;  // Vulnerable: access without locking
}

void safe_thread_func() {
    std::lock_guard<std::mutex> guard(mtx);
    shared_data++;  // Safe: access with locking
}

void hash_function() {
    const char* input = "data";
    unsigned char output[MD5_DIGEST_LENGTH];
    MD5((unsigned char*)input, strlen(input), output);  // Vulnerable: Using MD5
}

int main() {
    std::thread t1(unsafe_thread_func);
    std::thread t2(unsafe_thread_func);

    t1.join();
    t2.join();

    hash_function();

    return 0;
}
"""

# Regex patterns for detecting vulnerabilities
regex_patterns = {
    "race_condition_threads": r"\bstd::thread\b\s*\w+\s*\(",
    "mutex_lock": r"\bstd::lock_guard<\s*std::mutex\s*>\b|\bstd::mutex\b",
    "weak_crypto": r"\b(MD5|SHA1|DES)\b\s*\(",
}

def detect_vulnerabilities(code):
    vulnerabilities = []

    # Detect race conditions by finding thread usages without mutex locks
    thread_matches = re.findall(regex_patterns["race_condition_threads"], code)
    mutex_matches = re.findall(regex_patterns["mutex_lock"], code)

    if thread_matches:
        if not mutex_matches:
            vulnerabilities.append(f"Potential race condition detected: Threads created without mutex lock.")
        else:
            thread_lines = [match.start() for match in re.finditer(regex_patterns["race_condition_threads"], code)]
            mutex_lines = [match.start() for match in re.finditer(regex_patterns["mutex_lock"], code)]

            # Simple heuristic: if thread creation is detected without a lock in the same function scope
            for thread_pos in thread_lines:
                if not any(mutex_pos < thread_pos for mutex_pos in mutex_lines):
                    vulnerabilities.append(f"Potential race condition detected at position {thread_pos}: Thread without mutex lock.")

    # Detect weak cryptographic algorithms
    weak_crypto_matches = re.finditer(regex_patterns["weak_crypto"], code)
    for match in weak_crypto_matches:
        vulnerabilities.append(f"Weak cryptographic algorithm ({match.group(1)}) detected at position {match.start()}")

    return vulnerabilities

# Analyze the sample code
vulnerabilities = detect_vulnerabilities(cpp_code)

# Report the findings
if vulnerabilities:
    print("Detected Vulnerabilities:")
    for vulnerability in vulnerabilities:
        print(vulnerability)
else:
    print("No vulnerabilities detected.")


Detected Vulnerabilities:
Weak cryptographic algorithm (MD5) detected at position 509


In [None]:
import clang.cindex

# Set up Clang library if needed (usually required for non-standard setups)
# clang.cindex.Config.set_library_file("/path/to/libclang.so")

from google.colab import drive
drive.mount('/content/drive')

file_path = '/content/drive/MyDrive/sample.cpp'

class CppVulnerabilityAnalyzer:
    def __init__(self, file_path):
        self.file_path = file_path
        self.index = clang.cindex.Index.create()
        self.vulnerabilities = []

    def analyze(self):
        translation_unit = self.index.parse(self.file_path)
        self.traverse_ast(translation_unit.cursor)

    def traverse_ast(self, node):
        # Detect Race Condition
        if node.kind == clang.cindex.CursorKind.CALL_EXPR:
            # Example: Detecting unsafe thread-related functions like pthread_create, without synchronization
            if node.spelling in ["pthread_create", "std::thread"]:
                self.check_race_condition(node)

        # Detect Insufficient Cryptographic Protection
        if node.kind == clang.cindex.CursorKind.CALL_EXPR:
            if node.spelling in ["MD5", "SHA1", "DES"]:
                self.vulnerabilities.append(
                    f"Weak cryptographic algorithm ({node.spelling}) detected at line {node.location.line}"
                )

        # Recursively traverse the AST
        for child in node.get_children():
            self.traverse_ast(child)

    def check_race_condition(self, node):
        """
        Check if shared data is accessed without synchronization.
        """
        parent_function = self.find_parent_function(node)
        if parent_function:
            shared_resources = self.detect_shared_resources(parent_function)
            for resource in shared_resources:
                if not self.is_synchronized(resource, parent_function):
                    self.vulnerabilities.append(
                        f"Potential race condition detected for resource '{resource}' at line {node.location.line}"
                    )

    def find_parent_function(self, node):
        """
        Find the parent function containing the node.
        """
        while node is not None and node.kind != clang.cindex.CursorKind.FUNCTION_DECL:
            node = node.semantic_parent
        return node

    def detect_shared_resources(self, function_node):
        """
        Detect variables that may be shared across threads.
        """
        shared_resources = []
        for child in function_node.get_children():
            if child.kind == clang.cindex.CursorKind.VAR_DECL and "shared" in child.spelling.lower():
                shared_resources.append(child.spelling)
        return shared_resources

    def is_synchronized(self, resource, function_node):
        """
        Check if access to the shared resource is synchronized.
        """
        for child in function_node.get_children():
            if child.kind == clang.cindex.CursorKind.CALL_EXPR:
                # Check for synchronization primitives like mutexes
                if "lock" in child.spelling or "mutex" in child.spelling:
                    return True
        return False

    def report(self):
        if self.vulnerabilities:
            print("Detected Vulnerabilities:")
            for vulnerability in self.vulnerabilities:
                print(vulnerability)
        else:
            print("No vulnerabilities detected.")

if __name__ == "__main__":
    analyzer = CppVulnerabilityAnalyzer(file_path)
    analyzer.analyze()
    analyzer.report()


Mounted at /content/drive
No vulnerabilities detected.


In [None]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Step 1: Prepare the Dataset
# Example C++ code snippets with labels (1 = Vulnerable, 0 = Safe)
code_snippets = [
    # Vulnerable code snippets (Race conditions)
    "std::thread t1(unsafe_thread_func); std::thread t2(unsafe_thread_func); t1.join(); t2.join();",  # Vulnerable
    "shared_data++;",  # Vulnerable: access without locking
    # Safe code snippets (Proper synchronization)
    "std::lock_guard<std::mutex> guard(mtx); shared_data++;",  # Safe
    "std::mutex mtx; std::unique_lock<std::mutex> lock(mtx); shared_data++;",  # Safe
    # Vulnerable code snippets (Weak Cryptographic Functions)
    "unsigned char output[MD5_DIGEST_LENGTH]; MD5((unsigned char*)input, strlen(input), output);",  # Vulnerable: MD5
    "unsigned char hash[SHA_DIGEST_LENGTH]; SHA1(input, strlen(input), hash);",  # Vulnerable: SHA1
    # Safe code snippets (Secure Cryptographic Functions)
    "unsigned char hash[SHA256_DIGEST_LENGTH]; SHA256(input, strlen(input), hash);",  # Safe
    "unsigned char hash[SHA512_DIGEST_LENGTH]; SHA512(input, strlen(input), hash);",  # Safe
]

# Corresponding labels (1 = Vulnerable, 0 = Safe)
labels = [1, 1, 0, 0, 1, 1, 0, 0]

# Step 2: Feature Extraction using TF-IDF
vectorizer = TfidfVectorizer(token_pattern=r'\b\w+\b', max_features=100)
X = vectorizer.fit_transform(code_snippets)
y = np.array(labels)

# Step 3: Train/Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Step 4: Train the Random Forest Classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Step 5: Evaluate the Model
y_pred = model.predict(X_test)
print("Classification Report:\n", classification_report(y_test, y_pred))

# Output some test predictions
test_snippets = [
    "std::thread t1(unsafe_thread_func); t1.join();",  # Expected Vulnerable
    "std::lock_guard<std::mutex> guard(mtx); shared_data++;",  # Expected Safe
    "MD5((unsigned char*)input, strlen(input), output);",  # Expected Vulnerable
]

# Transform the test snippets
X_test_snippets = vectorizer.transform(test_snippets)
test_predictions = model.predict(X_test_snippets)

print("\nTest Predictions:")
for snippet, prediction in zip(test_snippets, test_predictions):
    print(f"Snippet: {snippet[:50]}... Prediction: {'Vulnerable' if prediction == 1 else 'Safe'}")


Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00       0.0
           1       0.00      0.00      0.00       2.0

    accuracy                           0.00       2.0
   macro avg       0.00      0.00      0.00       2.0
weighted avg       0.00      0.00      0.00       2.0


Test Predictions:
Snippet: std::thread t1(unsafe_thread_func); t1.join();... Prediction: Vulnerable
Snippet: std::lock_guard<std::mutex> guard(mtx); shared_dat... Prediction: Safe
Snippet: MD5((unsigned char*)input, strlen(input), output);... Prediction: Safe


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from google.colab import drive

# Step 1: Mount Google Drive to access files
drive.mount('/content/drive')

# Step 2: Load code snippets from Google Drive
file_path = '/content/drive/MyDrive/sample.cpp'  # Update with your file path

# Read the content of the file
with open(file_path, 'r') as file:
    code_snippet = file.read()

# Step 3: Load the pre-trained CodeBERT model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("microsoft/codebert-base")
model = AutoModelForSequenceClassification.from_pretrained("microsoft/codebert-base", num_labels=2)

# Step 4: Tokenize the input code snippet
inputs = tokenizer(code_snippet, return_tensors="pt", padding=True, truncation=True, max_length=512)

# Step 5: Perform inference to classify the code snippet
with torch.no_grad():  # No need for gradient computation during inference
    outputs = model(**inputs)
    predictions = torch.argmax(outputs.logits, dim=-1)

# Step 6: Output the classification result
vulnerability = "Vulnerable" if predictions[0].item() == 1 else "Safe"
print(f"Code is classified as: {vulnerability}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/498 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]



pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Code is classified as: Vulnerable


In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import re
from google.colab import drive

# Step 1: Mount Google Drive to access files
drive.mount('/content/drive')

# Step 2: Load code snippets from Google Drive
file_path = '/content/drive/MyDrive/sample.cpp'  # Update with your file path

# Read the content of the file
with open(file_path, 'r') as file:
    code_snippet = file.read()

# Step 3: Load the pre-trained CodeBERT model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("microsoft/codebert-base")
model = AutoModelForSequenceClassification.from_pretrained("microsoft/codebert-base", num_labels=2)

# Step 4: Tokenize the input code snippet
inputs = tokenizer(code_snippet, return_tensors="pt", padding=True, truncation=True, max_length=512)

# Step 5: Perform inference to classify the code snippet
with torch.no_grad():  # No need for gradient computation during inference
    outputs = model(**inputs)
    predictions = torch.argmax(outputs.logits, dim=-1)

# Determine if the code is vulnerable or safe based on CodeBERT
codebert_vulnerability = "Vulnerable" if predictions[0].item() == 1 else "Safe"
print(f"CodeBERT classified code as: {codebert_vulnerability}")

# Step 6: Improved Rule-Based Detection for Explaining Vulnerabilities
def explain_vulnerability(code_snippet):
    explanations = []

    # Example 1: Check for weak cryptographic algorithms (e.g., MD5)
    if re.search(r'\bMD5\b', code_snippet):
        explanations.append("Weak Cryptographic Algorithm: The code uses MD5 for hashing, which is considered insecure due to vulnerabilities to collision attacks. Consider using a more secure algorithm like SHA-256.")

    # Example 2: Check for potential race conditions (e.g., shared resources without locks)
    # Look for shared variable declarations and usage
    shared_var_pattern = re.compile(r'int\s+(\w+)\s*=\s*\d+;')  # Detect shared integer variables
    thread_creation_pattern = re.compile(r'\bstd::thread\b\s*\w*\s*\(\s*(\w+)\s*\)')  # Detect thread creation
    mutex_pattern = re.compile(r'\bstd::(lock_guard|mutex|unique_lock)\b')  # Detect mutex usage

    shared_vars = shared_var_pattern.findall(code_snippet)
    thread_usages = thread_creation_pattern.findall(code_snippet)

    # If there are shared variables and threads created, but no mutex is found, it's a potential race condition
    if shared_vars and thread_usages and not mutex_pattern.search(code_snippet):
        for var in shared_vars:
            explanations.append(f"Potential Race Condition: The shared variable '{var}' is accessed from multiple threads without proper synchronization (like std::lock_guard or std::mutex). This can lead to unpredictable behavior.")

    # More checks can be added here for different types of vulnerabilities

    # Return explanations if any are found
    if explanations:
        return "\n".join(explanations)
    else:
        return None

# Step 7: Hybrid approach - Check both CodeBERT's output and regex patterns
if codebert_vulnerability == "Vulnerable":
    explanation = explain_vulnerability(code_snippet)
    if explanation:
        print("\nExplanation of Vulnerability:")
        print(explanation)
    else:
        print("\nCodeBERT indicated vulnerability, but no specific pattern was matched.")
else:
    explanation = explain_vulnerability(code_snippet)
    if explanation:
        print(f"\nThe code is safe according to CodeBERT but has potential issues:\n{explanation}")
    else:
        print("Code is classified as safe.")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


CodeBERT classified code as: Safe

The code is safe according to CodeBERT but has potential issues:
Weak Cryptographic Algorithm: The code uses MD5 for hashing, which is considered insecure due to vulnerabilities to collision attacks. Consider using a more secure algorithm like SHA-256.


In [None]:
import os
import clang.cindex

# Define insecure methods and their descriptions
INSECURE_METHODS = {
    'MD5': {
        'description': 'MD5 is a weak hashing algorithm that is vulnerable to collision attacks.',
        'suggestion': 'Use SHA-256 or better for hashing.'
    },
    'SHA1': {
        'description': 'SHA-1 is no longer considered secure due to vulnerability to collision attacks.',
        'suggestion': 'Use SHA-256 or better for hashing.'
    },
    'SHA-1': {
        'description': 'SHA-1 is no longer considered secure due to vulnerability to collision attacks.',
        'suggestion': 'Use SHA-256 or better for hashing.'
    }
}

def find_insecure_methods(node):
    """Recursively traverse the AST and find insecure methods."""
    if node.kind == clang.cindex.CursorKind.CALL_EXPR:
        method_info = INSECURE_METHODS.get(node.spelling)
        if method_info:
            print(f"Insecure method found: {node.spelling} at {node.location}")
            print(f"Description: {method_info['description']}")
            print(f"Suggestion: {method_info['suggestion']}")
            print('-' * 50)  # Separator for readability
    for child in node.get_children():
        find_insecure_methods(child)

def parse_file(file_path):
    """Parse the given C++ file and analyze it."""
    try:
        print(f"Parsing file: {file_path}")  # Debugging line
        index = clang.cindex.Index.create()
        translation_unit = index.parse(file_path)
        find_insecure_methods(translation_unit.cursor)
    except Exception as e:
        print(f"Error parsing {file_path}: {e}")

def parse_directory(directory):
    """Walk through the directory and analyze .cpp and .h files."""
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith('.cpp') or file.endswith('.h'):
                print(f"Scanning {file}...")
                parse_file(os.path.join(root, file))

# '/content/drive/MyDrive/sample.cpp'
directory_to_scan = '/content/drive/MyDrive/test'
parse_directory(directory_to_scan)

Scanning sample.cpp...
Parsing file: /content/drive/MyDrive/test/sample.cpp


## **Text Search for Insecure Methods**
This method won't parse the C++ syntax, but it can effectively find insecure function calls in a straightforward way

In [None]:
import os

# Define insecure methods and their descriptions
INSECURE_METHODS = {
    'MD5': {
        'description': 'MD5 is a weak hashing algorithm that is vulnerable to collision attacks.',
        'suggestion': 'Use SHA-256 or better for hashing.'
    },
    'SHA1': {
        'description': 'SHA-1 is no longer considered secure due to vulnerability to collision attacks.',
        'suggestion': 'Use SHA-256 or better for hashing.'
    },
    'SHA-1': {
        'description': 'SHA-1 is no longer considered secure due to vulnerability to collision attacks.',
        'suggestion': 'Use SHA-256 or better for hashing.'
    }
}

def find_insecure_methods(file_path):
    """Search for insecure methods in the given file."""
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            content = file.read()
            for method, info in INSECURE_METHODS.items():
                if method in content:
                    print(f"Insecure method found: {method} in {file_path}")
                    print(f"Description: {info['description']}")
                    print(f"Suggestion: {info['suggestion']}")
                    print('-' * 50)  # Separator for readability
    except Exception as e:
        print(f"Error reading {file_path}: {e}")

def parse_directory(directory):
    """Walk through the directory and analyze .cpp and .h files."""
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith('.cpp') or file.endswith('.h'):
                print(f"Scanning {file}...")
                find_insecure_methods(os.path.join(root, file))


directory_to_scan = '/content/drive/MyDrive/test'
parse_directory(directory_to_scan)

Scanning sample.cpp...
Insecure method found: MD5 in /content/drive/MyDrive/test/sample.cpp
Description: MD5 is a weak hashing algorithm that is vulnerable to collision attacks.
Suggestion: Use SHA-256 or better for hashing.
--------------------------------------------------
Insecure method found: SHA1 in /content/drive/MyDrive/test/sample.cpp
Description: SHA-1 is no longer considered secure due to vulnerability to collision attacks.
Suggestion: Use SHA-256 or better for hashing.
--------------------------------------------------
Insecure method found: SHA-1 in /content/drive/MyDrive/test/sample.cpp
Description: SHA-1 is no longer considered secure due to vulnerability to collision attacks.
Suggestion: Use SHA-256 or better for hashing.
--------------------------------------------------


In [None]:
!apt-get install -y cppcheck

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  libtinyxml2-9 libz3-4 python3-pygments
Suggested packages:
  cppcheck-gui clang-tidy python-pygments-doc ttf-bitstream-vera
The following NEW packages will be installed:
  cppcheck libtinyxml2-9 libz3-4 python3-pygments
0 upgraded, 4 newly installed, 0 to remove and 49 not upgraded.
Need to get 8,564 kB of archives.
After this operation, 29.3 MB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 libtinyxml2-9 amd64 9.0.0+dfsg-3 [32.5 kB]
Get:2 http://archive.ubuntu.com/ubuntu jammy/universe amd64 libz3-4 amd64 4.8.12-1 [5,766 kB]
Get:3 http://archive.ubuntu.com/ubuntu jammy/main amd64 python3-pygments all 2.11.2+dfsg-2 [750 kB]
Get:4 http://archive.ubuntu.com/ubuntu jammy/universe amd64 cppcheck amd64 2.7-1 [2,016 kB]
Fetched 8,564 kB in 3s (2,930 kB/s)
Selecting previously unselected pack

In [None]:
import os
if os.path.exists(directory_to_scan):
    print("Directory contents:", os.listdir(directory_to_scan))
else:
    print("Directory does not exist.")

# Run Cppcheck and output to a text file
output_file = 'cppcheck_report.txt'
!cppcheck --enable=all {directory_to_scan} --output-file={output_file}

# Read and print the report if it exists
if os.path.exists(output_file):
    with open(output_file, 'r') as f:
        report = f.read()
    print(report)
else:
    print("Cppcheck report was not generated.")

Directory contents: ['sample.cpp']
[32mChecking /content/drive/MyDrive/test/sample.cpp ...[0m
[1m/content/drive/MyDrive/test/sample.cpp:16:0: [31mstyle:[39m The function 'safe_thread_func' is never used. [unusedFunction][0m

^
[1mnofile:0:0: [31minformation:[39m Cppcheck cannot find all the include files (use --check-config for details) [missingIncludeSystem][0m




In [None]:
import os
import re

def find_insecure_crypto(directory):
    # Regex pattern to match MD5 and SHA-1 function calls
    insecure_pattern = re.compile(r'\b(MD5|SHA1)\b', re.IGNORECASE)

    vulnerabilities = []

    # Walk through the directory
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith('.cpp'):
                file_path = os.path.join(root, file)
                with open(file_path, 'r', encoding='utf-8') as f:
                    code = f.read()

                    # Search for insecure cryptographic methods
                    matches = insecure_pattern.findall(code)
                    if matches:
                        for match in matches:
                            vulnerability_info = {
                                "file": file_path,
                                "function": match,
                                "explanation": (
                                    f"{match} is an insecure cryptographic function. "
                                    "Both MD5 and SHA-1 are considered weak due to vulnerabilities "
                                    "that allow for collision attacks, where two different inputs "
                                    "produce the same hash. This can lead to security issues such as "
                                    "forgery of digital signatures and data integrity violations."
                                ),
                                "suggestion": (
                                    "Consider using stronger hashing algorithms like SHA-256 or SHA-3. "
                                    "Additionally, review the cryptographic standards for your application "
                                    "to ensure compliance with modern security practices."
                                )
                            }
                            vulnerabilities.append(vulnerability_info)

    return vulnerabilities

# Example usage
directory_to_scan = '/content/drive/MyDrive/test'
results = find_insecure_crypto(directory_to_scan)

if results:
    for result in results:
        print(f"File: {result['file']}")
        print(f"Function: {result['function']}")
        print(f"Explanation: {result['explanation']}")
        print(f"Suggestion: {result['suggestion']}")
        print("-" * 80)  # Separator for better readability
else:
    print("No vulnerabilities found.")


File: /content/drive/MyDrive/test/sample.cpp
Function: md5
Explanation: md5 is an insecure cryptographic function. Both MD5 and SHA-1 are considered weak due to vulnerabilities that allow for collision attacks, where two different inputs produce the same hash. This can lead to security issues such as forgery of digital signatures and data integrity violations.
Suggestion: Consider using stronger hashing algorithms like SHA-256 or SHA-3. Additionally, review the cryptographic standards for your application to ensure compliance with modern security practices.
--------------------------------------------------------------------------------
File: /content/drive/MyDrive/test/sample.cpp
Function: MD5
Explanation: MD5 is an insecure cryptographic function. Both MD5 and SHA-1 are considered weak due to vulnerabilities that allow for collision attacks, where two different inputs produce the same hash. This can lead to security issues such as forgery of digital signatures and data integrity viol

In [None]:
import os
import re

def find_insecure_crypto(directory):
    # Regex pattern to match MD5 and SHA-1 function calls
    insecure_pattern = re.compile(r'\b(MD5|SHA1)\b', re.IGNORECASE)

    vulnerabilities = []

    # Walk through the directory
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith('.cpp'):
                file_path = os.path.join(root, file)
                with open(file_path, 'r', encoding='utf-8') as f:
                    code = f.read()

                    # Search for insecure cryptographic methods
                    matches = insecure_pattern.findall(code)
                    if matches:
                        for match in matches:
                            vulnerability_info = {
                                "file": file_path,
                                "function": match,
                                "explanation": (
                                    f"{match} is an insecure cryptographic function. "
                                    "Both MD5 and SHA-1 are considered weak due to vulnerabilities "
                                    "that allow for collision attacks, where two different inputs "
                                    "produce the same hash. This can lead to security issues such as "
                                    "forgery of digital signatures and data integrity violations."
                                ),
                                "suggestion": (
                                    "Consider using stronger hashing algorithms like SHA-256 or SHA-3. "
                                    "Additionally, review the cryptographic standards for your application "
                                    "to ensure compliance with modern security practices."
                                )
                            }
                            vulnerabilities.append(vulnerability_info)

    return vulnerabilities

# Example usage
directory_to_scan = '/content/drive/MyDrive/test'
results = find_insecure_crypto(directory_to_scan)

if results:
    for result in results:
        print(f"File: {result['file']}")
        print(f"Function: {result['function']}")
        print(f"Explanation: {result['explanation']}")
        print(f"Suggestion: {result['suggestion']}")
        print("-" * 80)  # Separator for better readability
else:
    print("No vulnerabilities found.")

File: /content/drive/MyDrive/test/sample.cpp
Function: md5
Explanation: md5 is an insecure cryptographic function. Both MD5 and SHA-1 are considered weak due to vulnerabilities that allow for collision attacks, where two different inputs produce the same hash. This can lead to security issues such as forgery of digital signatures and data integrity violations.
Suggestion: Consider using stronger hashing algorithms like SHA-256 or SHA-3. Additionally, review the cryptographic standards for your application to ensure compliance with modern security practices.
--------------------------------------------------------------------------------
File: /content/drive/MyDrive/test/sample.cpp
Function: MD5
Explanation: MD5 is an insecure cryptographic function. Both MD5 and SHA-1 are considered weak due to vulnerabilities that allow for collision attacks, where two different inputs produce the same hash. This can lead to security issues such as forgery of digital signatures and data integrity viol

In [None]:
import os
import re

def find_insecure_crypto(directory):
    # Regex pattern to match MD5 and SHA-1 function calls (including EVP_ prefixed ones)
    insecure_pattern = re.compile(r'\b(EVP_(md5|sha1)|MD5|SHA1)\b', re.IGNORECASE)

    vulnerabilities = []

    # Walk through the directory
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith('.cpp'):
                file_path = os.path.join(root, file)
                with open(file_path, 'r', encoding='utf-8') as f:
                    code = f.read()

                    # Search for insecure cryptographic methods
                    matches = insecure_pattern.findall(code)
                    if matches:
                        for match in matches:
                            # Extract the function name
                            function_name = match[0]  # The full match (e.g., EVP_md5)
                            vulnerability_info = {
                                "file": file_path,
                                "function": function_name,
                                "explanation": (
                                    f"{function_name} is an insecure cryptographic function. "
                                    "Both MD5 and SHA-1 are considered weak due to vulnerabilities "
                                    "that allow for collision attacks, where two different inputs "
                                    "produce the same hash. This can lead to security issues such as "
                                    "forgery of digital signatures and data integrity violations."
                                ),
                                "suggestion": (
                                    "Consider using stronger hashing algorithms like SHA-256 or SHA-3. "
                                    "Additionally, review the cryptographic standards for your application "
                                    "to ensure compliance with modern security practices."
                                )
                            }
                            vulnerabilities.append(vulnerability_info)

    return vulnerabilities

# Example usage
directory_to_scan = '/content/drive/MyDrive/test'
results = find_insecure_crypto(directory_to_scan)

if results:
    for result in results:
        print(f"File: {result['file']}")
        print(f"Function: {result['function']}")
        print(f"Explanation: {result['explanation']}")
        print(f"Suggestion: {result['suggestion']}")
        print("-" * 80)  # Separator for better readability
else:
    print("No vulnerabilities found.")

File: /content/drive/MyDrive/test/sample.cpp
Function: md5
Explanation: md5 is an insecure cryptographic function. Both MD5 and SHA-1 are considered weak due to vulnerabilities that allow for collision attacks, where two different inputs produce the same hash. This can lead to security issues such as forgery of digital signatures and data integrity violations.
Suggestion: Consider using stronger hashing algorithms like SHA-256 or SHA-3. Additionally, review the cryptographic standards for your application to ensure compliance with modern security practices.
--------------------------------------------------------------------------------
File: /content/drive/MyDrive/test/sample.cpp
Function: MD5
Explanation: MD5 is an insecure cryptographic function. Both MD5 and SHA-1 are considered weak due to vulnerabilities that allow for collision attacks, where two different inputs produce the same hash. This can lead to security issues such as forgery of digital signatures and data integrity viol

In [None]:
import subprocess

def run_cppcheck(file_path):
    command = ['cppcheck', '--enable=all', '--inconclusive', '--std=c++11', '--verbose', file_path]
    try:
        result = subprocess.run(command, capture_output=True, text=True, check=True)
        print("Cppcheck Output:\n", result.stdout)
    except subprocess.CalledProcessError as e:
        print("Error running Cppcheck:\n", e.stderr)

# Example usage
cpp_file = '/content/drive/MyDrive/test/sample.cpp'
run_cppcheck(cpp_file)

Cppcheck Output:
 Checking /content/drive/MyDrive/test/sample.cpp ...
Defines:
Undefines:
Includes:
Platform:Native



In [None]:
!apt-get install valgrind

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  gdb libbabeltrace1 libc6-dbg libdebuginfod-common libdebuginfod1 libipt2
  libsource-highlight-common libsource-highlight4v5
Suggested packages:
  gdb-doc gdbserver valgrind-dbg valgrind-mpi kcachegrind alleyoop valkyrie
The following NEW packages will be installed:
  gdb libbabeltrace1 libc6-dbg libdebuginfod-common libdebuginfod1 libipt2
  libsource-highlight-common libsource-highlight4v5 valgrind
0 upgraded, 9 newly installed, 0 to remove and 50 not upgraded.
Need to get 32.3 MB of archives.
After this operation, 111 MB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/main amd64 libdebuginfod-common all 0.186-1build1 [7,878 B]
Get:2 http://archive.ubuntu.com/ubuntu jammy/main amd64 libbabeltrace1 amd64 1.5.8-2build1 [160 kB]
Get:3 http://archive.ubuntu.com/ubuntu jammy/main amd64 libdebuginfod1 amd6

In [None]:
# Step 1: Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Step 2: Define the path to your C++ file in Google Drive
cpp_file_path = '/content/drive/MyDrive/test/sample.cpp'

# Step 3: Suggest running analysis tools
def suggest_analysis(file_name):
    print(f"\nTo check for race conditions in {file_name}, run the following commands:")
    print(f"1. Using ThreadSanitizer:\n   !g++ -fsanitize=thread {file_name} -o output_file && ./output_file")
    print(f"2. Using Helgrind:\n   !valgrind --tool=helgrind ./output_file")

# Step 4: Analyze the file
suggest_analysis(cpp_file_path)

# Step 5: Compile and run the analysis (uncomment the next line to execute)
!g++ -fsanitize=thread {cpp_file_path} -o output_file && ./output_file
!valgrind --tool=helgrind ./output_file

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).

To check for race conditions in /content/drive/MyDrive/test/sample.cpp, run the following commands:
1. Using ThreadSanitizer:
   !g++ -fsanitize=thread /content/drive/MyDrive/test/sample.cpp -o output_file && ./output_file
2. Using Helgrind:
   !valgrind --tool=helgrind ./output_file
[01m[K/content/drive/MyDrive/test/sample.cpp:[m[K In function ‘[01m[Kvoid md5_hash_function()[m[K’:
   33 |     [01;35m[KMD5((unsigned char*)input, strlen(input), output)[m[K; // Deprecated call
      |     [01;35m[K~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~[m[K
In file included from [01m[K/content/drive/MyDrive/test/sample.cpp:5[m[K:
[01m[K/usr/include/openssl/md5.h:52:38:[m[K [01;36m[Knote: [m[Kdeclared here
   52 | OSSL_DEPRECATEDIN_3_0 unsigned char *[01;36m[KMD5[m[K(const unsigned char *d, size_t n,
      |                             

In [None]:
!apt-get update
!apt-get install -y libssl-dev

0% [Working]            Get:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,626 B]
0% [Waiting for headers] [Waiting for headers] [1 InRelease 3,626 B/3,626 B 100%] [Connected to r2u.                                                                                                    Get:2 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
0% [Waiting for headers] [2 InRelease 12.7 kB/129 kB 10%] [1 InRelease 3,626 B/3,626 B 100%] [Connec0% [Waiting for headers] [2 InRelease 41.7 kB/129 kB 32%] [Connected to r2u.stat.illinois.edu (192.1                                                                                                    Get:3 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease [1,581 B]
Hit:4 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:5 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Ign:6 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Get:7 https://r2u.s

In [None]:
!openssl version

OpenSSL 3.0.2 15 Mar 2022 (Library: OpenSSL 3.0.2 15 Mar 2022)


In [None]:
!g++ {cpp_file_path} -o sample -lssl -lcrypto -pthread

[01m[K/content/drive/MyDrive/test/sample.cpp:[m[K In function ‘[01m[Kvoid md5_hash_function()[m[K’:
   33 |     [01;35m[KMD5((unsigned char*)input, strlen(input), output)[m[K; // Deprecated call
      |     [01;35m[K~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~[m[K
In file included from [01m[K/content/drive/MyDrive/test/sample.cpp:5[m[K:
[01m[K/usr/include/openssl/md5.h:52:38:[m[K [01;36m[Knote: [m[Kdeclared here
   52 | OSSL_DEPRECATEDIN_3_0 unsigned char *[01;36m[KMD5[m[K(const unsigned char *d, size_t n,
      |                                      [01;36m[K^~~[m[K


In [None]:
!./sample

Final shared_data (unsafe): 263476
Final shared_data (safe): 300000
MD5 (deprecated): 8d777f385d3dfec8815d20f7496026dc
SHA-1 (deprecated): a17c9aaa61e80a1bf71d0d850af4e5baa9800bbd
MD5 (EVP): 8d777f385d3dfec8815d20f7496026dc
SHA-1 (EVP): a17c9aaa61e80a1bf71d0d850af4e5baa9800bbd


In [None]:
!valgrind --tool=helgrind ./sample

==2261== Helgrind, a thread error detector
==2261== Copyright (C) 2007-2017, and GNU GPL'd, by OpenWorks LLP et al.
==2261== Using Valgrind-3.18.1 and LibVEX; rerun with -h for copyright info
==2261== Command: ./sample
==2261== 
==2261== ---Thread-Announcement------------------------------------------
==2261== 
==2261== Thread #3 was created
==2261==    at 0x502D9F3: clone (clone.S:76)
==2261==    by 0x502E8EE: __clone_internal (clone-internal.c:83)
==2261==    by 0x4F9C6D8: create_thread (pthread_create.c:295)
==2261==    by 0x4F9D1FF: pthread_create@@GLIBC_2.34 (pthread_create.c:828)
==2261==    by 0x4853767: ??? (in /usr/libexec/valgrind/vgpreload_helgrind-amd64-linux.so)
==2261==    by 0x4D98328: std::thread::_M_start_thread(std::unique_ptr<std::thread::_State, std::default_delete<std::thread::_State> >, void (*)()) (in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.30)
==2261==    by 0x10BBF7: std::thread::thread<void (&)(), , void>(void (&)()) (in /content/sample)
==2261==    by 0x10

In [None]:
cpp_file_path = '/content/drive/MyDrive/test/sample2.cpp'
!g++ {cpp_file_path} -o sample -lssl -lcrypto -pthread

In [None]:
!./sample

Final shared_data (safe): 300000
Hash-1: 8d777f385d3dfec8815d20f7496026dc
Hash-2: a17c9aaa61e80a1bf71d0d850af4e5baa9800bbd


In [None]:
!valgrind --tool=helgrind ./sample

==3464== Helgrind, a thread error detector
==3464== Copyright (C) 2007-2017, and GNU GPL'd, by OpenWorks LLP et al.
==3464== Using Valgrind-3.18.1 and LibVEX; rerun with -h for copyright info
==3464== Command: ./sample
==3464== 
Final shared_data (safe): 300000
Hash-1: 8d777f385d3dfec8815d20f7496026dc
Hash-2: a17c9aaa61e80a1bf71d0d850af4e5baa9800bbd
==3464== 
==3464== Use --history-level=approx or =none to gain increased speed, at
==3464== the cost of reduced accuracy of conflicting-access information
==3464== For lists of detected and suppressed errors, rerun with: -s
==3464== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 882560 from 7)


In [None]:
cpp_file_path = '/content/drive/MyDrive/test/sample3.cpp'
!g++ {cpp_file_path} -o sample -lssl -lcrypto -pthread

In [None]:
!./sample

Final shared_data (safe): 300000
SHA-256: 3a6eb0790f39ac87c94f3856b2dd2c5d110e6811602261a9a923d3bb23adc8b7


In [None]:
!valgrind --tool=helgrind ./sample

==29729== Helgrind, a thread error detector
==29729== Copyright (C) 2007-2017, and GNU GPL'd, by OpenWorks LLP et al.
==29729== Using Valgrind-3.18.1 and LibVEX; rerun with -h for copyright info
==29729== Command: ./sample
==29729== 
Final shared_data (safe): 300000
SHA-256: 3a6eb0790f39ac87c94f3856b2dd2c5d110e6811602261a9a923d3bb23adc8b7
==29729== 
==29729== Use --history-level=approx or =none to gain increased speed, at
==29729== the cost of reduced accuracy of conflicting-access information
==29729== For lists of detected and suppressed errors, rerun with: -s
==29729== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 873793 from 7)


In [None]:
cpp_file_path = '/content/drive/MyDrive/test/sample4.cpp'
!g++ {cpp_file_path} -o sample -lssl -lcrypto -pthread

In [None]:
!./sample

Final counter value: 154322


In [None]:
!valgrind --tool=helgrind ./sample

==35331== Helgrind, a thread error detector
==35331== Copyright (C) 2007-2017, and GNU GPL'd, by OpenWorks LLP et al.
==35331== Using Valgrind-3.18.1 and LibVEX; rerun with -h for copyright info
==35331== Command: ./sample
==35331== 
==35331== ---Thread-Announcement------------------------------------------
==35331== 
==35331== Thread #3 was created
==35331==    at 0x4BE99F3: clone (clone.S:76)
==35331==    by 0x4BEA8EE: __clone_internal (clone-internal.c:83)
==35331==    by 0x4B586D8: create_thread (pthread_create.c:295)
==35331==    by 0x4B591FF: pthread_create@@GLIBC_2.34 (pthread_create.c:828)
==35331==    by 0x4853767: ??? (in /usr/libexec/valgrind/vgpreload_helgrind-amd64-linux.so)
==35331==    by 0x4954328: std::thread::_M_start_thread(std::unique_ptr<std::thread::_State, std::default_delete<std::thread::_State> >, void (*)()) (in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.30)
==35331==    by 0x1095B7: std::thread::thread<void (&)(), , void>(void (&)()) (in /content/sample)
==35

In [None]:
cpp_file_path = '/content/drive/MyDrive/test/sample5.cpp'
!g++ {cpp_file_path} -o sample -lssl -lcrypto -pthread

In [None]:
!./sample

Final counter value: 200000


In [None]:
!valgrind --tool=helgrind ./sample

==35537== Helgrind, a thread error detector
==35537== Copyright (C) 2007-2017, and GNU GPL'd, by OpenWorks LLP et al.
==35537== Using Valgrind-3.18.1 and LibVEX; rerun with -h for copyright info
==35537== Command: ./sample
==35537== 
Final counter value: 200000
==35537== 
==35537== Use --history-level=approx or =none to gain increased speed, at
==35537== the cost of reduced accuracy of conflicting-access information
==35537== For lists of detected and suppressed errors, rerun with: -s
==35537== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 580236 from 7)
