In [None]:
import os
import re

def remove_comments_and_strings(line):
    # Remove single-line comments
    line = re.sub(r'//.*', '', line)
    # Remove multi-line comments
    line = re.sub(r'/\*.*?\*/', '', line, flags=re.DOTALL)
    # Remove string literals
    line = re.sub(r'"([^"\\]*(\\.[^"\\]*)*)"', '', line)
    return line.strip()

def find_insecure_crypto(directory):
    # List of weak functions
    weak_funcs = [
        "MD5", "SHA1", "EVP_md5", "EVP_sha1",
        "DES_ecb_encrypt"
    ]
    # Create a regex pattern to match the weak functions
    insecure_pattern = re.compile(r'\b(' + '|'.join(weak_funcs) + r')\b', re.IGNORECASE)

    vulnerabilities = []

    # Walk through the directory
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith('.cpp'):
                file_path = os.path.join(root, file)
                with open(file_path, 'r', encoding='utf-8') as f:
                    for line_number, line in enumerate(f, start=1):
                        cleaned_line = remove_comments_and_strings(line)

                        # Search for insecure cryptographic methods in the cleaned line
                        if cleaned_line and insecure_pattern.search(cleaned_line):
                            function_name = insecure_pattern.search(cleaned_line).group(0)  # Get the matched function
                            vulnerability_info = {
                                "file": file_path,
                                "line": line_number,
                                "function": function_name,
                                "explanation": (
                                    f"{function_name} is an insecure cryptographic function. "
                                    "These functions are considered weak due to vulnerabilities "
                                    "that allow for collision attacks, where two different inputs "
                                    "produce the same hash. This can lead to security issues such as "
                                    "forgery of digital signatures and data integrity violations."
                                ),
                                "suggestion": (
                                    "Consider using stronger hashing algorithms like SHA-256 or SHA-3. "
                                    "Additionally, review the cryptographic standards for your application "
                                    "to ensure compliance with modern security practices."
                                )
                            }
                            vulnerabilities.append(vulnerability_info)

    return vulnerabilities

# Example usage
directory_to_scan = '/content/drive/MyDrive/test'
results = find_insecure_crypto(directory_to_scan)

if results:
    for result in results:
        print(f"File: {result['file']}")
        print(f"Line: {result['line']}")
        print(f"Function: {result['function']}")
        print(f"Explanation: {result['explanation']}")
        print(f"Suggestion: {result['suggestion']}")
        print("-" * 80)  # Separator for better readability
else:
    print("No vulnerabilities found.")

File: /content/drive/MyDrive/test/sample.cpp
Line: 5
Function: md5
Explanation: md5 is an insecure cryptographic function. These functions are considered weak due to vulnerabilities that allow for collision attacks, where two different inputs produce the same hash. This can lead to security issues such as forgery of digital signatures and data integrity violations.
Suggestion: Consider using stronger hashing algorithms like SHA-256 or SHA-3. Additionally, review the cryptographic standards for your application to ensure compliance with modern security practices.
--------------------------------------------------------------------------------
File: /content/drive/MyDrive/test/sample.cpp
Line: 33
Function: MD5
Explanation: MD5 is an insecure cryptographic function. These functions are considered weak due to vulnerabilities that allow for collision attacks, where two different inputs produce the same hash. This can lead to security issues such as forgery of digital signatures and data int

In [None]:
import clang.cindex
import os
import re
from google.colab import drive

drive.mount('/content/drive')

class CppVulnerabilityAnalyzer:
    def __init__(self, directory):
        self.directory = directory
        self.index = clang.cindex.Index.create()
        self.vulnerabilities = []
        self.weak_funcs = [
            "MD5", "SHA1", "EVP_md5", "EVP_sha1",
            "DES_ecb_encrypt"
        ]
        self.weak_headers = {
            "MD5": "<openssl/md5.h>",
            "SHA1": "<openssl/sha.h>",
            "EVP_md5": "<openssl/evp.h>",
            "EVP_sha1": "<openssl/evp.h>",
            "DES_ecb_encrypt": "<openssl/des.h>",
            "SHA1_Init": "<openssl/sha.h>",
            "SHA1_Update": "<openssl/sha.h>",
            "SHA1_Final": "<openssl/sha.h>",
            "SHA1_Transform": "<openssl/sha.h>",
            "PKCS5_PBKDF2_HMAC_SHA1": "<openssl/evp.h>",
            "EVP_md5_sha1": "<openssl/evp.h>",
            "MD5_CTX": "<openssl/md5.h>",
            "MD5state_st": "<openssl/md5.h>",
            "MD5_Init": "<openssl/md5.h>",
            "MD5_Update": "<openssl/md5.h>",
            "MD5_Final": "<openssl/md5.h>",
            "MD5_Transform": "<openssl/md5.h>"
        }
        self.dynamic_patterns = [
            r'\b(md5|sha1|des)\b',
            r'\b(digest|hash|encrypt|generate)\b.*\b(init|update|final|ecb)\b',
            r'\b(use|apply|create|compute)\b.*\b(md5|sha1|des)\b'
        ]
        self.insecure_pattern = re.compile(r'\b(EVP_(md5|sha1)|MD5|SHA1|DES_ecb_encrypt)\b', re.IGNORECASE)

    def analyze(self):
        for root, _, files in os.walk(self.directory):
            for file in files:
                if file.endswith('.cpp'):
                    file_path = os.path.join(root, file)
                    translation_unit = self.index.parse(file_path)
                    self.traverse_ast(translation_unit.cursor, file_path)

    def traverse_ast(self, node, file_path, indent=0):
        indent_str = ' ' * (indent * 2)
        if node.location.file:
            print(f"{indent_str}Visiting node: {node.kind} ({node.spelling}) at line {node.location.line}")

        # Detect weak cryptographic function usage
        self.detect_weak_crypto(node, file_path)

        # Recursively traverse all child nodes
        for child in node.get_children():
            self.traverse_ast(child, file_path, indent + 1)

    def detect_weak_crypto(self, node, file_path):
        if node.kind == clang.cindex.CursorKind.CALL_EXPR:
            if any(weak_func in node.spelling for weak_func in self.weak_funcs):
                header_info = self.get_header_info(node.spelling)
                self.report_vulnerability(node, file_path, header_info)

            for pattern in self.dynamic_patterns:
                if re.search(pattern, node.spelling, re.IGNORECASE):
                    header_info = self.get_header_info(node.spelling)
                    self.report_vulnerability(node, file_path, header_info)

        elif node.kind == clang.cindex.CursorKind.IF_STMT:
            for child in node.get_children():
                if any(weak_func in child.spelling for weak_func in self.weak_funcs):
                    header_info = self.get_header_info(child.spelling)
                    self.report_vulnerability(child, file_path, header_info)

        if node.kind == clang.cindex.CursorKind.STRING_LITERAL:
            if any(re.search(pattern, node.spelling, re.IGNORECASE) for pattern in self.dynamic_patterns):
                self.report_string_literal_vulnerability(node, file_path)

        # Additional regex check on line content
        self.check_line_for_insecure_patterns(node, file_path)

    def check_line_for_insecure_patterns(self, node, file_path):
        line_number = node.location.line
        line_content = self.get_line_content(file_path, line_number)
        clean_line = self.remove_strings_and_comments(line_content)

        if self.insecure_pattern.search(clean_line):
            function_name = self.insecure_pattern.search(clean_line).group(0)
            vulnerability_info = {
                "file": file_path,
                "line_number": line_number,
                "line": line_content.strip(),
                "function": function_name,
                "explanation": (
                    f"{function_name} is an insecure cryptographic function. "
                    "Both MD5 and SHA-1 are considered weak due to vulnerabilities "
                    "that allow for collision attacks."
                ),
                "suggestion": (
                    "Consider using stronger hashing algorithms like SHA-256 or SHA-3."
                )
            }
            self.vulnerabilities.append(vulnerability_info)

    def remove_strings_and_comments(self, code):
        # Remove C++ single-line comments (//)
        code = re.sub(r'//.*', '', code)
        # Remove C++ multi-line comments (/* ... */)
        code = re.sub(r'/\*.*?\*/', '', code, flags=re.DOTALL)
        # Remove string literals
        code = re.sub(r'"(.*?)"', '', code)
        code = re.sub(r"'(.*?)'", '', code)
        return code

    def report_string_literal_vulnerability(self, node, file_path):
        line_number = node.location.line
        line_content = self.get_line_content(file_path, line_number)

        print(f"String literal suggests weak cryptographic algorithm: {node.spelling} at line {line_number}")
        vulnerability_info = {
            "file": file_path,
            "line_number": line_number,
            "line": line_content,
            "function": "String Literal",
            "header": "N/A",
            "explanation": (
                "This string literal suggests a potential use of a weak cryptographic function. "
                "Review the context for security concerns."
            ),
            "suggestion": (
                "Consider reviewing the string content for potential security issues related to "
                "weak cryptographic algorithms."
            )
        }
        self.vulnerabilities.append(vulnerability_info)

    def get_header_info(self, function_name):
        return self.weak_headers.get(function_name, "Unknown header")

    def report_vulnerability(self, node, file_path, header_info):
        line_number = node.location.line
        line_content = self.get_line_content(file_path, line_number)

        additional_info = " (line number may refer to header file or included file)" if line_number > self.get_line_count(file_path) else ""

        print(f"Weak cryptographic algorithm detected: {node.spelling} at line {line_number}")
        vulnerability_info = {
            "file": file_path,
            "line_number": line_number,
            "line": line_content,
            "function": node.spelling,
            "header": header_info,
            "explanation": (
                f"{node.spelling} is an insecure cryptographic function. "
                "Both MD5 and SHA-1 are considered weak due to vulnerabilities "
                "that allow for collision attacks."
            ),
            "suggestion": (
                "Consider using stronger hashing algorithms like SHA-256 or SHA-3, "
                "or more secure encryption methods such as AES."
            ),
            "additional_info": additional_info
        }
        self.vulnerabilities.append(vulnerability_info)

    def get_line_content(self, file_path, line_number):
        with open(file_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()
            return lines[line_number - 1].strip() if 0 < line_number <= len(lines) else ""

    def get_line_count(self, file_path):
        with open(file_path, 'r', encoding='utf-8') as f:
            return sum(1 for _ in f)

    def report(self):
        if self.vulnerabilities:
            for result in self.vulnerabilities:
                print(f"File: {result['file']}")
                print(f"Line Number: {result['line_number']}{result.get('additional_info', '')}")
                print(f"Line: {result['line']}")
                print(f"Function: {result['function']}")
                print(f"Header: {result.get('header', 'N/A')}")
                print(f"Explanation: {result['explanation']}")
                print(f"Suggestion: {result['suggestion']}")
                print("-" * 80)
        else:
            print("\nNo vulnerabilities detected.")

if __name__ == "__main__":
    directory_to_scan = '/content/drive/MyDrive/test'
    analyzer = CppVulnerabilityAnalyzer(directory_to_scan)
    analyzer.analyze()
    analyzer.report()

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
            Visiting node: CursorKind.TYPE_REF (std::vector::allocator_type) at line 523
        Visiting node: CursorKind.TYPE_REF (std::vector::_Base) at line 524
        Visiting node: CursorKind.UNEXPOSED_EXPR () at line 524
          Visiting node: CursorKind.CALL_EXPR (_S_check_init_len) at line 524
            Visiting node: CursorKind.DECL_REF_EXPR (_S_check_init_len) at line 524
            Visiting node: CursorKind.DECL_REF_EXPR (__n) at line 524
            Visiting node: CursorKind.DECL_REF_EXPR (__a) at line 524
          Visiting node: CursorKind.DECL_REF_EXPR (__a) at line 524
        Visiting node: CursorKind.COMPOUND_STMT () at line 525
          Visiting node: CursorKind.CALL_EXPR (_M_fill_initialize) at line 525
            Visiting node: CursorKind.MEMBER_REF_EXPR (_M_fill_initialize) at line 525
            Visiting node: CursorKind.DECL_REF_EXPR (__n) at line 525
            Visiting node: CursorKind

In [None]:
import clang.cindex
import os
from google.colab import drive

drive.mount('/content/drive')

class CppVulnerabilityAnalyzer:
    def __init__(self, directory):
        self.directory = directory
        self.index = clang.cindex.Index.create()
        self.vulnerabilities = []

    def analyze(self):
        # Walk through the directory
        for root, _, files in os.walk(self.directory):
            for file in files:
                if file.endswith('.cpp'):
                    file_path = os.path.join(root, file)
                    translation_unit = self.index.parse(file_path)
                    self.traverse_ast(translation_unit.cursor, file_path)

    def traverse_ast(self, node, file_path, indent=0):
        indent_str = ' ' * (indent * 2)  # Two spaces per indentation level
        if node.location.file:
            print(f"{indent_str}Visiting node: {node.kind} ({node.spelling}) at line {node.location.line}")

        # Detect weak cryptographic function usage
        self.detect_weak_crypto(node, file_path)

        # Recursively traverse all child nodes
        for child in node.get_children():
            self.traverse_ast(child, file_path, indent + 1)

    def detect_weak_crypto(self, node, file_path):
        if node.kind in [clang.cindex.CursorKind.CALL_EXPR, clang.cindex.CursorKind.UNEXPOSED_EXPR]:
            weak_funcs = [
                "MD5", "SHA1", "EVP_md5", "EVP_sha1",
                "EVP_DigestInit", "EVP_DigestUpdate", "EVP_DigestFinal"
            ]

            # Check for direct function calls
            if isinstance(node, clang.cindex.Cursor):
                if any(weak_func in node.spelling for weak_func in weak_funcs):
                    self.report_vulnerability(node, file_path)
                else:
                    # Check children for unexposed expressions
                    for child in node.get_children():
                        if any(weak_func in child.spelling for weak_func in weak_funcs):
                            self.report_vulnerability(child, file_path)
                            break

    def report_vulnerability(self, node, file_path):
        print(f"Weak cryptographic algorithm detected: {node.spelling} at line {node.location.line}")
        vulnerability_info = {
            "file": file_path,
            "line_number": node.location.line,
            "line": self.get_line_content(file_path, node.location.line),
            "function": node.spelling,
            "explanation": (
                f"{node.spelling} is an insecure cryptographic function. "
                "Both MD5 and SHA-1 are considered weak due to vulnerabilities "
                "that allow for collision attacks, where two different inputs "
                "produce the same hash. This can lead to security issues such as "
                "forgery of digital signatures and data integrity violations."
            ),
            "suggestion": (
                "Consider using stronger hashing algorithms like SHA-256 or SHA-3. "
                "Additionally, review the cryptographic standards for your application "
                "to ensure compliance with modern security practices."
            )
        }
        self.vulnerabilities.append(vulnerability_info)

    def get_line_content(self, file_path, line_number):
        """Retrieve the content of the specified line from the file."""
        with open(file_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()
            return lines[line_number - 1].strip() if 0 < line_number <= len(lines) else ""

    def report(self):
        if self.vulnerabilities:
            for result in self.vulnerabilities:
                print(f"File: {result['file']}")
                print(f"Line Number: {result['line_number']}")
                print(f"Line: {result['line']}")
                print(f"Function: {result['function']}")
                print(f"Explanation: {result['explanation']}")
                print(f"Suggestion: {result['suggestion']}")
                print("-" * 80)  # Separator for better readability
        else:
            print("\nNo vulnerabilities detected.")

if __name__ == "__main__":
    directory_to_scan = '/content/drive/MyDrive/test'
    analyzer = CppVulnerabilityAnalyzer(directory_to_scan)
    analyzer.analyze()
    analyzer.report()

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
        Visiting node: CursorKind.TYPE_REF (EVP_CIPHER) at line 492
      Visiting node: CursorKind.PARM_DECL (md) at line 492
        Visiting node: CursorKind.TYPE_REF (EVP_MD) at line 492
      Visiting node: CursorKind.PARM_DECL (en_de) at line 493
      Visiting node: CursorKind.PARM_DECL (libctx) at line 493
        Visiting node: CursorKind.TYPE_REF (OSSL_LIB_CTX) at line 493
      Visiting node: CursorKind.PARM_DECL (propq) at line 493
    Visiting node: CursorKind.FUNCTION_DECL (EVP_MD_get_type) at line 531
      Visiting node: CursorKind.PARM_DECL (md) at line 531
        Visiting node: CursorKind.TYPE_REF (EVP_MD) at line 531
    Visiting node: CursorKind.FUNCTION_DECL (EVP_MD_get0_name) at line 534
      Visiting node: CursorKind.PARM_DECL (md) at line 534
        Visiting node: CursorKind.TYPE_REF (EVP_MD) at line 534
    Visiting node: CursorKind.FUNCTION_DECL (EVP_MD_get0_description) at line 536
      Visi

In [None]:
import clang.cindex
import os
import re
from google.colab import drive

drive.mount('/content/drive')

class CppVulnerabilityAnalyzer:
    def __init__(self, directory):
        self.directory = directory
        self.index = clang.cindex.Index.create()
        self.vulnerabilities = []
        self.weak_funcs = [
            "MD5", "SHA1", "EVP_md5", "EVP_sha1",
            "DES_ecb_encrypt"
        ]
        self.weak_headers = {
            "MD5": "<openssl/md5.h>",
            "SHA1": "<openssl/sha.h>",
            "EVP_md5": "<openssl/evp.h>",
            "EVP_sha1": "<openssl/evp.h>",
            "DES_ecb_encrypt": "<openssl/des.h>"
        }
        self.dynamic_patterns = [
            r'\b(md5|sha1|des)\b',
            r'\b(digest|hash|encrypt)\b.*\b(init|update|final|ecb)\b'
        ]

    def analyze(self):
        # Walk through the directory
        for root, _, files in os.walk(self.directory):
            for file in files:
                if file.endswith('.cpp'):
                    file_path = os.path.join(root, file)
                    translation_unit = self.index.parse(file_path)
                    self.traverse_ast(translation_unit.cursor, file_path)

    def traverse_ast(self, node, file_path, indent=0):
        indent_str = ' ' * (indent * 2)  # Two spaces per indentation level
        if node.location.file:
            print(f"{indent_str}Visiting node: {node.kind} ({node.spelling}) at line {node.location.line}")

        # Detect weak cryptographic function usage
        self.detect_weak_crypto(node, file_path)

        # Recursively traverse all child nodes
        for child in node.get_children():
            self.traverse_ast(child, file_path, indent + 1)

    def detect_weak_crypto(self, node, file_path):
        if node.kind == clang.cindex.CursorKind.CALL_EXPR:
            if any(weak_func in node.spelling for weak_func in self.weak_funcs):
                header_info = self.get_header_info(node.spelling)
                self.report_vulnerability(node, file_path, header_info)

            for pattern in self.dynamic_patterns:
                if re.search(pattern, node.spelling, re.IGNORECASE):
                    header_info = self.get_header_info(node.spelling)
                    self.report_vulnerability(node, file_path, header_info)

        for child in node.get_children():
            if any(weak_func in child.spelling for weak_func in self.weak_funcs):
                header_info = self.get_header_info(child.spelling)
                self.report_vulnerability(child, file_path, header_info)

            for pattern in self.dynamic_patterns:
                if re.search(pattern, child.spelling, re.IGNORECASE):
                    header_info = self.get_header_info(child.spelling)
                    self.report_vulnerability(child, file_path, header_info)

    def get_header_info(self, function_name):
        """Return the header file associated with the weak function, if known."""
        return self.weak_headers.get(function_name, "Unknown header")

    def report_vulnerability(self, node, file_path, header_info):
        line_number = node.location.line
        line_content = self.get_line_content(file_path, line_number)

        # Check if line number exceeds file content
        if line_number > self.get_line_count(file_path):
            additional_info = " (line number may refer to header file or included file)"
        else:
            additional_info = ""

        print(f"Weak cryptographic algorithm detected: {node.spelling} at line {line_number}")
        vulnerability_info = {
            "file": file_path,
            "line_number": line_number,
            "line": line_content,
            "function": node.spelling,
            "header": header_info,
            "explanation": (
                f"{node.spelling} is an insecure cryptographic function. "
                "Both MD5 and SHA-1 are considered weak due to vulnerabilities "
                "that allow for collision attacks, where two different inputs "
                "produce the same hash. DES is also considered weak due to its "
                "short key length and vulnerabilities to various attacks."
            ),
            "suggestion": (
                "Consider using stronger hashing algorithms like SHA-256 or SHA-3, "
                "or more secure encryption methods such as AES. Additionally, "
                "review the cryptographic standards for your application to ensure "
                "compliance with modern security practices."
            ),
            "additional_info": additional_info
        }
        self.vulnerabilities.append(vulnerability_info)

    def get_line_content(self, file_path, line_number):
        """Retrieve the content of the specified line from the file."""
        with open(file_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()
            return lines[line_number - 1].strip() if 0 < line_number <= len(lines) else ""

    def get_line_count(self, file_path):
        """Retrieve the total number of lines in the specified file."""
        with open(file_path, 'r', encoding='utf-8') as f:
            return sum(1 for _ in f)

    def report(self):
        if self.vulnerabilities:
            for result in self.vulnerabilities:
                print(f"File: {result['file']}")
                print(f"Line Number: {result['line_number']}{result.get('additional_info', '')}")
                print(f"Line: {result['line']}")
                print(f"Function: {result['function']}")
                print(f"Header: {result['header']}")
                print(f"Explanation: {result['explanation']}")
                print(f"Suggestion: {result['suggestion']}")
                print("-" * 80)  # Separator for better readability
        else:
            print("\nNo vulnerabilities detected.")

if __name__ == "__main__":
    directory_to_scan = '/content/drive/MyDrive/test'
    analyzer = CppVulnerabilityAnalyzer(directory_to_scan)
    analyzer.analyze()
    analyzer.report()

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
    Visiting node: CursorKind.FUNCTION_DECL (EVP_DigestVerifyUpdate) at line 838
      Visiting node: CursorKind.PARM_DECL (ctx) at line 838
        Visiting node: CursorKind.TYPE_REF (EVP_MD_CTX) at line 838
      Visiting node: CursorKind.PARM_DECL (data) at line 838
      Visiting node: CursorKind.PARM_DECL (dsize) at line 838
    Visiting node: CursorKind.FUNCTION_DECL (EVP_DigestVerifyFinal) at line 839
      Visiting node: CursorKind.PARM_DECL (ctx) at line 839
        Visiting node: CursorKind.TYPE_REF (EVP_MD_CTX) at line 839
      Visiting node: CursorKind.PARM_DECL (sig) at line 839
      Visiting node: CursorKind.PARM_DECL (siglen) at line 840
    Visiting node: CursorKind.FUNCTION_DECL (EVP_OpenInit) at line 842
      Visiting node: CursorKind.PARM_DECL (ctx) at line 842
        Visiting node: CursorKind.TYPE_REF (EVP_CIPHER_CTX) at line 842
      Visiting node: CursorKind.PARM_DECL (type) at line 842
        

In [None]:
import clang.cindex
import os
import re
from google.colab import drive

drive.mount('/content/drive')

class CppVulnerabilityAnalyzer:
    def __init__(self, directory):
        self.directory = directory
        self.index = clang.cindex.Index.create()
        self.vulnerabilities = []
        self.weak_funcs = [
            "MD5", "SHA1", "EVP_md5", "EVP_sha1",
            "DES_ecb_encrypt"
        ]
        self.weak_headers = {
            "MD5": "<openssl/md5.h>",
            "SHA1": "<openssl/sha.h>",
            "EVP_md5": "<openssl/evp.h>",
            "EVP_sha1": "<openssl/evp.h>",
            "DES_ecb_encrypt": "<openssl/des.h>"
        }
        self.dynamic_patterns = [
            r'\b(md5|sha1|des)\b',
            r'\b(digest|hash|encrypt|generate)\b.*\b(init|update|final|ecb)\b',
            r'\b(use|apply|create|compute)\b.*\b(md5|sha1|des)\b'  # New broader patterns
        ]

    def analyze(self):
        # Walk through the directory
        for root, _, files in os.walk(self.directory):
            for file in files:
                if file.endswith('.cpp'):
                    file_path = os.path.join(root, file)
                    translation_unit = self.index.parse(file_path)
                    self.traverse_ast(translation_unit.cursor, file_path)

    def traverse_ast(self, node, file_path, indent=0):
        indent_str = ' ' * (indent * 2)  # Two spaces per indentation level
        if node.location.file:
            print(f"{indent_str}Visiting node: {node.kind} ({node.spelling}) at line {node.location.line}")

        # Detect weak cryptographic function usage
        self.detect_weak_crypto(node, file_path)

        # Recursively traverse all child nodes
        for child in node.get_children():
            self.traverse_ast(child, file_path, indent + 1)

    def detect_weak_crypto(self, node, file_path):
        if node.kind == clang.cindex.CursorKind.CALL_EXPR:
            if any(weak_func in node.spelling for weak_func in self.weak_funcs):
                header_info = self.get_header_info(node.spelling)
                self.report_vulnerability(node, file_path, header_info)

            for pattern in self.dynamic_patterns:
                if re.search(pattern, node.spelling, re.IGNORECASE):
                    header_info = self.get_header_info(node.spelling)
                    self.report_vulnerability(node, file_path, header_info)

        for child in node.get_children():
            if any(weak_func in child.spelling for weak_func in self.weak_funcs):
                header_info = self.get_header_info(child.spelling)
                self.report_vulnerability(child, file_path, header_info)

            for pattern in self.dynamic_patterns:
                if re.search(pattern, child.spelling, re.IGNORECASE):
                    header_info = self.get_header_info(child.spelling)
                    self.report_vulnerability(child, file_path, header_info)

    def get_header_info(self, function_name):
        """Return the header file associated with the weak function, if known."""
        return self.weak_headers.get(function_name, "Unknown header")

    def report_vulnerability(self, node, file_path, header_info):
        line_number = node.location.line
        line_content = self.get_line_content(file_path, line_number)

        # Check if line number exceeds file content
        if line_number > self.get_line_count(file_path):
            additional_info = " (line number may refer to header file or included file)"
        else:
            additional_info = ""

        print(f"Weak cryptographic algorithm detected: {node.spelling} at line {line_number}")
        vulnerability_info = {
            "file": file_path,
            "line_number": line_number,
            "line": line_content,
            "function": node.spelling,
            "header": header_info,
            "explanation": (
                f"{node.spelling} is an insecure cryptographic function. "
                "Both MD5 and SHA-1 are considered weak due to vulnerabilities "
                "that allow for collision attacks, where two different inputs "
                "produce the same hash. DES is also considered weak due to its "
                "short key length and vulnerabilities to various attacks."
            ),
            "suggestion": (
                "Consider using stronger hashing algorithms like SHA-256 or SHA-3, "
                "or more secure encryption methods such as AES. Additionally, "
                "review the cryptographic standards for your application to ensure "
                "compliance with modern security practices."
            ),
            "additional_info": additional_info
        }
        self.vulnerabilities.append(vulnerability_info)

    def get_line_content(self, file_path, line_number):
        """Retrieve the content of the specified line from the file."""
        with open(file_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()
            return lines[line_number - 1].strip() if 0 < line_number <= len(lines) else ""

    def get_line_count(self, file_path):
        """Retrieve the total number of lines in the specified file."""
        with open(file_path, 'r', encoding='utf-8') as f:
            return sum(1 for _ in f)

    def report(self):
        if self.vulnerabilities:
            for result in self.vulnerabilities:
                print(f"File: {result['file']}")
                print(f"Line Number: {result['line_number']}{result.get('additional_info', '')}")
                print(f"Line: {result['line']}")
                print(f"Function: {result['function']}")
                print(f"Header: {result['header']}")
                print(f"Explanation: {result['explanation']}")
                print(f"Suggestion: {result['suggestion']}")
                print("-" * 80)  # Separator for better readability
        else:
            print("\nNo vulnerabilities detected.")

if __name__ == "__main__":
    directory_to_scan = '/content/drive/MyDrive/test'
    analyzer = CppVulnerabilityAnalyzer(directory_to_scan)
    analyzer.analyze()
    analyzer.report()

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
    Visiting node: CursorKind.FUNCTION_DECL (EVP_DigestVerifyUpdate) at line 838
      Visiting node: CursorKind.PARM_DECL (ctx) at line 838
        Visiting node: CursorKind.TYPE_REF (EVP_MD_CTX) at line 838
      Visiting node: CursorKind.PARM_DECL (data) at line 838
      Visiting node: CursorKind.PARM_DECL (dsize) at line 838
    Visiting node: CursorKind.FUNCTION_DECL (EVP_DigestVerifyFinal) at line 839
      Visiting node: CursorKind.PARM_DECL (ctx) at line 839
        Visiting node: CursorKind.TYPE_REF (EVP_MD_CTX) at line 839
      Visiting node: CursorKind.PARM_DECL (sig) at line 839
      Visiting node: CursorKind.PARM_DECL (siglen) at line 840
    Visiting node: CursorKind.FUNCTION_DECL (EVP_OpenInit) at line 842
      Visiting node: CursorKind.PARM_DECL (ctx) at line 842
        Visiting node: CursorKind.TYPE_REF (EVP_CIPHER_CTX) at line 842
      Visiting node: CursorKind.PARM_DECL (type) at line 842
        

In [None]:
import clang.cindex
import os
import re
from google.colab import drive

drive.mount('/content/drive')

class CppVulnerabilityAnalyzer:
    def __init__(self, directory):
        self.directory = directory
        self.index = clang.cindex.Index.create()
        self.vulnerabilities = []
        self.weak_funcs = [
            "MD5", "SHA1", "EVP_md5", "EVP_sha1",
            "DES_ecb_encrypt"
        ]
        self.weak_headers = {
            "MD5": "<openssl/md5.h>",
            "SHA1": "<openssl/sha.h>",
            "EVP_md5": "<openssl/evp.h>",
            "EVP_sha1": "<openssl/evp.h>",
            "DES_ecb_encrypt": "<openssl/des.h>"
        }
        self.dynamic_patterns = [
            r'\b(md5|sha1|des)\b',
            r'\b(digest|hash|encrypt|generate)\b.*\b(init|update|final|ecb)\b',
            r'\b(use|apply|create|compute)\b.*\b(md5|sha1|des)\b'
        ]

    def analyze(self):
        # Walk through the directory
        for root, _, files in os.walk(self.directory):
            for file in files:
                if file.endswith('.cpp'):
                    file_path = os.path.join(root, file)
                    translation_unit = self.index.parse(file_path)
                    self.traverse_ast(translation_unit.cursor, file_path)

    def traverse_ast(self, node, file_path, indent=0):
        indent_str = ' ' * (indent * 2)  # Two spaces per indentation level
        if node.location.file:
            print(f"{indent_str}Visiting node: {node.kind} ({node.spelling}) at line {node.location.line}")

        # Detect weak cryptographic function usage
        self.detect_weak_crypto(node, file_path)

        # Recursively traverse all child nodes
        for child in node.get_children():
            self.traverse_ast(child, file_path, indent + 1)

    def detect_weak_crypto(self, node, file_path):
        if node.kind == clang.cindex.CursorKind.CALL_EXPR:
            # Analyze only CALL_EXPR nodes
            if any(weak_func in node.spelling for weak_func in self.weak_funcs):
                header_info = self.get_header_info(node.spelling)
                self.report_vulnerability(node, file_path, header_info)

            # Check for dynamic patterns with regex
            for pattern in self.dynamic_patterns:
                if re.search(pattern, node.spelling, re.IGNORECASE):
                    header_info = self.get_header_info(node.spelling)
                    self.report_vulnerability(node, file_path, header_info)

        # Traverse child nodes for additional checks
        for child in node.get_children():
            # Skip string literals
            if child.kind == clang.cindex.CursorKind.STRING_LITERAL:
                continue

            # Check for weak functions in child nodes
            if any(weak_func in child.spelling for weak_func in self.weak_funcs):
                header_info = self.get_header_info(child.spelling)
                self.report_vulnerability(child, file_path, header_info)

            for pattern in self.dynamic_patterns:
                if re.search(pattern, child.spelling, re.IGNORECASE):
                    header_info = self.get_header_info(child.spelling)
                    self.report_vulnerability(child, file_path, header_info)

    def get_header_info(self, function_name):
        """Return the header file associated with the weak function, if known."""
        return self.weak_headers.get(function_name, "Unknown header")

    def report_vulnerability(self, node, file_path, header_info):
        line_number = node.location.line
        line_content = self.get_line_content(file_path, line_number)

        # Check if line number exceeds file content
        if line_number > self.get_line_count(file_path):
            additional_info = " (line number may refer to header file or included file)"
        else:
            additional_info = ""

        print(f"Weak cryptographic algorithm detected: {node.spelling} at line {line_number}")
        vulnerability_info = {
            "file": file_path,
            "line_number": line_number,
            "line": line_content,
            "function": node.spelling,
            "header": header_info,
            "explanation": (
                f"{node.spelling} is an insecure cryptographic function. "
                "Both MD5 and SHA-1 are considered weak due to vulnerabilities "
                "that allow for collision attacks, where two different inputs "
                "produce the same hash. DES is also considered weak due to its "
                "short key length and vulnerabilities to various attacks."
            ),
            "suggestion": (
                "Consider using stronger hashing algorithms like SHA-256 or SHA-3, "
                "or more secure encryption methods such as AES. Additionally, "
                "review the cryptographic standards for your application to ensure "
                "compliance with modern security practices."
            ),
            "additional_info": additional_info
        }
        self.vulnerabilities.append(vulnerability_info)

    def get_line_content(self, file_path, line_number):
        """Retrieve the content of the specified line from the file."""
        with open(file_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()
            return lines[line_number - 1].strip() if 0 < line_number <= len(lines) else ""

    def get_line_count(self, file_path):
        """Retrieve the total number of lines in the specified file."""
        with open(file_path, 'r', encoding='utf-8') as f:
            return sum(1 for _ in f)

    def report(self):
        if self.vulnerabilities:
            for result in self.vulnerabilities:
                print(f"File: {result['file']}")
                print(f"Line Number: {result['line_number']}{result.get('additional_info', '')}")
                print(f"Line: {result['line']}")
                print(f"Function: {result['function']}")
                print(f"Header: {result['header']}")
                print(f"Explanation: {result['explanation']}")
                print(f"Suggestion: {result['suggestion']}")
                print("-" * 80)  # Separator for better readability
        else:
            print("\nNo vulnerabilities detected.")

if __name__ == "__main__":
    directory_to_scan = '/content/drive/MyDrive/test'
    analyzer = CppVulnerabilityAnalyzer(directory_to_scan)
    analyzer.analyze()
    analyzer.report()

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
        Visiting node: CursorKind.TYPE_REF (EVP_CIPHER_CTX) at line 780
      Visiting node: CursorKind.PARM_DECL (outm) at line 780
      Visiting node: CursorKind.PARM_DECL (outl) at line 781
    Visiting node: CursorKind.FUNCTION_DECL (EVP_CipherInit) at line 783
      Visiting node: CursorKind.PARM_DECL (ctx) at line 783
        Visiting node: CursorKind.TYPE_REF (EVP_CIPHER_CTX) at line 783
      Visiting node: CursorKind.PARM_DECL (cipher) at line 783
        Visiting node: CursorKind.TYPE_REF (EVP_CIPHER) at line 783
      Visiting node: CursorKind.PARM_DECL (key) at line 784
      Visiting node: CursorKind.PARM_DECL (iv) at line 784
      Visiting node: CursorKind.PARM_DECL (enc) at line 785
    Visiting node: CursorKind.FUNCTION_DECL (EVP_CipherInit_ex) at line 786
      Visiting node: CursorKind.PARM_DECL (ctx) at line 786
        Visiting node: CursorKind.TYPE_REF (EVP_CIPHER_CTX) at line 786
      Visiting nod

In [None]:
import clang.cindex
import os
import re
from google.colab import drive

drive.mount('/content/drive')

class CppVulnerabilityAnalyzer:
    def __init__(self, directory):
        self.directory = directory
        self.index = clang.cindex.Index.create()
        self.vulnerabilities = []
        self.weak_funcs = [
            "MD5", "SHA1", "EVP_md5", "EVP_sha1",
            "DES_ecb_encrypt"
        ]
        self.weak_headers = {
            "MD5": "<openssl/md5.h>",
            "SHA1": "<openssl/sha.h>",
            "EVP_md5": "<openssl/evp.h>",
            "EVP_sha1": "<openssl/evp.h>",
            "DES_ecb_encrypt": "<openssl/des.h>",
            "SHA1_Init": "<openssl/sha.h>",
            "SHA1_Update": "<openssl/sha.h>",
            "SHA1_Final": "<openssl/sha.h>",
            "SHA1_Transform": "<openssl/sha.h>",
            "PKCS5_PBKDF2_HMAC_SHA1": "<openssl/evp.h>",
            "EVP_md5_sha1": "<openssl/evp.h>",
            "MD5_CTX": "<openssl/md5.h>",
            "MD5state_st": "<openssl/md5.h>",
            "MD5_Init": "<openssl/md5.h>",
            "MD5_Update": "<openssl/md5.h>",
            "MD5_Final": "<openssl/md5.h>",
            "MD5_Transform": "<openssl/md5.h>"
        }
        self.dynamic_patterns = [
            r'\b(md5|sha1|des)\b',
            r'\b(digest|hash|encrypt|generate)\b.*\b(init|update|final|ecb)\b',
            r'\b(use|apply|create|compute)\b.*\b(md5|sha1|des)\b'
        ]

    def analyze(self):
        # Walk through the directory
        for root, _, files in os.walk(self.directory):
            for file in files:
                if file.endswith('.cpp'):
                    file_path = os.path.join(root, file)
                    translation_unit = self.index.parse(file_path)
                    self.traverse_ast(translation_unit.cursor, file_path)

    def traverse_ast(self, node, file_path, indent=0):
        indent_str = ' ' * (indent * 2)  # Two spaces per indentation level
        if node.location.file:
            print(f"{indent_str}Visiting node: {node.kind} ({node.spelling}) at line {node.location.line}")

        # Detect weak cryptographic function usage
        self.detect_weak_crypto(node, file_path)

        # Recursively traverse all child nodes
        for child in node.get_children():
            self.traverse_ast(child, file_path, indent + 1)

    def detect_weak_crypto(self, node, file_path):
        if node.kind == clang.cindex.CursorKind.CALL_EXPR:
            # Analyze only CALL_EXPR nodes
            if any(weak_func in node.spelling for weak_func in self.weak_funcs):
                header_info = self.get_header_info(node.spelling)
                self.report_vulnerability(node, file_path, header_info)

            # Check for dynamic patterns with regex
            for pattern in self.dynamic_patterns:
                if re.search(pattern, node.spelling, re.IGNORECASE):
                    header_info = self.get_header_info(node.spelling)
                    self.report_vulnerability(node, file_path, header_info)

        # Check for weak cryptographic terms in string literals
        for child in node.get_children():
            if child.kind == clang.cindex.CursorKind.STRING_LITERAL:
                if any(re.search(pattern, child.spelling, re.IGNORECASE) for pattern in self.dynamic_patterns):
                    self.report_string_literal_vulnerability(child, file_path)

            # Check for weak functions in other child nodes
            if any(weak_func in child.spelling for weak_func in self.weak_funcs):
                header_info = self.get_header_info(child.spelling)
                self.report_vulnerability(child, file_path, header_info)

            for pattern in self.dynamic_patterns:
                if re.search(pattern, child.spelling, re.IGNORECASE):
                    header_info = self.get_header_info(child.spelling)
                    self.report_vulnerability(child, file_path, header_info)

    def report_string_literal_vulnerability(self, node, file_path):
        line_number = node.location.line
        line_content = self.get_line_content(file_path, line_number)

        print(f"String literal suggests weak cryptographic algorithm: {node.spelling} at line {line_number}")
        vulnerability_info = {
            "file": file_path,
            "line_number": line_number,
            "line": line_content,
            "function": "String Literal",
            "header": "N/A",
            "explanation": (
                "This string literal suggests a potential use of a weak cryptographic function. "
                "Review the context of this string for security concerns."
            ),
            "suggestion": (
                "Consider reviewing the string content for potential security issues related to "
                "weak cryptographic algorithms."
            )
        }
        self.vulnerabilities.append(vulnerability_info)

    def get_header_info(self, function_name):
        """Return the header file associated with the weak function, if known."""
        return self.weak_headers.get(function_name, "Unknown header")

    def report_vulnerability(self, node, file_path, header_info):
        line_number = node.location.line
        line_content = self.get_line_content(file_path, line_number)

        # Check if line number exceeds file content
        if line_number > self.get_line_count(file_path):
            additional_info = " (line number may refer to header file or included file)"
        else:
            additional_info = ""

        print(f"Weak cryptographic algorithm detected: {node.spelling} at line {line_number}")
        vulnerability_info = {
            "file": file_path,
            "line_number": line_number,
            "line": line_content,
            "function": node.spelling,
            "header": header_info,
            "explanation": (
                f"{node.spelling} is an insecure cryptographic function. "
                "Both MD5 and SHA-1 are considered weak due to vulnerabilities "
                "that allow for collision attacks, where two different inputs "
                "produce the same hash. DES is also considered weak due to its "
                "short key length and vulnerabilities to various attacks."
            ),
            "suggestion": (
                "Consider using stronger hashing algorithms like SHA-256 or SHA-3, "
                "or more secure encryption methods such as AES. Additionally, "
                "review the cryptographic standards for your application to ensure "
                "compliance with modern security practices."
            ),
            "additional_info": additional_info
        }
        self.vulnerabilities.append(vulnerability_info)

    def get_line_content(self, file_path, line_number):
        """Retrieve the content of the specified line from the file."""
        with open(file_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()
            return lines[line_number - 1].strip() if 0 < line_number <= len(lines) else ""

    def get_line_count(self, file_path):
        """Retrieve the total number of lines in the specified file."""
        with open(file_path, 'r', encoding='utf-8') as f:
            return sum(1 for _ in f)

    def report(self):
        if self.vulnerabilities:
            for result in self.vulnerabilities:
                print(f"File: {result['file']}")
                print(f"Line Number: {result['line_number']}{result.get('additional_info', '')}")
                print(f"Line: {result['line']}")
                print(f"Function: {result['function']}")
                print(f"Header: {result.get('header', 'N/A')}")
                print(f"Explanation: {result['explanation']}")
                print(f"Suggestion: {result['suggestion']}")
                print("-" * 80)  # Separator for better readability
        else:
            print("\nNo vulnerabilities detected.")

if __name__ == "__main__":
    directory_to_scan = '/content/drive/MyDrive/test'
    analyzer = CppVulnerabilityAnalyzer(directory_to_scan)
    analyzer.analyze()
    analyzer.report()

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
      Visiting node: CursorKind.PARM_DECL (out) at line 866
      Visiting node: CursorKind.PARM_DECL (outl) at line 866
    Visiting node: CursorKind.FUNCTION_DECL (EVP_DecodeBlock) at line 867
      Visiting node: CursorKind.PARM_DECL (t) at line 867
      Visiting node: CursorKind.PARM_DECL (f) at line 867
      Visiting node: CursorKind.PARM_DECL (n) at line 867
    Visiting node: CursorKind.FUNCTION_DECL (EVP_CIPHER_CTX_new) at line 873
      Visiting node: CursorKind.TYPE_REF (EVP_CIPHER_CTX) at line 873
    Visiting node: CursorKind.FUNCTION_DECL (EVP_CIPHER_CTX_reset) at line 874
      Visiting node: CursorKind.PARM_DECL (c) at line 874
        Visiting node: CursorKind.TYPE_REF (EVP_CIPHER_CTX) at line 874
    Visiting node: CursorKind.FUNCTION_DECL (EVP_CIPHER_CTX_free) at line 875
      Visiting node: CursorKind.PARM_DECL (c) at line 875
        Visiting node: CursorKind.TYPE_REF (EVP_CIPHER_CTX) at line 875
  

In [None]:
import clang.cindex
import os
import re
from google.colab import drive

drive.mount('/content/drive')

class CppVulnerabilityAnalyzer:
    def __init__(self, directory):
        self.directory = directory
        self.index = clang.cindex.Index.create()
        self.vulnerabilities = []
        self.weak_funcs = [
            "MD5", "SHA1", "EVP_md5", "EVP_sha1",
            "DES_ecb_encrypt", "SHA1_Init", "SHA1_Update",
            "SHA1_Final", "SHA1_Transform", "PKCS5_PBKDF2_HMAC_SHA1",
            "EVP_md5_sha1", "MD5_CTX", "MD5state_st",
            "MD5_Init", "MD5_Update", "MD5_Final", "MD5_Transform"
        ]
        self.weak_headers = {
            "MD5": "<openssl/md5.h>",
            "SHA1": "<openssl/sha.h>",
            "EVP_md5": "<openssl/evp.h>",
            "EVP_sha1": "<openssl/evp.h>",
            "DES_ecb_encrypt": "<openssl/des.h>",
            "SHA1_Init": "<openssl/sha.h>",
            "SHA1_Update": "<openssl/sha.h>",
            "SHA1_Final": "<openssl/sha.h>",
            "SHA1_Transform": "<openssl/sha.h>",
            "PKCS5_PBKDF2_HMAC_SHA1": "<openssl/evp.h>",
            "EVP_md5_sha1": "<openssl/evp.h>",
            "MD5_CTX": "<openssl/md5.h>",
            "MD5state_st": "<openssl/md5.h>",
            "MD5_Init": "<openssl/md5.h>",
            "MD5_Update": "<openssl/md5.h>",
            "MD5_Final": "<openssl/md5.h>",
            "MD5_Transform": "<openssl/md5.h>"
        }
        self.dynamic_patterns = [
            r'\b(md5|sha1|des)\b',
            r'\b(digest|hash|encrypt|generate)\b.*\b(init|update|final|ecb)\b',
            r'\b(use|apply|create|compute)\b.*\b(md5|sha1|des)\b'
        ]

    def analyze(self):
        # Walk through the directory
        for root, _, files in os.walk(self.directory):
            for file in files:
                if file.endswith('.cpp'):
                    file_path = os.path.join(root, file)
                    translation_unit = self.index.parse(file_path)
                    self.traverse_ast(translation_unit.cursor, file_path)

    def traverse_ast(self, node, file_path, indent=0):
        indent_str = ' ' * (indent * 2)  # Two spaces per indentation level
        if node.location.file:
            print(f"{indent_str}Visiting node: {node.kind} ({node.spelling}) at line {node.location.line}")

        # Detect weak cryptographic function usage
        self.detect_weak_crypto(node, file_path)

        # Recursively traverse all child nodes
        for child in node.get_children():
            self.traverse_ast(child, file_path, indent + 1)

    def detect_weak_crypto(self, node, file_path):
        if node.kind == clang.cindex.CursorKind.CALL_EXPR:
            # Check CALL_EXPR nodes for weak functions
            if any(weak_func in node.spelling for weak_func in self.weak_funcs):
                header_info = self.get_header_info(node.spelling)
                self.report_vulnerability(node, file_path, header_info)

            # Check for dynamic patterns with regex
            for pattern in self.dynamic_patterns:
                if re.search(pattern, node.spelling, re.IGNORECASE):
                    header_info = self.get_header_info(node.spelling)
                    self.report_vulnerability(node, file_path, header_info)

        # Check IF_STMT for conditions that may involve weak functions
        elif node.kind == clang.cindex.CursorKind.IF_STMT:
            for child in node.get_children():
                if child.kind == clang.cindex.CursorKind.UNEXPOSED_EXPR:
                    # Check for potential weak functions within the expression
                    if any(weak_func in child.spelling for weak_func in self.weak_funcs):
                        header_info = self.get_header_info(child.spelling)
                        self.report_vulnerability(child, file_path, header_info)

        # Check for weak cryptographic terms in string literals
        for child in node.get_children():
            if child.kind == clang.cindex.CursorKind.STRING_LITERAL:
                if any(re.search(pattern, child.spelling, re.IGNORECASE) for pattern in self.dynamic_patterns):
                    self.report_string_literal_vulnerability(child, file_path)

            # Check for weak functions in other child nodes
            if any(weak_func in child.spelling for weak_func in self.weak_funcs):
                header_info = self.get_header_info(child.spelling)
                self.report_vulnerability(child, file_path, header_info)

            for pattern in self.dynamic_patterns:
                if re.search(pattern, child.spelling, re.IGNORECASE):
                    header_info = self.get_header_info(child.spelling)
                    self.report_vulnerability(child, file_path, header_info)

    def report_string_literal_vulnerability(self, node, file_path):
        line_number = node.location.line
        line_content = self.get_line_content(file_path, line_number)

        print(f"String literal suggests weak cryptographic algorithm: {node.spelling} at line {line_number}")
        vulnerability_info = {
            "file": file_path,
            "line_number": line_number,
            "line": line_content,
            "function": "String Literal",
            "header": "N/A",
            "explanation": (
                "This string literal suggests a potential use of a weak cryptographic function. "
                "Review the context of this string for security concerns."
            ),
            "suggestion": (
                "Consider reviewing the string content for potential security issues related to "
                "weak cryptographic algorithms."
            )
        }
        self.vulnerabilities.append(vulnerability_info)

    def get_header_info(self, function_name):
        """Return the header file associated with the weak function, if known."""
        return self.weak_headers.get(function_name, "Unknown header")

    def report_vulnerability(self, node, file_path, header_info):
        line_number = node.location.line
        line_content = self.get_line_content(file_path, line_number)

        # Check if line number exceeds file content
        if line_number > self.get_line_count(file_path):
            additional_info = " (line number may refer to header file or included file)"
        else:
            additional_info = ""

        print(f"Weak cryptographic algorithm detected: {node.spelling} at line {line_number}")
        vulnerability_info = {
            "file": file_path,
            "line_number": line_number,
            "line": line_content,
            "function": node.spelling,
            "header": header_info,
            "explanation": (
                f"{node.spelling} is an insecure cryptographic function. "
                "Both MD5 and SHA-1 are considered weak due to vulnerabilities "
                "that allow for collision attacks, where two different inputs "
                "produce the same hash. DES is also considered weak due to its "
                "short key length and vulnerabilities to various attacks."
            ),
            "suggestion": (
                "Consider using stronger hashing algorithms like SHA-256 or SHA-3, "
                "or more secure encryption methods such as AES. Additionally, "
                "review the cryptographic standards for your application to ensure "
                "compliance with modern security practices."
            ),
            "additional_info": additional_info
        }
        self.vulnerabilities.append(vulnerability_info)

    def get_line_content(self, file_path, line_number):
        """Retrieve the content of the specified line from the file."""
        with open(file_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()
            return lines[line_number - 1].strip() if 0 < line_number <= len(lines) else ""

    def get_line_count(self, file_path):
        """Retrieve the total number of lines in the specified file."""
        with open(file_path, 'r', encoding='utf-8') as f:
            return sum(1 for _ in f)

    def report(self):
        if self.vulnerabilities:
            for result in self.vulnerabilities:
                print(f"File: {result['file']}")
                print(f"Line Number: {result['line_number']}{result.get('additional_info', '')}")
                print(f"Line: {result['line']}")
                print(f"Function: {result['function']}")
                print(f"Header: {result.get('header', 'N/A')}")
                print(f"Explanation: {result['explanation']}")
                print(f"Suggestion: {result['suggestion']}")
                print("-" * 80)  # Separator for better readability
        else:
            print("\nNo vulnerabilities detected.")

if __name__ == "__main__":
    directory_to_scan = '/content/drive/MyDrive/test'
    analyzer = CppVulnerabilityAnalyzer(directory_to_scan)
    analyzer.analyze()
    analyzer.report()

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
      Visiting node: CursorKind.PARM_DECL (out) at line 866
      Visiting node: CursorKind.PARM_DECL (outl) at line 866
    Visiting node: CursorKind.FUNCTION_DECL (EVP_DecodeBlock) at line 867
      Visiting node: CursorKind.PARM_DECL (t) at line 867
      Visiting node: CursorKind.PARM_DECL (f) at line 867
      Visiting node: CursorKind.PARM_DECL (n) at line 867
    Visiting node: CursorKind.FUNCTION_DECL (EVP_CIPHER_CTX_new) at line 873
      Visiting node: CursorKind.TYPE_REF (EVP_CIPHER_CTX) at line 873
    Visiting node: CursorKind.FUNCTION_DECL (EVP_CIPHER_CTX_reset) at line 874
      Visiting node: CursorKind.PARM_DECL (c) at line 874
        Visiting node: CursorKind.TYPE_REF (EVP_CIPHER_CTX) at line 874
    Visiting node: CursorKind.FUNCTION_DECL (EVP_CIPHER_CTX_free) at line 875
      Visiting node: CursorKind.PARM_DECL (c) at line 875
        Visiting node: CursorKind.TYPE_REF (EVP_CIPHER_CTX) at line 875
  

In [None]:
import clang.cindex
import os
import re
from google.colab import drive

drive.mount('/content/drive')

class CppVulnerabilityAnalyzer:
    def __init__(self, directory):
        self.directory = directory
        self.index = clang.cindex.Index.create()
        self.vulnerabilities = []
        self.weak_funcs = [
            "MD5", "SHA1", "EVP_md5", "EVP_sha1",
            "DES_ecb_encrypt", "SHA1_Init", "SHA1_Update",
            "SHA1_Final", "SHA1_Transform", "PKCS5_PBKDF2_HMAC_SHA1",
            "EVP_md5_sha1", "MD5_CTX", "MD5state_st",
            "MD5_Init", "MD5_Update", "MD5_Final", "MD5_Transform"
        ]
        self.weak_headers = {
            "MD5": "<openssl/md5.h>",
            "SHA1": "<openssl/sha.h>",
            "EVP_md5": "<openssl/evp.h>",
            "EVP_sha1": "<openssl/evp.h>",
            "DES_ecb_encrypt": "<openssl/des.h>",
            "SHA1_Init": "<openssl/sha.h>",
            "SHA1_Update": "<openssl/sha.h>",
            "SHA1_Final": "<openssl/sha.h>",
            "SHA1_Transform": "<openssl/sha.h>",
            "PKCS5_PBKDF2_HMAC_SHA1": "<openssl/evp.h>",
            "EVP_md5_sha1": "<openssl/evp.h>",
            "MD5_CTX": "<openssl/md5.h>",
            "MD5state_st": "<openssl/md5.h>",
            "MD5_Init": "<openssl/md5.h>",
            "MD5_Update": "<openssl/md5.h>",
            "MD5_Final": "<openssl/md5.h>",
            "MD5_Transform": "<openssl/md5.h>"
        }
        self.dynamic_patterns = [
            r'\b(md5|sha1|des)\b',
            r'\b(digest|hash|encrypt|generate)\b.*\b(init|update|final|ecb)\b',
            r'\b(use|apply|create|compute)\b.*\b(md5|sha1|des)\b'
        ]
        self.insecure_pattern = re.compile(r'\b(EVP_(md5|sha1)|MD5|SHA1|DES_ecb_encrypt)\b', re.IGNORECASE)

    def analyze(self):
        for root, _, files in os.walk(self.directory):
            for file in files:
                if file.endswith('.cpp'):
                    file_path = os.path.join(root, file)
                    translation_unit = self.index.parse(file_path)
                    self.traverse_ast(translation_unit.cursor, file_path)

    def traverse_ast(self, node, file_path, indent=0):
        indent_str = ' ' * (indent * 2)
        if node.location.file:
            print(f"{indent_str}Visiting node: {node.kind} ({node.spelling}) at line {node.location.line}")

        # Detect weak cryptographic function usage
        self.detect_weak_crypto(node, file_path)

        # Recursively traverse all child nodes
        for child in node.get_children():
            self.traverse_ast(child, file_path, indent + 1)

    def detect_weak_crypto(self, node, file_path):
        if node.kind == clang.cindex.CursorKind.CALL_EXPR:
            # Check for weak functions
            if any(weak_func in node.spelling for weak_func in self.weak_funcs):
                header_info = self.get_header_info(node.spelling)
                self.report_vulnerability(node, file_path, header_info)

        elif node.kind == clang.cindex.CursorKind.IF_STMT:
            # Check the children of IF_STMT for weak functions
            for child in node.get_children():
                self.check_child_node(child, file_path)

        elif node.kind == clang.cindex.CursorKind.UNEXPOSED_EXPR:
            # Handle unexposed expressions
            for child in node.get_children():
                self.check_child_node(child, file_path)

        # Check for string literals for potential vulnerabilities
        elif node.kind == clang.cindex.CursorKind.STRING_LITERAL:
            if any(re.search(pattern, node.spelling, re.IGNORECASE) for pattern in self.dynamic_patterns):
                self.report_string_literal_vulnerability(node, file_path)

        # Additional regex check on line content
        self.check_line_for_insecure_patterns(node, file_path)

    def check_child_node(self, node, file_path):
        if node.kind == clang.cindex.CursorKind.CALL_EXPR:
            if any(weak_func in node.spelling for weak_func in self.weak_funcs):
                header_info = self.get_header_info(node.spelling)
                self.report_vulnerability(node, file_path, header_info)

    def check_line_for_insecure_patterns(self, node, file_path):
        line_number = node.location.line
        line_content = self.get_line_content(file_path, line_number)
        clean_line = self.remove_strings_and_comments(line_content)

        if self.insecure_pattern.search(clean_line):
            function_name = self.insecure_pattern.search(clean_line).group(0)
            vulnerability_info = {
                "file": file_path,
                "line_number": line_number,
                "line": line_content.strip(),
                "function": function_name,
                "explanation": (
                    f"{function_name} is an insecure cryptographic function. "
                    "Both MD5 and SHA-1 are considered weak due to vulnerabilities "
                    "that allow for collision attacks."
                ),
                "suggestion": (
                    "Consider using stronger hashing algorithms like SHA-256 or SHA-3."
                )
            }
            self.vulnerabilities.append(vulnerability_info)

    def remove_strings_and_comments(self, code):
        code = re.sub(r'//.*', '', code)  # Single-line comments
        code = re.sub(r'/\*.*?\*/', '', code, flags=re.DOTALL)  # Multi-line comments
        code = re.sub(r'"(.*?)"', '', code)  # Double-quoted strings
        code = re.sub(r"'(.*?)'", '', code)  # Single-quoted strings
        return code

    def report_string_literal_vulnerability(self, node, file_path):
        line_number = node.location.line
        line_content = self.get_line_content(file_path, line_number)

        print(f"String literal suggests weak cryptographic algorithm: {node.spelling} at line {line_number}")
        vulnerability_info = {
            "file": file_path,
            "line_number": line_number,
            "line": line_content,
            "function": "String Literal",
            "header": "N/A",
            "explanation": (
                "This string literal suggests a potential use of a weak cryptographic function. "
                "Review the context for security concerns."
            ),
            "suggestion": (
                "Consider reviewing the string content for potential security issues related to "
                "weak cryptographic algorithms."
            )
        }
        self.vulnerabilities.append(vulnerability_info)

    def get_header_info(self, function_name):
        return self.weak_headers.get(function_name, "Unknown header")

    def report_vulnerability(self, node, file_path, header_info):
        line_number = node.location.line
        line_content = self.get_line_content(file_path, line_number)

        additional_info = " (line number may refer to header file or included file)" if line_number > self.get_line_count(file_path) else ""

        print(f"Weak cryptographic algorithm detected: {node.spelling} at line {line_number}")
        vulnerability_info = {
            "file": file_path,
            "line_number": line_number,
            "line": line_content,
            "function": node.spelling,
            "header": header_info,
            "explanation": (
                f"{node.spelling} is an insecure cryptographic function. "
                "Both MD5 and SHA-1 are considered weak due to vulnerabilities "
                "that allow for collision attacks."
            ),
            "suggestion": (
                "Consider using stronger hashing algorithms like SHA-256 or SHA-3, "
                "or more secure encryption methods such as AES."
            ),
            "additional_info": additional_info
        }
        self.vulnerabilities.append(vulnerability_info)

    def get_line_content(self, file_path, line_number):
        with open(file_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()
            return lines[line_number - 1].strip() if 0 < line_number <= len(lines) else ""

    def get_line_count(self, file_path):
        with open(file_path, 'r', encoding='utf-8') as f:
            return sum(1 for _ in f)

    def report(self):
        if self.vulnerabilities:
            for result in self.vulnerabilities:
                print(f"File: {result['file']}")
                print(f"Line Number: {result['line_number']}{result.get('additional_info', '')}")
                print(f"Line: {result['line']}")
                print(f"Function: {result['function']}")
                print(f"Header: {result.get('header', 'N/A')}")
                print(f"Explanation: {result['explanation']}")
                print(f"Suggestion: {result['suggestion']}")
                print("-" * 80)
        else:
            print("\nNo vulnerabilities detected.")

if __name__ == "__main__":
    directory_to_scan = '/content/drive/MyDrive/test'
    analyzer = CppVulnerabilityAnalyzer(directory_to_scan)
    analyzer.analyze()
    analyzer.report()

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
File: /content/drive/MyDrive/test/sample6.cpp
Line Number: 95
Line: if (EVP_DigestInit_ex(mdctx, EVP_sha1(), nullptr) != 1 ||
Function: EVP_sha1
Header: N/A
Explanation: EVP_sha1 is an insecure cryptographic function. Both MD5 and SHA-1 are considered weak due to vulnerabilities that allow for collision attacks.
Suggestion: Consider using stronger hashing algorithms like SHA-256 or SHA-3.
--------------------------------------------------------------------------------
File: /content/drive/MyDrive/test/sample6.cpp
Line Number: 95
Line: if (EVP_DigestInit_ex(mdctx, EVP_sha1(), nullptr) != 1 ||
Function: EVP_sha1
Header: N/A
Explanation: EVP_sha1 is an insecure cryptographic function. Both MD5 and SHA-1 are considered weak due to vulnerabilities that allow for collision attacks.
Suggestion: Consider using stronger hashing algorithms like SHA-256 or SHA-3.
----------------------------------------------------------------------

In [None]:
import clang.cindex
import os
import re
from google.colab import drive

drive.mount('/content/drive')

class CppVulnerabilityAnalyzer:
    def __init__(self, directory):
        self.directory = directory
        self.index = clang.cindex.Index.create()
        self.vulnerabilities = []
        self.weak_funcs = [
            "MD5", "SHA1", "EVP_md5", "EVP_sha1",
            "DES_ecb_encrypt", "SHA1_Init", "SHA1_Update",
            "SHA1_Final", "SHA1_Transform", "PKCS5_PBKDF2_HMAC_SHA1",
            "EVP_md5_sha1", "MD5_CTX", "MD5state_st",
            "MD5_Init", "MD5_Update", "MD5_Final", "MD5_Transform"
        ]
        self.weak_headers = {
            "MD5": "<openssl/md5.h>",
            "SHA1": "<openssl/sha.h>",
            "EVP_md5": "<openssl/evp.h>",
            "EVP_sha1": "<openssl/evp.h>",
            "DES_ecb_encrypt": "<openssl/des.h>",
            "SHA1_Init": "<openssl/sha.h>",
            "SHA1_Update": "<openssl/sha.h>",
            "SHA1_Final": "<openssl/sha.h>",
            "SHA1_Transform": "<openssl/sha.h>",
            "PKCS5_PBKDF2_HMAC_SHA1": "<openssl/evp.h>",
            "EVP_md5_sha1": "<openssl/evp.h>",
            "MD5_CTX": "<openssl/md5.h>",
            "MD5state_st": "<openssl/md5.h>",
            "MD5_Init": "<openssl/md5.h>",
            "MD5_Update": "<openssl/md5.h>",
            "MD5_Final": "<openssl/md5.h>",
            "MD5_Transform": "<openssl/md5.h>"
        }
        self.dynamic_patterns = [
            r'\b(md5|sha1|des)\b',
            r'\b(digest|hash|encrypt|generate)\b.*\b(init|update|final|ecb)\b',
            r'\b(use|apply|create|compute)\b.*\b(md5|sha1|des)\b'
        ]
        self.insecure_pattern = re.compile(r'\b(EVP_(md5|sha1)|MD5|SHA1|DES_ecb_encrypt)\b', re.IGNORECASE)

    def analyze(self):
        for root, _, files in os.walk(self.directory):
            for file in files:
                if file.endswith('.cpp'):
                    file_path = os.path.join(root, file)
                    translation_unit = self.index.parse(file_path)
                    self.traverse_ast(translation_unit.cursor, file_path)

    def traverse_ast(self, node, file_path, indent=0):
        indent_str = ' ' * (indent * 2)
        if node.location.file:
            print(f"{indent_str}Visiting node: {node.kind} ({node.spelling}) at line {node.location.line}")

        # Detect weak cryptographic function usage
        self.detect_weak_crypto(node, file_path)

        # Recursively traverse all child nodes
        for child in node.get_children():
            self.traverse_ast(child, file_path, indent + 1)

    def detect_weak_crypto(self, node, file_path):
        if node.kind == clang.cindex.CursorKind.CALL_EXPR:
            # Check for weak functions
            if any(weak_func in node.spelling for weak_func in self.weak_funcs):
                header_info = self.get_header_info(node.spelling)
                self.report_vulnerability(node, file_path, header_info)

        elif node.kind == clang.cindex.CursorKind.IF_STMT:
            # Check the children of IF_STMT for weak functions
            for child in node.get_children():
                self.check_child_node(child, file_path)

        elif node.kind == clang.cindex.CursorKind.UNEXPOSED_EXPR:
            # Handle unexposed expressions
            for child in node.get_children():
                self.check_child_node(child, file_path)

        # Check for string literals for potential vulnerabilities
        elif node.kind == clang.cindex.CursorKind.STRING_LITERAL:
            if any(re.search(pattern, node.spelling, re.IGNORECASE) for pattern in self.dynamic_patterns):
                self.report_string_literal_vulnerability(node, file_path)

        # Additional regex check on line content
        self.check_line_for_insecure_patterns(node, file_path)

    def check_child_node(self, node, file_path):
        if node.kind == clang.cindex.CursorKind.CALL_EXPR:
            if any(weak_func in node.spelling for weak_func in self.weak_funcs):
                header_info = self.get_header_info(node.spelling)
                self.report_vulnerability(node, file_path, header_info)

    def check_line_for_insecure_patterns(self, node, file_path):
        line_number = node.location.line
        line_content = self.get_line_content(file_path, line_number)

        # Check if the line content matches the insecure patterns
        if self.insecure_pattern.search(line_content):
            function_name = self.insecure_pattern.search(line_content).group(0)
            vulnerability_info = {
                "file": file_path,
                "line_number": line_number,
                "line": line_content.strip(),
                "function": function_name,
                "explanation": (
                    f"{function_name} is an insecure cryptographic function. "
                    "Both MD5 and SHA-1 are considered weak due to vulnerabilities "
                    "that allow for collision attacks."
                ),
                "suggestion": (
                    "Consider using stronger hashing algorithms like SHA-256 or SHA-3."
                )
            }
            self.vulnerabilities.append(vulnerability_info)

    def report_string_literal_vulnerability(self, node, file_path):
        line_number = node.location.line
        line_content = self.get_line_content(file_path, line_number)

        print(f"String literal suggests weak cryptographic algorithm: {node.spelling} at line {line_number}")
        vulnerability_info = {
            "file": file_path,
            "line_number": line_number,
            "line": line_content,
            "function": "String Literal",
            "header": "N/A",
            "explanation": (
                "This string literal suggests a potential use of a weak cryptographic function. "
                "Review the context for security concerns."
            ),
            "suggestion": (
                "Consider reviewing the string content for potential security issues related to "
                "weak cryptographic algorithms."
            )
        }
        self.vulnerabilities.append(vulnerability_info)

    def get_header_info(self, function_name):
        return self.weak_headers.get(function_name, "Unknown header")

    def report_vulnerability(self, node, file_path, header_info):
        line_number = node.location.line
        line_content = self.get_line_content(file_path, line_number)

        additional_info = " (line number may refer to header file or included file)" if line_number > self.get_line_count(file_path) else ""

        print(f"Weak cryptographic algorithm detected: {node.spelling} at line {line_number}")
        vulnerability_info = {
            "file": file_path,
            "line_number": line_number,
            "line": line_content,
            "function": node.spelling,
            "header": header_info,
            "explanation": (
                f"{node.spelling} is an insecure cryptographic function. "
                "Both MD5 and SHA-1 are considered weak due to vulnerabilities "
                "that allow for collision attacks."
            ),
            "suggestion": (
                "Consider using stronger hashing algorithms like SHA-256 or SHA-3, "
                "or more secure encryption methods such as AES."
            ),
            "additional_info": additional_info
        }
        self.vulnerabilities.append(vulnerability_info)

    def get_line_content(self, file_path, line_number):
        with open(file_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()
            return lines[line_number - 1].strip() if 0 < line_number <= len(lines) else ""

    def get_line_count(self, file_path):
        with open(file_path, 'r', encoding='utf-8') as f:
            return sum(1 for _ in f)

    def report(self):
        if self.vulnerabilities:
            for result in self.vulnerabilities:
                print(f"File: {result['file']}")
                print(f"Line Number: {result['line_number']}{result.get('additional_info', '')}")
                print(f"Line: {result['line']}")
                print(f"Function: {result['function']}")
                print(f"Header: {result.get('header', 'N/A')}")
                print(f"Explanation: {result['explanation']}")
                print(f"Suggestion: {result['suggestion']}")
                print("-" * 80)
        else:
            print("\nNo vulnerabilities detected.")

if __name__ == "__main__":
    directory_to_scan = '/content/drive/MyDrive/test'
    analyzer = CppVulnerabilityAnalyzer(directory_to_scan)
    analyzer.analyze()
    analyzer.report()

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
File: /content/drive/MyDrive/test/sample6.cpp
Line Number: 157
Line: DES_ecb_encrypt(&input_block, &output_block, &schedule, DES_DECRYPT);
Function: DES_ecb_encrypt
Header: N/A
Explanation: DES_ecb_encrypt is an insecure cryptographic function. Both MD5 and SHA-1 are considered weak due to vulnerabilities that allow for collision attacks.
Suggestion: Consider using stronger hashing algorithms like SHA-256 or SHA-3.
--------------------------------------------------------------------------------
File: /content/drive/MyDrive/test/sample6.cpp
Line Number: 63
Line: std::cerr << "Error creating MD5 context" << std::endl;
Function: MD5
Header: N/A
Explanation: MD5 is an insecure cryptographic function. Both MD5 and SHA-1 are considered weak due to vulnerabilities that allow for collision attacks.
Suggestion: Consider using stronger hashing algorithms like SHA-256 or SHA-3.
-------------------------------------------------------

In [None]:
import clang.cindex
import os
import re
from google.colab import drive

drive.mount('/content/drive')

class CppVulnerabilityAnalyzer:
    def __init__(self, directory):
        self.directory = directory
        self.index = clang.cindex.Index.create()
        self.vulnerabilities = []
        self.reported_ids = set()  # To keep track of reported vulnerabilities
        self.weak_funcs = [
            "MD5", "SHA1", "EVP_md5", "EVP_sha1",
            "DES_ecb_encrypt", "SHA1_Init", "SHA1_Update",
            "SHA1_Final", "SHA1_Transform", "PKCS5_PBKDF2_HMAC_SHA1",
            "EVP_md5_sha1", "MD5_CTX", "MD5state_st",
            "MD5_Init", "MD5_Update", "MD5_Final", "MD5_Transform"
        ]
        self.weak_headers = {
            "MD5": "<openssl/md5.h>",
            "SHA1": "<openssl/sha.h>",
            "EVP_md5": "<openssl/evp.h>",
            "EVP_sha1": "<openssl/evp.h>",
            "DES_ecb_encrypt": "<openssl/des.h>",
            "SHA1_Init": "<openssl/sha.h>",
            "SHA1_Update": "<openssl/sha.h>",
            "SHA1_Final": "<openssl/sha.h>",
            "SHA1_Transform": "<openssl/sha.h>",
            "PKCS5_PBKDF2_HMAC_SHA1": "<openssl/evp.h>",
            "EVP_md5_sha1": "<openssl/evp.h>",
            "MD5_CTX": "<openssl/md5.h>",
            "MD5state_st": "<openssl/md5.h>",
            "MD5_Init": "<openssl/md5.h>",
            "MD5_Update": "<openssl/md5.h>",
            "MD5_Final": "<openssl/md5.h>",
            "MD5_Transform": "<openssl/md5.h>"
        }
        self.dynamic_patterns = [
            r'\b(md5|sha1|des)\b',
            r'\b(digest|hash|encrypt|generate)\b.*\b(init|update|final|ecb)\b',
            r'\b(use|apply|create|compute)\b.*\b(md5|sha1|des)\b'
        ]
        self.insecure_pattern = re.compile(r'\b(EVP_(md5|sha1)|MD5|SHA1|DES_ecb_encrypt)\b', re.IGNORECASE)

    def analyze(self):
        for root, _, files in os.walk(self.directory):
            for file in files:
                if file.endswith('.cpp'):
                    file_path = os.path.join(root, file)
                    translation_unit = self.index.parse(file_path)
                    self.traverse_ast(translation_unit.cursor, file_path)

    def traverse_ast(self, node, file_path, indent=0):
        indent_str = ' ' * (indent * 2)
        if node.location.file:
            print(f"{indent_str}Visiting node: {node.kind} ({node.spelling}) at line {node.location.line}")

        self.detect_weak_crypto(node, file_path)

        for child in node.get_children():
            self.traverse_ast(child, file_path, indent + 1)

    def detect_weak_crypto(self, node, file_path):
        if node.kind == clang.cindex.CursorKind.CALL_EXPR:
            if node.spelling in self.weak_funcs:
                unique_id = (node.spelling, node.location.line)  # Create a unique identifier
                if unique_id not in self.reported_ids:
                    header_info = self.get_header_info(node.spelling)
                    self.report_vulnerability(node, file_path, header_info)
                    self.reported_ids.add(unique_id)  # Add the unique identifier to the set

        elif node.kind == clang.cindex.CursorKind.IF_STMT:
            for child in node.get_children():
                self.check_child_node(child, file_path)

        elif node.kind == clang.cindex.CursorKind.UNEXPOSED_EXPR:
            for child in node.get_children():
                self.check_child_node(child, file_path)

        elif node.kind == clang.cindex.CursorKind.STRING_LITERAL:
            if any(re.search(pattern, node.spelling, re.IGNORECASE) for pattern in self.dynamic_patterns):
                self.report_string_literal_vulnerability(node, file_path)

        self.check_line_for_insecure_patterns(node, file_path)

    def check_child_node(self, node, file_path):
        if node.kind == clang.cindex.CursorKind.CALL_EXPR:
            if node.spelling in self.weak_funcs:
                unique_id = (node.spelling, node.location.line)  # Create a unique identifier
                if unique_id not in self.reported_ids:
                    header_info = self.get_header_info(node.spelling)
                    self.report_vulnerability(node, file_path, header_info)
                    self.reported_ids.add(unique_id)  # Add the unique identifier to the set

    def check_line_for_insecure_patterns(self, node, file_path):
        line_number = node.location.line
        line_content = self.get_line_content(file_path, line_number)

        if self.insecure_pattern.search(line_content):
            function_name = self.insecure_pattern.search(line_content).group(0)
            vulnerability_info = {
                "file": file_path,
                "line_number": line_number,
                "line": line_content.strip(),
                "function": function_name,
                "explanation": (
                    f"{function_name} is an insecure cryptographic function. "
                    "Both MD5 and SHA-1 are considered weak due to vulnerabilities "
                    "that allow for collision attacks."
                ),
                "suggestion": (
                    "Consider using stronger hashing algorithms like SHA-256 or SHA-3."
                )
            }
            self.vulnerabilities.append(vulnerability_info)

    def report_string_literal_vulnerability(self, node, file_path):
        line_number = node.location.line
        line_content = self.get_line_content(file_path, line_number)

        print(f"String literal suggests weak cryptographic algorithm: {node.spelling} at line {line_number}")
        vulnerability_info = {
            "file": file_path,
            "line_number": line_number,
            "line": line_content,
            "function": "String Literal",
            "header": "N/A",
            "explanation": (
                "This string literal suggests a potential use of a weak cryptographic function. "
                "Review the context for security concerns."
            ),
            "suggestion": (
                "Consider reviewing the string content for potential security issues related to "
                "weak cryptographic algorithms."
            )
        }
        self.vulnerabilities.append(vulnerability_info)

    def get_header_info(self, function_name):
        # Get the header information from the weak_headers dictionary
        return self.weak_headers.get(function_name, "Unknown header")

    def report_vulnerability(self, node, file_path, header_info):
        line_number = node.location.line
        line_content = self.get_line_content(file_path, line_number)

        additional_info = " (line number may refer to header file or included file)" if line_number > self.get_line_count(file_path) else ""

        print(f"Weak cryptographic algorithm detected: {node.spelling} at line {line_number}")
        vulnerability_info = {
            "file": file_path,
            "line_number": line_number,
            "line": line_content,
            "function": node.spelling,
            "header": header_info,
            "explanation": (
                f"{node.spelling} is an insecure cryptographic function. "
                "Both MD5 and SHA-1 are considered weak due to vulnerabilities "
                "that allow for collision attacks."
            ),
            "suggestion": (
                "Consider using stronger hashing algorithms like SHA-256 or SHA-3, "
                "or more secure encryption methods such as AES."
            ),
            "additional_info": additional_info
        }
        self.vulnerabilities.append(vulnerability_info)

    def get_line_content(self, file_path, line_number):
        with open(file_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()
            return lines[line_number - 1].strip() if 0 < line_number <= len(lines) else ""

    def get_line_count(self, file_path):
        with open(file_path, 'r', encoding='utf-8') as f:
            return sum(1 for _ in f)

    def report(self):
        if self.vulnerabilities:
            for result in self.vulnerabilities:
                print(f"File: {result['file']}")
                print(f"Line Number: {result['line_number']}{result.get('additional_info', '')}")
                print(f"Line: {result['line']}")
                print(f"Function: {result['function']}")
                print(f"Header: {result.get('header', 'N/A')}")
                print(f"Explanation: {result['explanation']}")
                print(f"Suggestion: {result['suggestion']}")
                print("-" * 80)
        else:
            print("\nNo vulnerabilities detected.")

if __name__ == "__main__":
    directory_to_scan = '/content/drive/MyDrive/test'
    analyzer = CppVulnerabilityAnalyzer(directory_to_scan)
    analyzer.analyze()
    analyzer.report()

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
            Visiting node: CursorKind.FUNCTION_DECL (current_exception) at line 101
              Visiting node: CursorKind.VISIBILITY_ATTR (default) at line 34
              Visiting node: CursorKind.TYPE_REF (class std::__exception_ptr::exception_ptr) at line 101
              Visiting node: CursorKind.NAMESPACE_REF (std) at line 101
          Visiting node: CursorKind.FRIEND_DECL () at line 102
            Visiting node: CursorKind.FUNCTION_DECL (rethrow_exception) at line 102
              Visiting node: CursorKind.VISIBILITY_ATTR (default) at line 34
              Visiting node: CursorKind.NAMESPACE_REF (std) at line 102
              Visiting node: CursorKind.PARM_DECL () at line 102
                Visiting node: CursorKind.TYPE_REF (class std::__exception_ptr::exception_ptr) at line 102
          Visiting node: CursorKind.FRIEND_DECL () at line 104
            Visiting node: CursorKind.FUNCTION_TEMPLATE (make_exce

In [None]:
cpp_file_path = '/content/drive/MyDrive/test/sample6.cpp'
!g++ {cpp_file_path} -o sample -lssl -lcrypto -pthread

[01m[K/content/drive/MyDrive/test/sample6.cpp:[m[K In function ‘[01m[Kvoid md5_hash_function()[m[K’:
   34 |     [01;35m[KMD5((unsigned char*)input, strlen(input), output)[m[K; // Deprecated call
      |     [01;35m[K~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~[m[K
In file included from [01m[K/content/drive/MyDrive/test/sample6.cpp:5[m[K:
[01m[K/usr/include/openssl/md5.h:52:38:[m[K [01;36m[Knote: [m[Kdeclared here
   52 | OSSL_DEPRECATEDIN_3_0 unsigned char *[01;36m[KMD5[m[K(const unsigned char *d, size_t n,
      |                                      [01;36m[K^~~[m[K
[01m[K/content/drive/MyDrive/test/sample6.cpp:[m[K In function ‘[01m[Kvoid des_encrypt(const string&, const string&)[m[K’:
  119 |     [01;35m[KDES_set_key_checked(&key_schedule, &schedule)[m[K;
      |     [01;35m[K~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~[m[K
In file included from [01m[K/content/drive/MyDrive/test/sample6.cpp:8[m[K:
[01m[K/usr/include/o

In [None]:
!./sample

Final shared_data (unsafe): 258834
Final shared_data (safe): 300000
MD5 (deprecated): 8d777f385d3dfec8815d20f7496026dc
SHA-1 (deprecated): a17c9aaa61e80a1bf71d0d850af4e5baa9800bbd
MD5 (EVP): 8d777f385d3dfec8815d20f7496026dc
SHA-1 (EVP): a17c9aaa61e80a1bf71d0d850af4e5baa9800bbd
Encrypted (DES): 0bcd681d82c2f9d92cd67ffd9e9487cd
Decrypted (DES): Ȍ�D#�UW���Z��]


In [None]:
!apt-get install valgrind

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  gdb libbabeltrace1 libc6-dbg libdebuginfod-common libdebuginfod1 libipt2
  libsource-highlight-common libsource-highlight4v5
Suggested packages:
  gdb-doc gdbserver valgrind-dbg valgrind-mpi kcachegrind alleyoop valkyrie
The following NEW packages will be installed:
  gdb libbabeltrace1 libc6-dbg libdebuginfod-common libdebuginfod1 libipt2
  libsource-highlight-common libsource-highlight4v5 valgrind
0 upgraded, 9 newly installed, 0 to remove and 49 not upgraded.
Need to get 32.3 MB of archives.
After this operation, 111 MB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/main amd64 libdebuginfod-common all 0.186-1build1 [7,878 B]
Get:2 http://archive.ubuntu.com/ubuntu jammy/main amd64 libbabeltrace1 amd64 1.5.8-2build1 [160 kB]
Get:3 http://archive.ubuntu.com/ubuntu jammy/main amd64 libdebuginfod1 amd6

In [None]:
!valgrind --tool=helgrind ./sample

==39373== Helgrind, a thread error detector
==39373== Copyright (C) 2007-2017, and GNU GPL'd, by OpenWorks LLP et al.
==39373== Using Valgrind-3.18.1 and LibVEX; rerun with -h for copyright info
==39373== Command: ./sample
==39373== 
==39373== ---Thread-Announcement------------------------------------------
==39373== 
==39373== Thread #3 was created
==39373==    at 0x502D9F3: clone (clone.S:76)
==39373==    by 0x502E8EE: __clone_internal (clone-internal.c:83)
==39373==    by 0x4F9C6D8: create_thread (pthread_create.c:295)
==39373==    by 0x4F9D1FF: pthread_create@@GLIBC_2.34 (pthread_create.c:828)
==39373==    by 0x4853767: ??? (in /usr/libexec/valgrind/vgpreload_helgrind-amd64-linux.so)
==39373==    by 0x4D98328: std::thread::_M_start_thread(std::unique_ptr<std::thread::_State, std::default_delete<std::thread::_State> >, void (*)()) (in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.30)
==39373==    by 0x10C96D: std::thread::thread<void (&)(), , void>(void (&)()) (in /content/sample)
==39

In [None]:
cpp_file_path = '/content/drive/MyDrive/test/sample5.cpp'
!g++ {cpp_file_path} -o sample -lssl -lcrypto -pthread

In [None]:
!valgrind --tool=helgrind ./sample

==39782== Helgrind, a thread error detector
==39782== Copyright (C) 2007-2017, and GNU GPL'd, by OpenWorks LLP et al.
==39782== Using Valgrind-3.18.1 and LibVEX; rerun with -h for copyright info
==39782== Command: ./sample
==39782== 
Final counter value: 200000
==39782== 
==39782== Use --history-level=approx or =none to gain increased speed, at
==39782== the cost of reduced accuracy of conflicting-access information
==39782== For lists of detected and suppressed errors, rerun with: -s
==39782== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 94547 from 7)


In [None]:
cpp_file_path = '/content/drive/MyDrive/test/sample.cpp'
!g++ -g {cpp_file_path} -o sample -lssl -lcrypto -pthread -lpthread

[01m[K/content/drive/MyDrive/test/sample.cpp:[m[K In function ‘[01m[Kvoid md5_hash_function()[m[K’:
   33 |     [01;35m[KMD5((unsigned char*)input, strlen(input), output)[m[K; // Deprecated call
      |     [01;35m[K~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~[m[K
In file included from [01m[K/content/drive/MyDrive/test/sample.cpp:5[m[K:
[01m[K/usr/include/openssl/md5.h:52:38:[m[K [01;36m[Knote: [m[Kdeclared here
   52 | OSSL_DEPRECATEDIN_3_0 unsigned char *[01;36m[KMD5[m[K(const unsigned char *d, size_t n,
      |                                      [01;36m[K^~~[m[K


In [None]:
!valgrind --tool=helgrind --trace-children=yes ./sample

==44208== Helgrind, a thread error detector
==44208== Copyright (C) 2007-2017, and GNU GPL'd, by OpenWorks LLP et al.
==44208== Using Valgrind-3.18.1 and LibVEX; rerun with -h for copyright info
==44208== Command: ./sample
==44208== 
==44208== ---Thread-Announcement------------------------------------------
==44208== 
==44208== Thread #3 was created
==44208==    at 0x502D9F3: clone (clone.S:76)
==44208==    by 0x502E8EE: __clone_internal (clone-internal.c:83)
==44208==    by 0x4F9C6D8: create_thread (pthread_create.c:295)
==44208==    by 0x4F9D1FF: pthread_create@@GLIBC_2.34 (pthread_create.c:828)
==44208==    by 0x4853767: ??? (in /usr/libexec/valgrind/vgpreload_helgrind-amd64-linux.so)
==44208==    by 0x4D98328: std::thread::_M_start_thread(std::unique_ptr<std::thread::_State, std::default_delete<std::thread::_State> >, void (*)()) (in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.30)
==44208==    by 0x10BBF7: std::thread::thread<void (&)(), , void>(void (&)()) (std_thread.h:143)
==4420

In [None]:
cpp_file_path = '/content/drive/MyDrive/test/sample2.cpp'
!g++ -g {cpp_file_path} -o sample -lssl -lcrypto -pthread -lpthread

In [None]:
!valgrind --tool=helgrind --trace-children=yes ./sample

==44725== Helgrind, a thread error detector
==44725== Copyright (C) 2007-2017, and GNU GPL'd, by OpenWorks LLP et al.
==44725== Using Valgrind-3.18.1 and LibVEX; rerun with -h for copyright info
==44725== Command: ./sample
==44725== 
Final shared_data (safe): 300000
Hash-1: 8d777f385d3dfec8815d20f7496026dc
Hash-2: a17c9aaa61e80a1bf71d0d850af4e5baa9800bbd
==44725== 
==44725== Use --history-level=approx or =none to gain increased speed, at
==44725== the cost of reduced accuracy of conflicting-access information
==44725== For lists of detected and suppressed errors, rerun with: -s
==44725== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 129603 from 7)


In [None]:
cpp_file_path = '/content/drive/MyDrive/test/sample3.cpp'
!g++ -g {cpp_file_path} -o sample -lssl -lcrypto -pthread -lpthread

In [None]:
!valgrind --tool=helgrind --trace-children=yes ./sample

==45014== Helgrind, a thread error detector
==45014== Copyright (C) 2007-2017, and GNU GPL'd, by OpenWorks LLP et al.
==45014== Using Valgrind-3.18.1 and LibVEX; rerun with -h for copyright info
==45014== Command: ./sample
==45014== 
Final shared_data (safe): 300000
SHA-256: 3a6eb0790f39ac87c94f3856b2dd2c5d110e6811602261a9a923d3bb23adc8b7
==45014== 
==45014== Use --history-level=approx or =none to gain increased speed, at
==45014== the cost of reduced accuracy of conflicting-access information
==45014== For lists of detected and suppressed errors, rerun with: -s
==45014== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 822982 from 7)


In [None]:
cpp_file_path = '/content/drive/MyDrive/test/sample4.cpp'
!g++ -g {cpp_file_path} -o sample -lssl -lcrypto -pthread -lpthread

In [None]:
!valgrind --tool=helgrind --trace-children=yes ./sample

==45115== Helgrind, a thread error detector
==45115== Copyright (C) 2007-2017, and GNU GPL'd, by OpenWorks LLP et al.
==45115== Using Valgrind-3.18.1 and LibVEX; rerun with -h for copyright info
==45115== Command: ./sample
==45115== 
==45115== ---Thread-Announcement------------------------------------------
==45115== 
==45115== Thread #3 was created
==45115==    at 0x4BE99F3: clone (clone.S:76)
==45115==    by 0x4BEA8EE: __clone_internal (clone-internal.c:83)
==45115==    by 0x4B586D8: create_thread (pthread_create.c:295)
==45115==    by 0x4B591FF: pthread_create@@GLIBC_2.34 (pthread_create.c:828)
==45115==    by 0x4853767: ??? (in /usr/libexec/valgrind/vgpreload_helgrind-amd64-linux.so)
==45115==    by 0x4954328: std::thread::_M_start_thread(std::unique_ptr<std::thread::_State, std::default_delete<std::thread::_State> >, void (*)()) (in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.30)
==45115==    by 0x1095B7: std::thread::thread<void (&)(), , void>(void (&)()) (std_thread.h:143)
==4511

In [None]:
cpp_file_path = '/content/drive/MyDrive/test/sample5.cpp'
!g++ -g {cpp_file_path} -o sample -lssl -lcrypto -pthread -lpthread

In [None]:
!valgrind --tool=helgrind --trace-children=yes ./sample

==45540== Helgrind, a thread error detector
==45540== Copyright (C) 2007-2017, and GNU GPL'd, by OpenWorks LLP et al.
==45540== Using Valgrind-3.18.1 and LibVEX; rerun with -h for copyright info
==45540== Command: ./sample
==45540== 
Final counter value: 200000
==45540== 
==45540== Use --history-level=approx or =none to gain increased speed, at
==45540== the cost of reduced accuracy of conflicting-access information
==45540== For lists of detected and suppressed errors, rerun with: -s
==45540== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 413865 from 7)


In [None]:
cpp_file_path = '/content/drive/MyDrive/test/sample6.cpp'
!g++ -g {cpp_file_path} -o sample -lssl -lcrypto -pthread -lpthread

[01m[K/content/drive/MyDrive/test/sample6.cpp:[m[K In function ‘[01m[Kvoid md5_hash_function()[m[K’:
   34 |     [01;35m[KMD5((unsigned char*)input, strlen(input), output)[m[K; // Deprecated call
      |     [01;35m[K~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~[m[K
In file included from [01m[K/content/drive/MyDrive/test/sample6.cpp:5[m[K:
[01m[K/usr/include/openssl/md5.h:52:38:[m[K [01;36m[Knote: [m[Kdeclared here
   52 | OSSL_DEPRECATEDIN_3_0 unsigned char *[01;36m[KMD5[m[K(const unsigned char *d, size_t n,
      |                                      [01;36m[K^~~[m[K
[01m[K/content/drive/MyDrive/test/sample6.cpp:[m[K In function ‘[01m[Kvoid des_encrypt(const string&, const string&)[m[K’:
  119 |     [01;35m[KDES_set_key_checked(&key_schedule, &schedule)[m[K;
      |     [01;35m[K~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~[m[K
In file included from [01m[K/content/drive/MyDrive/test/sample6.cpp:8[m[K:
[01m[K/usr/include/o

In [None]:
!valgrind --tool=helgrind --trace-children=yes ./sample

==45726== Helgrind, a thread error detector
==45726== Copyright (C) 2007-2017, and GNU GPL'd, by OpenWorks LLP et al.
==45726== Using Valgrind-3.18.1 and LibVEX; rerun with -h for copyright info
==45726== Command: ./sample
==45726== 
==45726== ---Thread-Announcement------------------------------------------
==45726== 
==45726== Thread #3 was created
==45726==    at 0x502D9F3: clone (clone.S:76)
==45726==    by 0x502E8EE: __clone_internal (clone-internal.c:83)
==45726==    by 0x4F9C6D8: create_thread (pthread_create.c:295)
==45726==    by 0x4F9D1FF: pthread_create@@GLIBC_2.34 (pthread_create.c:828)
==45726==    by 0x4853767: ??? (in /usr/libexec/valgrind/vgpreload_helgrind-amd64-linux.so)
==45726==    by 0x4D98328: std::thread::_M_start_thread(std::unique_ptr<std::thread::_State, std::default_delete<std::thread::_State> >, void (*)()) (in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.30)
==45726==    by 0x10C96D: std::thread::thread<void (&)(), , void>(void (&)()) (std_thread.h:143)
==4572

In [None]:
import glob
import os
import subprocess

# Directory containing your .cpp files
cpp_dir = '/content/drive/MyDrive/test/*.cpp'

# Compile and run each .cpp file
for cpp_file_path in glob.glob(cpp_dir):
    # Get the base name without extension for the output file
    base_name = os.path.splitext(os.path.basename(cpp_file_path))[0]

    print(f"Compiling {cpp_file_path}...")

    # Compile the C++ file
    compile_command = f'g++ -g {cpp_file_path} -o {base_name} -lssl -lcrypto -pthread'
    compile_status = os.system(compile_command)

    if compile_status != 0:
        print(f"Compilation failed for {cpp_file_path}.")
        continue

    print(f"Running valgrind on {base_name}...")

    # Run valgrind and capture output
    try:
        valgrind_output = subprocess.check_output(
            ['valgrind', '--tool=helgrind', '--trace-children=yes', f'./{base_name}'],
            stderr=subprocess.STDOUT,
            text=False  # Capture output as bytes
        )
        print(valgrind_output.decode(errors='replace'))  # Decode with error handling
    except subprocess.CalledProcessError as e:
        print(f"Valgrind encountered an error:\n{e.output.decode(errors='replace')}")

    print(f"Finished processing {base_name}.")

Compiling /content/drive/MyDrive/test/sample.cpp...
Running valgrind on sample...
==32906== Helgrind, a thread error detector
==32906== Copyright (C) 2007-2017, and GNU GPL'd, by OpenWorks LLP et al.
==32906== Using Valgrind-3.18.1 and LibVEX; rerun with -h for copyright info
==32906== Command: ./sample
==32906== 
==32906== ---Thread-Announcement------------------------------------------
==32906== 
==32906== Thread #3 was created
==32906==    at 0x502D9F3: clone (clone.S:76)
==32906==    by 0x502E8EE: __clone_internal (clone-internal.c:83)
==32906==    by 0x4F9C6D8: create_thread (pthread_create.c:295)
==32906==    by 0x4F9D1FF: pthread_create@@GLIBC_2.34 (pthread_create.c:828)
==32906==    by 0x4853767: ??? (in /usr/libexec/valgrind/vgpreload_helgrind-amd64-linux.so)
==32906==    by 0x4D98328: std::thread::_M_start_thread(std::unique_ptr<std::thread::_State, std::default_delete<std::thread::_State> >, void (*)()) (in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.30)
==32906==    by 0x10B

In [None]:
import clang.cindex
import os
import re
from google.colab import drive

drive.mount('/content/drive')

class CppVulnerabilityAnalyzer:
    def __init__(self, directory):
        self.directory = directory
        self.index = clang.cindex.Index.create()
        self.vulnerabilities = []
        self.weak_funcs = [
            "MD5", "SHA1", "EVP_md5", "EVP_sha1",
            "DES_ecb_encrypt", "SHA1_Init", "SHA1_Update",
            "SHA1_Final", "SHA1_Transform", "PKCS5_PBKDF2_HMAC_SHA1",
            "EVP_md5_sha1", "MD5_CTX", "MD5state_st",
            "MD5_Init", "MD5_Update", "MD5_Final", "MD5_Transform"
        ]
        self.weak_headers = {
            "MD5": "<openssl/md5.h>",
            "SHA1": "<openssl/sha.h>",
            "EVP_md5": "<openssl/evp.h>",
            "EVP_sha1": "<openssl/evp.h>",
            "DES_ecb_encrypt": "<openssl/des.h>",
            "SHA1_Init": "<openssl/sha.h>",
            "SHA1_Update": "<openssl/sha.h>",
            "SHA1_Final": "<openssl/sha.h>",
            "SHA1_Transform": "<openssl/sha.h>",
            "PKCS5_PBKDF2_HMAC_SHA1": "<openssl/evp.h>",
            "EVP_md5_sha1": "<openssl/evp.h>",
            "MD5_CTX": "<openssl/md5.h>",
            "MD5state_st": "<openssl/md5.h>",
            "MD5_Init": "<openssl/md5.h>",
            "MD5_Update": "<openssl/md5.h>",
            "MD5_Final": "<openssl/md5.h>",
            "MD5_Transform": "<openssl/md5.h>"
        }
        self.dynamic_patterns = [
            r'\b(md5|sha1|des)\b',
            r'\b(digest|hash|encrypt|generate)\b.*\b(init|update|final|ecb)\b',
            r'\b(use|apply|create|compute)\b.*\b(md5|sha1|des)\b'
        ]

    def analyze(self):
        for root, _, files in os.walk(self.directory):
            for file in files:
                if file.endswith('.cpp'):
                    file_path = os.path.join(root, file)
                    translation_unit = self.index.parse(file_path)
                    self.traverse_ast(translation_unit.cursor, file_path)

    def traverse_ast(self, node, file_path):
        self.detect_weak_crypto(node, file_path)

        for child in node.get_children():
            self.traverse_ast(child, file_path)

    def detect_weak_crypto(self, node, file_path):
        if node.kind == clang.cindex.CursorKind.CALL_EXPR:
            if any(weak_func in node.spelling for weak_func in self.weak_funcs):
                header_info = self.get_header_info(node.spelling)
                self.report_vulnerability(node, file_path, header_info)

            for pattern in self.dynamic_patterns:
                if re.search(pattern, node.spelling, re.IGNORECASE):
                    header_info = self.get_header_info(node.spelling)
                    self.report_vulnerability(node, file_path, header_info)

        elif node.kind == clang.cindex.CursorKind.IF_STMT:
            for child in node.get_children():
                if child.kind == clang.cindex.CursorKind.UNEXPOSED_EXPR:
                    if any(weak_func in child.spelling for weak_func in self.weak_funcs):
                        header_info = self.get_header_info(child.spelling)
                        self.report_vulnerability(child, file_path, header_info)

        for child in node.get_children():
            if child.kind == clang.cindex.CursorKind.STRING_LITERAL:
                if any(re.search(pattern, child.spelling, re.IGNORECASE) for pattern in self.dynamic_patterns):
                    self.report_string_literal_vulnerability(child, file_path)

            if any(weak_func in child.spelling for weak_func in self.weak_funcs):
                header_info = self.get_header_info(child.spelling)
                self.report_vulnerability(child, file_path, header_info)

            for pattern in self.dynamic_patterns:
                if re.search(pattern, child.spelling, re.IGNORECASE):
                    header_info = self.get_header_info(child.spelling)
                    self.report_vulnerability(child, file_path, header_info)

    def report_string_literal_vulnerability(self, node, file_path):
        line_number = node.location.line
        line_content = self.get_line_content(file_path, line_number)

        vulnerability_info = {
            "file": file_path,
            "line_number": line_number,
            "line": line_content,
            "function": "String Literal",
            "header": "N/A",
            "explanation": (
                "This string literal suggests a potential use of a weak cryptographic function. "
                "Review the context of this string for security concerns."
            ),
            "suggestion": (
                "Consider reviewing the string content for potential security issues related to "
                "weak cryptographic algorithms."
            )
        }
        self.vulnerabilities.append(vulnerability_info)

    def get_header_info(self, function_name):
        return self.weak_headers.get(function_name, "Unknown header")

    def report_vulnerability(self, node, file_path, header_info):
        line_number = node.location.line
        line_content = self.get_line_content(file_path, line_number)

        if line_number > self.get_line_count(file_path):
            additional_info = " (line number may refer to header file or included file)"
        else:
            additional_info = ""

        vulnerability_info = {
            "file": file_path,
            "line_number": line_number,
            "line": line_content,
            "function": node.spelling,
            "header": header_info,
            "explanation": (
                f"{node.spelling} is an insecure cryptographic function. "
                "Both MD5 and SHA-1 are considered weak due to vulnerabilities "
                "that allow for collision attacks, where two different inputs "
                "produce the same hash. DES is also considered weak due to its "
                "short key length and vulnerabilities to various attacks."
            ),
            "suggestion": (
                "Consider using stronger hashing algorithms like SHA-256 or SHA-3, "
                "or more secure encryption methods such as AES. Additionally, "
                "review the cryptographic standards for your application to ensure "
                "compliance with modern security practices."
            ),
            "additional_info": additional_info
        }
        self.vulnerabilities.append(vulnerability_info)

    def get_line_content(self, file_path, line_number):
        with open(file_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()
            return lines[line_number - 1].strip() if 0 < line_number <= len(lines) else ""

    def get_line_count(self, file_path):
        with open(file_path, 'r', encoding='utf-8') as f:
            return sum(1 for _ in f)

    def report(self):
        if self.vulnerabilities:
            for result in self.vulnerabilities:
                print(f"File: {result['file']}")
                print(f"Line Number: {result['line_number']}{result.get('additional_info', '')}")
                print(f"Line: {result['line']}")
                print(f"Function: {result['function']}")
                print(f"Header: {result.get('header', 'N/A')}")
                print(f"Explanation: {result['explanation']}")
                print(f"Suggestion: {result['suggestion']}")
                print("-" * 80)
        else:
            print("\nNo vulnerabilities detected.")

if __name__ == "__main__":
    directory_to_scan = '/content/drive/MyDrive/test'
    analyzer = CppVulnerabilityAnalyzer(directory_to_scan)
    analyzer.analyze()
    analyzer.report()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
File: /content/drive/MyDrive/test/sample.cpp
Line Number: 41
Line: 
Function: MD5state_st
Header: <openssl/md5.h>
Explanation: MD5state_st is an insecure cryptographic function. Both MD5 and SHA-1 are considered weak due to vulnerabilities that allow for collision attacks, where two different inputs produce the same hash. DES is also considered weak due to its short key length and vulnerabilities to various attacks.
Suggestion: Consider using stronger hashing algorithms like SHA-256 or SHA-3, or more secure encryption methods such as AES. Additionally, review the cryptographic standards for your application to ensure compliance with modern security practices.
--------------------------------------------------------------------------------
File: /content/drive/MyDrive/test/sample.cpp
Line Number: 46
Line: SHA1((unsigned char*)input, strlen(input), output); // 

In [None]:
import clang.cindex
import os
import re
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

class CppVulnerabilityAnalyzer:
    def __init__(self, directory):
        self.directory = directory
        self.index = clang.cindex.Index.create()
        self.vulnerabilities = []
        self.weak_funcs = [
            "MD5", "SHA1", "EVP_md5", "EVP_sha1",
            "DES_ecb_encrypt", "SHA1_Init", "SHA1_Update",
            "SHA1_Final", "SHA1_Transform", "PKCS5_PBKDF2_HMAC_SHA1",
            "EVP_md5_sha1", "MD5_CTX", "MD5state_st",
            "MD5_Init", "MD5_Update", "MD5_Final", "MD5_Transform"
        ]
        self.weak_headers = {
            "MD5": "<openssl/md5.h>",
            "SHA1": "<openssl/sha.h>",
            "EVP_md5": "<openssl/evp.h>",
            "EVP_sha1": "<openssl/evp.h>",
            "DES_ecb_encrypt": "<openssl/des.h>",
            "SHA1_Init": "<openssl/sha.h>",
            "SHA1_Update": "<openssl/sha.h>",
            "SHA1_Final": "<openssl/sha.h>",
            "SHA1_Transform": "<openssl/sha.h>",
            "PKCS5_PBKDF2_HMAC_SHA1": "<openssl/evp.h>",
            "EVP_md5_sha1": "<openssl/evp.h>",
            "MD5_CTX": "<openssl/md5.h>",
            "MD5state_st": "<openssl/md5.h>",
            "MD5_Init": "<openssl/md5.h>",
            "MD5_Update": "<openssl/md5.h>",
            "MD5_Final": "<openssl/md5.h>",
            "MD5_Transform": "<openssl/md5.h>"
        }
        self.dynamic_patterns = [
            r'\b(md5|sha1|des)\b',
            r'\b(digest|hash|encrypt|generate)\b.*\b(init|update|final|ecb)\b',
            r'\b(use|apply|create|compute)\b.*\b(md5|sha1|des)\b'
        ]

    def analyze(self):
        for root, _, files in os.walk(self.directory):
            for file in files:
                if file.endswith('.cpp'):
                    file_path = os.path.join(root, file)
                    translation_unit = self.index.parse(file_path)
                    self.traverse_ast(translation_unit.cursor, file_path)

    def traverse_ast(self, node, file_path):
        self.detect_weak_crypto(node, file_path)
        for child in node.get_children():
            self.traverse_ast(child, file_path)

    def detect_weak_crypto(self, node, file_path):
        if node.kind == clang.cindex.CursorKind.CALL_EXPR:
            if any(weak_func in node.spelling for weak_func in self.weak_funcs):
                header_info = self.get_header_info(node.spelling)
                self.report_vulnerability(node, file_path, header_info)

            for pattern in self.dynamic_patterns:
                if re.search(pattern, node.spelling, re.IGNORECASE):
                    header_info = self.get_header_info(node.spelling)
                    self.report_vulnerability(node, file_path, header_info)

        for child in node.get_children():
            if child.kind == clang.cindex.CursorKind.STRING_LITERAL:
                if any(re.search(pattern, child.spelling, re.IGNORECASE) for pattern in self.dynamic_patterns):
                    self.report_string_literal_vulnerability(child, file_path)

            if any(weak_func in child.spelling for weak_func in self.weak_funcs):
                header_info = self.get_header_info(child.spelling)
                self.report_vulnerability(child, file_path, header_info)

            for pattern in self.dynamic_patterns:
                if re.search(pattern, child.spelling, re.IGNORECASE):
                    header_info = self.get_header_info(child.spelling)
                    self.report_vulnerability(child, file_path, header_info)

    def report_string_literal_vulnerability(self, node, file_path):
        line_number = node.location.line
        line_content = self.get_line_content(file_path, line_number)

        vulnerability_info = {
            "file": file_path,
            "line_number": line_number,
            "line": line_content,
            "function": "String Literal",
            "header": "N/A",
            "explanation": (
                "This string literal suggests a potential use of a weak cryptographic function. "
                "Review the context of this string for security concerns."
            ),
            "suggestion": (
                "Consider reviewing the string content for potential security issues related to "
                "weak cryptographic algorithms."
            )
        }
        self.vulnerabilities.append(vulnerability_info)

    def get_header_info(self, function_name):
        return self.weak_headers.get(function_name, "Unknown header")

    def report_vulnerability(self, node, file_path, header_info):
        line_number = node.location.line
        line_content = self.get_line_content(file_path, line_number)

        vulnerability_info = {
            "file": file_path,
            "line_number": line_number,
            "line": line_content,
            "function": node.spelling,
            "header": header_info,
            "explanation": (
                f"{node.spelling} is an insecure cryptographic function. "
                "Both MD5 and SHA-1 are considered weak due to vulnerabilities "
                "that allow for collision attacks."
            ),
            "suggestion": (
                "Consider using stronger hashing algorithms like SHA-256 or SHA-3, "
                "or more secure encryption methods such as AES."
            )
        }
        self.vulnerabilities.append(vulnerability_info)

    def get_line_content(self, file_path, line_number):
        with open(file_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()
            return lines[line_number - 1].strip() if 0 < line_number <= len(lines) else ""

    def report(self):
        if self.vulnerabilities:
            for result in self.vulnerabilities:
                print(f"File: {result['file']}")
                print(f"Line Number: {result['line_number']}")
                print(f"Line: {result['line']}")
                print(f"Function: {result['function']}")
                print(f"Header: {result['header']}")
                print(f"Explanation: {result['explanation']}")
                print(f"Suggestion: {result['suggestion']}")
                print("-" * 80)
        else:
            print("\nNo vulnerabilities detected.")

if __name__ == "__main__":
    # Change this to the path in your Google Drive where the C++ files are located
    directory_to_scan = '/content/drive/MyDrive/test'
    analyzer = CppVulnerabilityAnalyzer(directory_to_scan)
    analyzer.analyze()
    analyzer.report()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
File: /content/drive/MyDrive/test/sample.cpp
Line Number: 41
Line: 
Function: MD5state_st
Header: <openssl/md5.h>
Explanation: MD5state_st is an insecure cryptographic function. Both MD5 and SHA-1 are considered weak due to vulnerabilities that allow for collision attacks.
Suggestion: Consider using stronger hashing algorithms like SHA-256 or SHA-3, or more secure encryption methods such as AES.
--------------------------------------------------------------------------------
File: /content/drive/MyDrive/test/sample.cpp
Line Number: 46
Line: SHA1((unsigned char*)input, strlen(input), output); // Deprecated call
Function: MD5_CTX
Header: <openssl/md5.h>
Explanation: MD5_CTX is an insecure cryptographic function. Both MD5 and SHA-1 are considered weak due to vulnerabilities that allow for collision attacks.
Suggestion: Consider using stronger hashing algorithms 