In [4]:
import os
import re
import sys
import colorama
from colorama import Fore, Style
from concurrent.futures import ThreadPoolExecutor, as_completed

print("NOTE - THIS WILL NOT DAMAGE ANY FILES. IT MAY TRIGGER ANTIVIRUS SOFTWARES")
print()

# Initialize Colorama for colored output.
colorama.init(autoreset=True)

# Expanded LANGUAGE_PATTERNS to 75 languages/formats.
# Notes on a few ambiguous extensions:
# - .pl may be Perl or Prolog → content patterns help disambiguate (e.g., "use strict" vs. ":-").
# - .m may be MATLAB or Objective-C → content patterns help disambiguate (e.g., "function" vs. "@interface").
# - .v may be Verilog (we map it to Verilog here).

LANGUAGE_PATTERNS = {
    'Python': {
        'extensions': ['.py'],
        'patterns': [r'\bdef\b', r'\bimport\b']
    },
    'C++': {
        'extensions': ['.cpp', '.cxx', '.cc', '.hpp', '.h'],
        'patterns': [r'#include', r'\bint\s+main\b']
    },
    'C': {
        'extensions': ['.c', '.h'],
        'patterns': [r'#include', r'\bint\s+main\b']
    },
    'Java': {
        'extensions': ['.java'],
        'patterns': [r'public\s+static\s+void\s+main', r'\bclass\s+\w+']
    },
    'JavaScript': {
        'extensions': ['.js', '.mjs', '.cjs'],
        'patterns': [r'\bfunction\b', r'console\.log']
    },
    'TypeScript': {
        'extensions': ['.ts'],
        'patterns': [r'\binterface\s+\w+', r':\s*\w+;']
    },
    'Ruby': {
        'extensions': ['.rb'],
        'patterns': [r'\bdef\b', r'\bend\b', r'\bputs\b']
    },
    'PHP': {
        'extensions': ['.php', '.phtml'],
        'patterns': [r'<\?php']
    },
    'HTML': {
        'extensions': ['.html', '.htm'],
        'patterns': [r'<html', r'<!DOCTYPE html>']
    },
    'CSS': {
        'extensions': ['.css'],
        'patterns': [r'\{', r'\}']
    },
    'Shell': {
        'extensions': ['.sh'],
        'patterns': [r'#!/bin/(bash|sh)', r'\bcase\b.+\bin\b']
    },
    'C#': {
        'extensions': ['.cs'],
        'patterns': [r'\busing\s+System', r'namespace\s+\w+']
    },
    'Swift': {
        'extensions': ['.swift'],
        'patterns': [r'\bimport\s+Foundation', r'\bstruct\s+\w+']
    },
    'Kotlin': {
        'extensions': ['.kt', '.kts'],
        'patterns': [r'\bfun\s+main', r'\bdata\s+class\b']
    },
    'Rust': {
        'extensions': ['.rs'],
        'patterns': [r'\bfn\s+main', r'\buse\s+\w+::']
    },
    'Go': {
        'extensions': ['.go'],
        'patterns': [r'\bpackage\s+main', r'\bfunc\s+main']
    },
    'Perl': {
        'extensions': ['.pl', '.pm'],
        'patterns': [r'\buse\s+strict\b', r'print\s+"']
    },
    'Scala': {
        'extensions': ['.scala', '.sc'],
        'patterns': [r'\bobject\s+\w+', r'\bdef\s+main']
    },
    'Haskell': {
        'extensions': ['.hs'],
        'patterns': [r'\bmodule\s+\w+', r'::']
    },
    'R': {
        'extensions': ['.r', '.R', '.Rscript'],
        'patterns': [r'<-', r'\blibrary\(']
    },
    'MATLAB': {
        'extensions': ['.m'],
        'patterns': [r'\bfunction\b', r'\bend\b']
    },
    'Objective-C': {
        'extensions': ['.m', '.mm', '.h'],
        'patterns': [r'@interface', r'#import']
    },
    'Dart': {
        'extensions': ['.dart'],
        'patterns': [r'\bvoid\s+main\s*\(', r'\bimport\s+\'']
    },
    'Lua': {
        'extensions': ['.lua'],
        'patterns': [r'\bfunction\s+\w+', r'\bend\b']
    },
    'SQL': {
        'extensions': ['.sql'],
        'patterns': [r'\bSELECT\b', r'\bINSERT\s+INTO\b']
    },
    'YAML': {
        'extensions': ['.yaml', '.yml'],
        'patterns': [r'^\s*\w+:\s', r'^\s*-\s+\w+']
    },
    'JSON': {
        'extensions': ['.json'],
        'patterns': [r'^\s*\{', r'\}\s*$']
    },
    'XML': {
        'extensions': ['.xml'],
        'patterns': [r'<\?xml', r'<\w+>']
    },
    'Markdown': {
        'extensions': ['.md', '.markdown'],
        'patterns': [r'^#\s', r'\*\*.+\*\*']
    },
    'LaTeX': {
        'extensions': ['.tex'],
        'patterns': [r'\\begin\{document\}', r'\\section\{']
    },

    # 31–75 (new additions)
    'Julia': {
        'extensions': ['.jl'],
        'patterns': [r'\bfunction\s+\w+', r'\bend\b', r'\busing\s+\w+']
    },
    'Fortran': {
        'extensions': ['.f', '.for', '.f90', '.f95'],
        'patterns': [r'\bPROGRAM\s+\w+', r'\bEND\s+PROGRAM\b']
    },
    'COBOL': {
        'extensions': ['.cob', '.cbl'],
        'patterns': [r'IDENTIFICATION\s+DIVISION', r'PROCEDURE\s+DIVISION']
    },
    'Pascal': {
        'extensions': ['.pas'],
        'patterns': [r'\bprogram\s+\w+;', r'\bbegin\b', r'\bend\.']
    },
    'Visual Basic': {
        'extensions': ['.vb'],
        'patterns': [r'\bImports\s+\w+', r'\bSub\s+Main\b']
    },
    'PowerShell': {
        'extensions': ['.ps1', '.psm1'],
        'patterns': [r'\bParam\(', r'\bWrite-Host\b']
    },
    'Batch': {
        'extensions': ['.bat', '.cmd'],
        'patterns': [r'@echo\s+off', r'\bgoto\s+\w+']
    },
    'Assembly': {
        'extensions': ['.asm', '.s'],
        'patterns': [r'\bglobal\b', r'\bsection\b', r'\bmov\b']
    },
    'Prolog': {
        'extensions': ['.pl', '.pro'],
        'patterns': [r':-', r'\?\-']
    },
    'Common Lisp': {
        'extensions': ['.lisp', '.lsp', '.cl'],
        'patterns': [r'\(defun\b', r'\(defvar\b']
    },
    'Scheme': {
        'extensions': ['.scm', '.ss'],
        'patterns': [r'\(define\b', r'\(lambda\b']
    },
    'Clojure': {
        'extensions': ['.clj', '.cljs', '.cljc'],
        'patterns': [r'\(ns\b', r'\(defn\b']
    },
    'OCaml': {
        'extensions': ['.ml', '.mli'],
        'patterns': [r'\blet\s+rec\b', r'\bmodule\b']
    },
    'F#': {
        'extensions': ['.fs', '.fsx'],
        'patterns': [r'\bopen\s+\w+', r'\blet\s+\w+\s*=']
    },
    'Elixir': {
        'extensions': ['.ex', '.exs'],
        'patterns': [r'\bdefmodule\s+\w+', r'\bdef\s+\w+']
    },
    'Erlang': {
        'extensions': ['.erl', '.hrl'],
        'patterns': [r'-module\(\w+\)\.', r'-export\(']
    },
    'Elm': {
        'extensions': ['.elm'],
        'patterns': [r'\bmodule\s+\w+\s+exposing\b', r'\bmain\s*=']
    },
    'Nim': {
        'extensions': ['.nim'],
        'patterns': [r'\bproc\s+\w+', r'\bimport\s+\w+']
    },
    'Crystal': {
        'extensions': ['.cr'],
        'patterns': [r'\bclass\s+\w+', r'\bdef\s+\w+']
    },
    'Solidity': {
        'extensions': ['.sol'],
        'patterns': [r'\bpragma\s+solidity\b', r'\bcontract\s+\w+']
    },
    'VHDL': {
        'extensions': ['.vhdl', '.vhd'],
        'patterns': [r'\bentity\s+\w+\s+is\b', r'\barchitecture\s+\w+\s+of\b']
    },
    'Verilog': {
        'extensions': ['.v', '.vh'],
        'patterns': [r'\bmodule\s+\w+', r'\bendmodule\b']
    },
    'SystemVerilog': {
        'extensions': ['.sv', '.svh'],
        'patterns': [r'\binterface\s+\w+', r'\bclass\s+\w+']
    },
    'CUDA': {
        'extensions': ['.cu', '.cuh'],
        'patterns': [r'__global__\s+void', r'#include\s*<cuda']
    },
    'OpenCL': {
        'extensions': ['.cl'],
        'patterns': [r'__kernel', r'get_global_id\s*\(']
    },
    'GLSL': {
        'extensions': ['.glsl', '.vert', '.frag', '.geom'],
        'patterns': [r'#version\s+\d+', r'\bvoid\s+main\s*\(']
    },
    'HLSL': {
        'extensions': ['.hlsl', '.fx', '.fxh'],
        'patterns': [r'\bfloat4\s+\w+\s*:\s*SV_Target', r'\bcbuffer\s+\w+']
    },
    'QML': {
        'extensions': ['.qml'],
        'patterns': [r'\bimport\s+QtQuick', r'\bItem\s*\{']
    },
    'Groovy': {
        'extensions': ['.groovy', '.gvy'],
        'patterns': [r'\bclass\s+\w+', r'\bdef\s+\w+']
    },
    'Gradle': {
        'extensions': ['.gradle', '.gradle.kts'],
        'patterns': [r'\bplugins\s*\{', r'\bdependencies\s*\{']
    },
    'SASS': {
        'extensions': ['.sass'],
        'patterns': [r'^:\w+\s', r'^\s+\w+:\s']
    },
    'SCSS': {
        'extensions': ['.scss'],
        'patterns': [r'\$\w+:\s', r'@mixin\b']
    },
    'Less': {
        'extensions': ['.less'],
        'patterns': [r'@\w+:\s', r'\.\w+\s*\{']
    },
    'CoffeeScript': {
        'extensions': ['.coffee'],
        'patterns': [r'->\s*$', r'\bclass\s+\w+']
    },
    'Handlebars': {
        'extensions': ['.hbs', '.handlebars'],
        'patterns': [r'\{\{\s*#\w+', r'\{\{\s*/\w+\s*\}\}']
    },
    'Mustache': {
        'extensions': ['.mustache'],
        'patterns': [r'\{\{\s*\w+\s*\}\}', r'\{\{\s*#\w+']
    },
    'Jinja2': {
        'extensions': ['.j2', '.jinja2', '.jinja'],
        'patterns': [r'\{\%[^%]*\%\}', r'\{\{\s*[^}]+\s*\}\}']
    },
    'Terraform': {
        'extensions': ['.tf', '.tfvars'],
        'patterns': [r'\bresource\s+"', r'\bprovider\s+"']
    },
    'HCL': {
        'extensions': ['.hcl'],
        'patterns': [r'\bvariable\s+"', r'\w+\s*=\s*\{']
    },
    'Puppet': {
        'extensions': ['.pp'],
        'patterns': [r'\bclass\s+\w+\s*\{', r'\bnode\s+default\b']
    },
    'Protobuf': {
        'extensions': ['.proto'],
        'patterns': [r'\bsyntax\s*=\s*"proto', r'\bmessage\s+\w+']
    },
    'Thrift': {
        'extensions': ['.thrift'],
        'patterns': [r'\bstruct\s+\w+', r'\bservice\s+\w+']
    },
    'GraphQL': {
        'extensions': ['.graphql', '.gql'],
        'patterns': [r'\btype\s+\w+\s*\{', r'\bquery\s*\{']
    },
    'D': {
        'extensions': ['.d'],
        'patterns': [r'\bimport\s+\w+;', r'\bvoid\s+main\s*\(']
    },
    'Zig': {
        'extensions': ['.zig'],
        'patterns': [r'\bpub\s+fn\s+main', r'\bconst\s+\w+\s*=']
    },
}

# ---- Colors ----
from colorama import Fore

# Keep your original explicit choices; we’ll auto-fill the rest below.
BASE_COLOR_MAP = {
    'Python': Fore.GREEN,
    'C++': Fore.BLUE,
    'C': Fore.CYAN,
    'Java': Fore.MAGENTA,
    'JavaScript': Fore.YELLOW,
    'TypeScript': Fore.LIGHTYELLOW_EX,
    'Ruby': Fore.RED,
    'PHP': Fore.LIGHTMAGENTA_EX,
    'HTML': Fore.LIGHTBLUE_EX,
    'CSS': Fore.LIGHTCYAN_EX,
    'Shell': Fore.LIGHTGREEN_EX,
    'C#': Fore.LIGHTRED_EX,
    'Swift': Fore.LIGHTYELLOW_EX,
    'Kotlin': Fore.LIGHTWHITE_EX,
    'Rust': Fore.WHITE,
    'Go': Fore.LIGHTBLACK_EX,
    'Perl': Fore.LIGHTBLUE_EX,
    'Scala': Fore.LIGHTCYAN_EX,
    'Haskell': Fore.LIGHTMAGENTA_EX,
    'R': Fore.LIGHTGREEN_EX,
    'MATLAB': Fore.LIGHTRED_EX,
    'Objective-C': Fore.LIGHTWHITE_EX,
    'Dart': Fore.LIGHTBLUE_EX,
    'Lua': Fore.LIGHTCYAN_EX,
    'SQL': Fore.YELLOW,
    'YAML': Fore.WHITE,
    'JSON': Fore.LIGHTBLACK_EX,
    'XML': Fore.CYAN,
    'Markdown': Fore.MAGENTA,
    'LaTeX': Fore.LIGHTWHITE_EX
}

# Cycle through a palette for anything not explicitly colored above.
_PALETTE = [
    Fore.GREEN, Fore.BLUE, Fore.CYAN, Fore.MAGENTA, Fore.YELLOW, Fore.RED, Fore.WHITE,
    Fore.LIGHTBLACK_EX, Fore.LIGHTRED_EX, Fore.LIGHTGREEN_EX, Fore.LIGHTYELLOW_EX,
    Fore.LIGHTBLUE_EX, Fore.LIGHTMAGENTA_EX, Fore.LIGHTCYAN_EX, Fore.LIGHTWHITE_EX
]

COLOR_MAP = {}
for i, lang in enumerate(sorted(LANGUAGE_PATTERNS.keys())):
    COLOR_MAP[lang] = BASE_COLOR_MAP.get(lang, _PALETTE[i % len(_PALETTE)])


def detect_language_for_file(file_path):
    """
    Attempts to detect the programming language of a file using its extension and content.
    Returns the language name if detected; otherwise, returns None.
    """
    _, ext = os.path.splitext(file_path)
    ext = ext.lower()
    possible_languages = []
    
    # Check file extension.
    for language, info in LANGUAGE_PATTERNS.items():
        if ext in info['extensions']:
            possible_languages.append(language)
    
    # Read a snippet of file content.
    try:
        with open(file_path, 'r', encoding='utf-8', errors="ignore") as f:
            content = f.read(1024)
    except Exception:
        return None

    # Check for language-specific patterns in content.
    for language, info in LANGUAGE_PATTERNS.items():
        for pattern in info['patterns']:
            if re.search(pattern, content):
                return language

    # Fallback: if the extension suggests a language, return the first match.
    if possible_languages:
        return possible_languages[0]
    return None

def collect_file_paths(directory, ignore_dirs):
    """
    Recursively collects all file paths from 'directory', ignoring any directory that contains
    any string from 'ignore_dirs'.
    """
    file_paths = []
    for root, dirs, files in os.walk(directory):
        if any(ignored.lower() in root.lower() for ignored in ignore_dirs):
            continue
        for file in files:
            file_paths.append(os.path.join(root, file))
    return file_paths

def main():
    # Prompt for directory input and expand tilde if used.
    directory = os.path.expanduser(input("Enter the directory to scan: ").strip())
    if not os.path.isdir(directory):
        print("Invalid directory. Please enter a valid directory path.")
        sys.exit(1)
    
    # Define directories to ignore. Adjust as needed.
    ignore_dirs = []  # e.g., on macOS: ["/System", "/Applications", "/Library"]

    print(f"\nCollecting files from: {directory}")
    file_paths = collect_file_paths(directory, ignore_dirs)
    total_files = len(file_paths)
    print(f"Total files found: {total_files}\n")

    language_counts = {}

    # Process files concurrently.
    with ThreadPoolExecutor(max_workers=8) as executor:
        futures = {executor.submit(detect_language_for_file, fp): fp for fp in file_paths}
        processed = 0
        for future in as_completed(futures):
            processed += 1
            lang = future.result()
            if lang:
                language_counts[lang] = language_counts.get(lang, 0) + 1
            if processed % 100 == 0 or processed == total_files:
                print(f"Processed {processed}/{total_files} files...", end='\r')

        print("\n\nDetected Programming Languages Report:")

    # Only show languages that were actually detected (> 0 files)
    nonzero = [(lang, cnt) for lang, cnt in language_counts.items() if cnt > 0]

    if not nonzero:
        print("No programming languages detected in the scanned directory.")
        return

    # Alphabetical (or switch to key=lambda x: -x[1] for count-desc)
    for language, count in sorted(nonzero, key=lambda x: x[0]):
        color = COLOR_MAP.get(language, Fore.WHITE)
        print(f"{color}{language}: {count} files{Style.RESET_ALL}")

# Run the main function.
main()


NOTE - THIS WILL NOT DAMAGE ANY FILES. IT MAY TRIGGER ANTIVIRUS SOFTWARES

Enter the directory to scan: /Volumes/T9 1/UE_5.5

Collecting files from: /Volumes/T9 1/UE_5.5
Total files found: 212006

Processed 212006/212006 files...

Detected Programming Languages Report:
Assembly: 40 files
Batch: 36 files
C: 15 files
C#: 67 files
C++: 117714 files
CSS: 37713 files
CoffeeScript: 1 files
F#: 28 files
Fortran: 1 files
GLSL: 2 files
Gradle: 3 files
HCL: 1 files
HTML: 9 files
Haskell: 766 files
Java: 4803 files
JavaScript: 377 files
Julia: 405 files
Less: 1 files
Markdown: 2912 files
OCaml: 10 files
Pascal: 354 files
Perl: 7 files
PowerShell: 1 files
Prolog: 9 files
Protobuf: 28 files
Python: 4062 files
R: 45 files
Ruby: 286 files
Rust: 2 files
SCSS: 11 files
Scala: 217 files
Scheme: 15 files
Shell: 82 files
Solidity: 109 files
Swift: 13 files
Thrift: 1565 files
TypeScript: 1009 files
XML: 5750 files
YAML: 33 files
Zig: 2 files
