In [39]:
import os
import re
import sys
import colorama
from colorama import Fore, Style
from concurrent.futures import ThreadPoolExecutor, as_completed

# Startup banner: tell the user up front that the scan only reads files.
# The trailing blank line is produced by the doubled newline in `end`.
print("NOTE - THIS WILL NOT DAMAGE ANY FILES. IT MAY TRIGGER ANTIVIRUS SOFTWARES", end="\n\n")

# Enable colored terminal output; with autoreset, styling does not carry
# over from one print call to the next.
colorama.init(autoreset=True)

# ---------- Language patterns (75 total) ----------
# Maps a language name to:
#   'extensions' - file suffixes (leading dot included) that make the language
#                  a candidate for a file; the lookup table built from these
#                  lowercases them, so matching is case-insensitive.
#   'patterns'   - regex sources, compiled later with re.M (so '^' and '$'
#                  anchor at line boundaries). Detection counts how many of a
#                  language's patterns occur at least once in the file head;
#                  it measures presence, not frequency.
# Several extensions are claimed by more than one language (e.g. '.h' by C++,
# C and Objective-C; '.m' by MATLAB and Objective-C; '.pl' by Perl and Prolog;
# '.cl' by Common Lisp and OpenCL). For those, the patterns decide the winner.
LANGUAGE_PATTERNS = {
    'Python': {'extensions': ['.py'], 'patterns': [r'\bdef\b', r'\bimport\b']},
    'C++': {'extensions': ['.cpp', '.cxx', '.cc', '.hpp', '.h'], 'patterns': [r'#include', r'\bint\s+main\b']},
    'C': {'extensions': ['.c', '.h'], 'patterns': [r'#include', r'\bint\s+main\b']},
    'Java': {'extensions': ['.java'], 'patterns': [r'public\s+static\s+void\s+main', r'\bclass\s+\w+']},
    'JavaScript': {'extensions': ['.js', '.mjs', '.cjs'], 'patterns': [r'\bfunction\b', r'console\.log']},
    'TypeScript': {'extensions': ['.ts'], 'patterns': [r'\binterface\s+\w+', r':\s*\w+;']},
    'Ruby': {'extensions': ['.rb'], 'patterns': [r'\bdef\b', r'\bend\b', r'\bputs\b']},
    'PHP': {'extensions': ['.php', '.phtml'], 'patterns': [r'<\?php']},
    'HTML': {'extensions': ['.html', '.htm'], 'patterns': [r'<html', r'<!DOCTYPE html>']},
    'CSS': {'extensions': ['.css'], 'patterns': [r'\{', r'\}']},
    'Shell': {'extensions': ['.sh'], 'patterns': [r'#!/bin/(bash|sh)', r'\bcase\b.+\bin\b']},
    'C#': {'extensions': ['.cs'], 'patterns': [r'\busing\s+System', r'namespace\s+\w+']},
    'Swift': {'extensions': ['.swift'], 'patterns': [r'\bimport\s+Foundation', r'\bstruct\s+\w+']},
    'Kotlin': {'extensions': ['.kt', '.kts'], 'patterns': [r'\bfun\s+main', r'\bdata\s+class\b']},
    'Rust': {'extensions': ['.rs'], 'patterns': [r'\bfn\s+main', r'\buse\s+\w+::']},
    'Go': {'extensions': ['.go'], 'patterns': [r'\bpackage\s+main', r'\bfunc\s+main']},
    'Perl': {'extensions': ['.pl', '.pm'], 'patterns': [r'\buse\s+strict\b', r'print\s+"']},
    'Scala': {'extensions': ['.scala', '.sc'], 'patterns': [r'\bobject\s+\w+', r'\bdef\s+main']},
    # Tightened Haskell to avoid CSS '::before' etc.
    'Haskell': {'extensions': ['.hs'], 'patterns': [
        r'^\s*module\s+[A-Z][\w\.]*\s+where\b',
        r'^\s*[a-zA-Z_][\w\']*\s*::\s*[A-Z][\w\.\[\]\(\)\s\-\>]*$'
    ]},
    'R': {'extensions': ['.r', '.R', '.Rscript'], 'patterns': [r'<-', r'\blibrary\(']},
    'MATLAB': {'extensions': ['.m'], 'patterns': [r'\bfunction\b', r'^\s*end\s*$']},
    'Objective-C': {'extensions': ['.m', '.mm', '.h'], 'patterns': [r'@interface', r'#import']},
    'Dart': {'extensions': ['.dart'], 'patterns': [r'\bvoid\s+main\s*\(', r'\bimport\s+\'']},
    'Lua': {'extensions': ['.lua'], 'patterns': [r'\bfunction\s+\w+', r'\bend\b']},
    'SQL': {'extensions': ['.sql'], 'patterns': [r'\bSELECT\b', r'\bINSERT\s+INTO\b']},
    'YAML': {'extensions': ['.yaml', '.yml'], 'patterns': [r'^\s*\w+:\s', r'^\s*-\s+\w+']},
    'JSON': {'extensions': ['.json'], 'patterns': [r'^\s*\{', r'\}\s*$']},
    'XML': {'extensions': ['.xml'], 'patterns': [r'<\?xml', r'<\w+>']},
    'Markdown': {'extensions': ['.md', '.markdown'], 'patterns': [r'^#\s', r'\*\*.+\*\*']},
    'LaTeX': {'extensions': ['.tex'], 'patterns': [r'\\begin\{document\}', r'\\section\{']},
    'Julia': {'extensions': ['.jl'], 'patterns': [r'\bfunction\s+\w+', r'\bend\b', r'\busing\s+\w+']},
    'Fortran': {'extensions': ['.f', '.for', '.f90', '.f95'], 'patterns': [r'\bPROGRAM\s+\w+', r'\bEND\s+PROGRAM\b']},
    'COBOL': {'extensions': ['.cob', '.cbl'], 'patterns': [r'IDENTIFICATION\s+DIVISION', r'PROCEDURE\s+DIVISION']},
    # Tightened Pascal cues
    'Pascal': {'extensions': ['.pas'], 'patterns': [r'^\s*program\s+\w+;', r'^\s*uses\s+\w+(,\s*\w+)*;', r'^\s*begin\s*$']},
    'Visual Basic': {'extensions': ['.vb'], 'patterns': [r'\bImports\s+\w+', r'\bSub\s+Main\b']},
    'PowerShell': {'extensions': ['.ps1', '.psm1'], 'patterns': [r'\bParam\(', r'\bWrite-Host\b']},
    'Batch': {'extensions': ['.bat', '.cmd'], 'patterns': [r'@echo\s+off', r'\bgoto\s+\w+']},
    'Assembly': {'extensions': ['.asm', '.s'], 'patterns': [r'\bglobal\b', r'\bsection\b', r'\bmov\b']},
    # Tightened Prolog cues to avoid random ':-'
    'Prolog': {'extensions': ['.pl', '.pro'], 'patterns': [r':-\s*(module\(|dynamic\b|use_module\b|op\()', r'\?\-\s*\w+\(']},
    'Common Lisp': {'extensions': ['.lisp', '.lsp', '.cl'], 'patterns': [r'\(defun\b', r'\(defvar\b']},
    'Scheme': {'extensions': ['.scm', '.ss'], 'patterns': [r'\(define\b', r'\(lambda\b']},
    'Clojure': {'extensions': ['.clj', '.cljs', '.cljc'], 'patterns': [r'\(ns\b', r'\(defn\b']},
    'OCaml': {'extensions': ['.ml', '.mli'], 'patterns': [r'\blet\s+rec\b', r'\bmodule\b']},
    # Tightened F#
    'F#': {'extensions': ['.fs', '.fsx'], 'patterns': [r'^\s*(namespace|module)\s+\w+', r'^\s*open\s+[A-Z]\w+(?:\.[A-Z]\w+)*', r'^\s*let\s+[a-zA-Z_][\w\']*\s*=']},
    'Elixir': {'extensions': ['.ex', '.exs'], 'patterns': [r'\bdefmodule\s+\w+', r'\bdef\s+\w+']},
    'Erlang': {'extensions': ['.erl', '.hrl'], 'patterns': [r'-module\(\w+\)\.', r'-export\(']},
    'Elm': {'extensions': ['.elm'], 'patterns': [r'\bmodule\s+\w+\s+exposing\b', r'\bmain\s*=']},
    'Nim': {'extensions': ['.nim'], 'patterns': [r'\bproc\s+\w+', r'\bimport\s+\w+']},
    'Crystal': {'extensions': ['.cr'], 'patterns': [r'\bclass\s+\w+', r'\bdef\s+\w+']},
    'Solidity': {'extensions': ['.sol'], 'patterns': [r'\bpragma\s+solidity\b', r'\bcontract\s+\w+']},
    'VHDL': {'extensions': ['.vhdl', '.vhd'], 'patterns': [r'\bentity\s+\w+\s+is\b', r'\barchitecture\s+\w+\s+of\b']},
    'Verilog': {'extensions': ['.v', '.vh'], 'patterns': [r'\bmodule\s+\w+', r'\bendmodule\b']},
    'SystemVerilog': {'extensions': ['.sv', '.svh'], 'patterns': [r'\binterface\s+\w+', r'\bclass\s+\w+']},
    'CUDA': {'extensions': ['.cu', '.cuh'], 'patterns': [r'__global__\s+void', r'#include\s*<cuda']},
    'OpenCL': {'extensions': ['.cl'], 'patterns': [r'__kernel', r'get_global_id\s*\(']},
    'GLSL': {'extensions': ['.glsl', '.vert', '.frag', '.geom'], 'patterns': [r'#version\s+\d+', r'\bvoid\s+main\s*\(']},
    'HLSL': {'extensions': ['.hlsl', '.fx', '.fxh'], 'patterns': [r'\bfloat4\s+\w+\s*:\s*SV_Target', r'\bcbuffer\s+\w+']},
    'QML': {'extensions': ['.qml'], 'patterns': [r'\bimport\s+QtQuick', r'\bItem\s*\{']},
    'Groovy': {'extensions': ['.groovy', '.gvy'], 'patterns': [r'\bclass\s+\w+', r'\bdef\s+\w+']},
    'Gradle': {'extensions': ['.gradle', '.gradle.kts'], 'patterns': [r'\bplugins\s*\{', r'\bdependencies\s*\{']},
    'SASS': {'extensions': ['.sass'], 'patterns': [r'^:\w+\s', r'^\s+\w+:\s']},
    'SCSS': {'extensions': ['.scss'], 'patterns': [r'\$\w+:\s', r'@mixin\b']},
    'Less': {'extensions': ['.less'], 'patterns': [r'@\w+:\s', r'\.\w+\s*\{']},
    'CoffeeScript': {'extensions': ['.coffee'], 'patterns': [r'->\s*$', r'\bclass\s+\w+']},
    'Handlebars': {'extensions': ['.hbs', '.handlebars'], 'patterns': [r'\{\{\s*#\w+', r'\{\{\s*/\w+\s*\}\}']},
    'Mustache': {'extensions': ['.mustache'], 'patterns': [r'\{\{\s*\w+\s*\}\}', r'\{\{\s*#\w+']},
    'Jinja2': {'extensions': ['.j2', '.jinja2', '.jinja'], 'patterns': [r'\{\%[^%]*\%\}', r'\{\{\s*[^}]+\s*\}\}']},
    'Terraform': {'extensions': ['.tf', '.tfvars'], 'patterns': [r'\bresource\s+"', r'\bprovider\s+"']},
    'HCL': {'extensions': ['.hcl'], 'patterns': [r'\bvariable\s+"', r'\w+\s*=\s*\{']},
    'Puppet': {'extensions': ['.pp'], 'patterns': [r'\bclass\s+\w+\s*\{', r'\bnode\s+default\b']},
    'Protobuf': {'extensions': ['.proto'], 'patterns': [r'\bsyntax\s*=\s*"proto', r'\bmessage\s+\w+']},
    'Thrift': {'extensions': ['.thrift'], 'patterns': [r'\bstruct\s+\w+', r'\bservice\s+\w+']},
    'GraphQL': {'extensions': ['.graphql', '.gql'], 'patterns': [r'\btype\s+\w+\s*\{', r'\bquery\s*\{']},
    'D': {'extensions': ['.d'], 'patterns': [r'\bimport\s+\w+;', r'\bvoid\s+main\s*\(']},
    'Zig': {'extensions': ['.zig'], 'patterns': [r'\bpub\s+fn\s+main', r'\bconst\s+\w+\s*=']}
}

# ---------- Colors ----------
# Hand-picked foreground colors for the first thirty languages of the table.
BASE_COLOR_MAP = {
    'Python': Fore.GREEN,
    'C++': Fore.BLUE,
    'C': Fore.CYAN,
    'Java': Fore.MAGENTA,
    'JavaScript': Fore.YELLOW,
    'TypeScript': Fore.LIGHTYELLOW_EX,
    'Ruby': Fore.RED,
    'PHP': Fore.LIGHTMAGENTA_EX,
    'HTML': Fore.LIGHTBLUE_EX,
    'CSS': Fore.LIGHTCYAN_EX,
    'Shell': Fore.LIGHTGREEN_EX,
    'C#': Fore.LIGHTRED_EX,
    'Swift': Fore.LIGHTYELLOW_EX,
    'Kotlin': Fore.LIGHTWHITE_EX,
    'Rust': Fore.WHITE,
    'Go': Fore.LIGHTBLACK_EX,
    'Perl': Fore.LIGHTBLUE_EX,
    'Scala': Fore.LIGHTCYAN_EX,
    'Haskell': Fore.LIGHTMAGENTA_EX,
    'R': Fore.LIGHTGREEN_EX,
    'MATLAB': Fore.LIGHTRED_EX,
    'Objective-C': Fore.LIGHTWHITE_EX,
    'Dart': Fore.LIGHTBLUE_EX,
    'Lua': Fore.LIGHTCYAN_EX,
    'SQL': Fore.YELLOW,
    'YAML': Fore.WHITE,
    'JSON': Fore.LIGHTBLACK_EX,
    'XML': Fore.CYAN,
    'Markdown': Fore.MAGENTA,
    'LaTeX': Fore.LIGHTWHITE_EX,
}

# Rotating palette used for every language without a hand-picked color.
_PALETTE = [
    Fore.GREEN,
    Fore.BLUE,
    Fore.CYAN,
    Fore.MAGENTA,
    Fore.YELLOW,
    Fore.RED,
    Fore.WHITE,
    Fore.LIGHTBLACK_EX,
    Fore.LIGHTRED_EX,
    Fore.LIGHTGREEN_EX,
    Fore.LIGHTYELLOW_EX,
    Fore.LIGHTBLUE_EX,
    Fore.LIGHTMAGENTA_EX,
    Fore.LIGHTCYAN_EX,
    Fore.LIGHTWHITE_EX,
]

# Final color per language. The palette slot is chosen by the language's
# position in the alphabetically sorted list of ALL languages, so fallback
# colors stay stable as long as the set of language names does not change.
COLOR_MAP = {}
for i, lang in enumerate(sorted(LANGUAGE_PATTERNS)):
    COLOR_MAP[lang] = BASE_COLOR_MAP[lang] if lang in BASE_COLOR_MAP else _PALETTE[i % len(_PALETTE)]

# ---------- Build helpers ----------
# Both lookup tables are derived from LANGUAGE_PATTERNS in a single pass:
#   COMPILED: language -> list of compiled patterns (multiline mode)
#   EXT2LANG: lowercased extension -> set of languages claiming it
COMPILED = {}
EXT2LANG = {}
for lang, spec in LANGUAGE_PATTERNS.items():
    COMPILED[lang] = [re.compile(p, re.M) for p in spec['patterns']]
    for ext in spec['extensions']:
        EXT2LANG.setdefault(ext.lower(), set()).add(lang)

# (substring to look for in a '#!' line, language it implies)
SHEBANG_HINTS = [
    ('python', 'Python'),
    ('bash', 'Shell'),
    ('sh', 'Shell'),
    ('ruby', 'Ruby'),
    ('node', 'JavaScript'),
    ('perl', 'Perl'),
    ('php', 'PHP'),
]

MAX_READ = 1024 * 64  # only the first 64 KB of each file is inspected

def candidate_langs_for(path, head_bytes):
    """Return the set of languages a file could plausibly be written in.

    Candidates come from the file name's extension (case-insensitive). A
    two-part suffix such as ``.gradle.kts`` is tried before the plain last
    suffix, because ``os.path.splitext`` alone only ever yields ``.kts`` and
    would make compound extensions registered in ``EXT2LANG`` unreachable.
    Only when the extension yields nothing is a ``#!`` first line consulted.

    Args:
        path: Path of the file; only its final component's suffix is used.
        head_bytes: Leading bytes of the file, used for shebang detection.

    Returns:
        A new ``set`` of language names; empty when nothing matches.
    """
    stem, ext = os.path.splitext(os.path.basename(path).lower())
    inner_ext = os.path.splitext(stem)[1]

    cands = set()
    if inner_ext:
        # Most specific match first: e.g. 'build.gradle.kts' -> '.gradle.kts'.
        cands.update(EXT2LANG.get(inner_ext + ext, ()))
    if not cands:
        cands.update(EXT2LANG.get(ext, ()))

    if not cands and head_bytes.startswith(b'#!'):
        # head_bytes is non-empty here, so splitlines() has a first element.
        first = head_bytes.splitlines()[0].decode('utf-8', 'ignore').lower()
        cands.update(lang for key, lang in SHEBANG_HINTS if key in first)
    return cands

def detect_language_for_file(file_path):
    """Guess the language of one file from its name and first ``MAX_READ`` bytes.

    Each candidate language (see ``candidate_langs_for``) is scored by how many
    of its compiled patterns occur at least once in the decoded file head. The
    highest score wins.

    Candidates are visited in sorted order, so ties (and the no-pattern-matched
    case) always resolve to the alphabetically first candidate. Iterating the
    candidate set directly would make tie-breaking depend on string hash
    randomization, and therefore vary from run to run for shared extensions.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        The detected language name, or ``None`` if the file cannot be read,
        contains a NUL byte in its head (treated as binary), or has no
        candidate language.
    """
    try:
        with open(file_path, 'rb') as f:
            chunk = f.read(MAX_READ)
    except (OSError, ValueError):
        # Unreadable, vanished, or un-openable path (e.g. embedded NUL): skip.
        return None
    if b'\x00' in chunk:  # likely binary
        return None

    candidates = candidate_langs_for(file_path, chunk)
    if not candidates:
        return None

    text = chunk.decode('utf-8', 'ignore')
    ordered = sorted(candidates)

    # Start from the alphabetical default; only a strictly better score
    # replaces it, which keeps the outcome deterministic.
    best_lang, best_hits = ordered[0], 0
    for lang in ordered:
        hits = sum(1 for pat in COMPILED[lang] if pat.search(text))
        if hits > best_hits:
            best_hits, best_lang = hits, lang
    return best_lang

def collect_file_paths(directory, ignore_dirs):
    """Recursively list every file under ``directory``, skipping ignored trees.

    A directory is skipped when any entry of ``ignore_dirs`` occurs as a
    case-insensitive substring of its path. Symbolic links to directories are
    not followed.

    Args:
        directory: Root directory to walk.
        ignore_dirs: Iterable of path fragments to exclude. Empty entries are
            disregarded, since an empty string is a substring of every path
            and would otherwise silently exclude everything.

    Returns:
        A list of file paths (each joined onto its containing directory).
    """
    # Normalise once instead of once per visited directory.
    ignored = [entry.lower() for entry in ignore_dirs if entry]

    file_paths = []
    for root, dirs, files in os.walk(directory, followlinks=False):
        root_lower = root.lower()
        if any(marker in root_lower for marker in ignored):
            # Every descendant path contains `root` and therefore the same
            # marker, so pruning here changes no result; it only avoids
            # walking a subtree whose files would all be discarded.
            dirs[:] = []
            continue
        file_paths.extend(os.path.join(root, name) for name in files)
    return file_paths

def main():
    """Interactively scan a file or directory tree and print a language report.

    Asks for a path on stdin, classifies every collected file concurrently
    with ``detect_language_for_file``, then prints per-language counts and,
    for languages seen exactly once, the path of that single file. Exits with
    status 1 when the path is neither an existing file nor a directory.
    """
    target_path = os.path.expanduser(
        input("Enter a file or directory to scan: ").strip()
    )

    scanning_tree = os.path.isdir(target_path)
    if not scanning_tree and not os.path.isfile(target_path):
        print("Invalid path. Please enter an existing file or directory.")
        sys.exit(1)

    if scanning_tree:
        ignore_dirs = []  # e.g. ["/proc", "/dev"]
        print(f"\nCollecting files from: {target_path}")
        file_paths = collect_file_paths(target_path, ignore_dirs)
    else:
        print(f"\nCollecting single file: {target_path}")
        file_paths = [target_path]

    total_files = len(file_paths)
    print(f"Total files found: {total_files}\n")

    # Per-language tallies, plus the path of the only file seen so far for
    # languages whose tally is currently exactly one.
    language_counts = dict.fromkeys(LANGUAGE_PATTERNS, 0)
    singleton_paths = {}

    # The work is mostly file reads, so oversubscribe the cores (8..64 threads).
    cpu_total = os.cpu_count() or 8
    workers = min(max(cpu_total * 2, 8), 64)

    with ThreadPoolExecutor(max_workers=workers) as executor:
        path_of = {}
        for fp in file_paths:
            path_of[executor.submit(detect_language_for_file, fp)] = fp

        for processed, finished in enumerate(as_completed(path_of), start=1):
            lang = finished.result()
            if lang:
                seen = language_counts[lang] + 1
                language_counts[lang] = seen
                if seen == 1:
                    singleton_paths[lang] = path_of[finished]
                elif seen == 2:
                    # No longer unique: forget the remembered path.
                    singleton_paths.pop(lang, None)

            # Refresh the in-place progress line every 1000 files and at the end.
            if processed == total_files or processed % 1000 == 0:
                print(f"Processed {processed}/{total_files} files...", end='\r', flush=True)

    print("\n\nDetected Programming Languages Report:")
    # Language names are unique, so sorting the pairs orders them by name.
    detected = sorted(pair for pair in language_counts.items() if pair[1] > 0)
    if not detected:
        print("No programming languages detected in the scanned path.")
        return

    # One summary line per language that was actually found.
    for language, count in detected:
        color = COLOR_MAP.get(language, Fore.WHITE)
        print(f"{color}{language}: {count} files{Style.RESET_ALL}")

    # Languages found exactly once are listed again with their file path.
    singles = sorted(singleton_paths.items())
    if not singles:
        return
    print("\nLanguages occurring exactly once (with filepath):")
    for language, path in singles:
        color = COLOR_MAP.get(language, Fore.WHITE)
        print(f"{color}{language}{Style.RESET_ALL} -> {path}")

# Start the interactive scan only when this code runs as the main program,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


NOTE - THIS WILL NOT DAMAGE ANY FILES. IT MAY TRIGGER ANTIVIRUS SOFTWARES

Enter a file or directory to scan: /Volumes/T9 1/UE_5.5

Collecting files from: /Volumes/T9 1/UE_5.5
Total files found: 212007

Processed 212007/212007 files...

Detected Programming Languages Report:
Assembly: 1 files
Batch: 45 files
C: 77338 files
C#: 3601 files
C++: 45994 files
CSS: 5 files
CUDA: 16 files
D: 4 files
GLSL: 147 files
Gradle: 11 files
HLSL: 3 files
HTML: 19 files
JSON: 817 files
Java: 129 files
JavaScript: 21 files
Less: 29 files
Lua: 4 files
Markdown: 74 files
Objective-C: 230 files
Perl: 4 files
PowerShell: 4 files
Protobuf: 14 files
Python: 3107 files
Ruby: 2 files
Scheme: 14 files
Shell: 154 files
Swift: 1 files
TypeScript: 58 files
XML: 752 files
YAML: 14 files

Languages occurring exactly once (with filepath):
Assembly -> /Volumes/T9 1/UE_5.5/Engine/Source/Developer/Windows/LiveCoding/Private/External/LC_JumpToSelf.asm
Swift -> /Volumes/T9 1/UE_5.5/Engine/Source/Runtime/IOS/MarketplaceKitW

In [17]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# Optional color support: use colorama when it is importable and initialises
# cleanly, otherwise degrade to plain, uncolored output.
try:
    from colorama import Fore, Style, init as colorama_init
    colorama_init(autoreset=True)
    # Tier name -> color; the rank ranges are applied by _tier_color().
    COLORS = dict(
        top=Fore.GREEN,      # ranks 1-10
        high=Fore.YELLOW,    # ranks 11-30
        mid=Fore.CYAN,       # ranks 31-60
        low=Fore.MAGENTA,    # ranks 61-75
    )
except Exception:
    class _NoColor:
        """Stand-in for colorama's Fore/Style: every attribute is ''."""

        def __getattr__(self, _):
            return ""

    Fore = Style = _NoColor()
    COLORS = dict.fromkeys(("top", "high", "mid", "low"), "")

# Languages ordered from most popular (index 0, rank 1) to least popular
# (rank 75). The position in this list IS the rank, so order matters.
RANKED_LANGUAGES = [
    # ranks 1-10
    "Python", "JavaScript", "Java", "C++", "C",
    "TypeScript", "C#", "HTML", "CSS", "SQL",
    # ranks 11-30
    "Go", "Shell", "PHP", "Rust", "Kotlin",
    "Swift", "Visual Basic", "PowerShell", "R", "MATLAB",
    "Ruby", "Objective-C", "Dart", "Scala", "Lua",
    "Perl", "GraphQL", "JSON", "YAML", "XML",
    # ranks 31-60
    "Markdown", "Groovy", "Gradle", "Julia", "Terraform",
    "HCL", "Protobuf", "Thrift", "CUDA", "OpenCL",
    "GLSL", "HLSL", "QML", "F#", "Clojure",
    "Haskell", "Elixir", "Erlang", "OCaml", "Solidity",
    "Zig", "D", "Assembly", "VHDL", "Verilog",
    "SystemVerilog", "SCSS", "SASS", "Less", "Batch",
    # ranks 61-75
    "CoffeeScript", "Jinja2", "Handlebars", "Mustache", "Pascal",
    "Fortran", "COBOL", "Prolog", "Common Lisp", "Scheme",
    "Crystal", "Nim", "Puppet", "LaTeX", "Elm",
]

def _tier_color(rank: int) -> str:
    """Return the color prefix for a 1-based popularity rank.

    Ranks 1-10 map to the "top" color, 11-30 to "high", 31-60 to "mid";
    any other rank falls through to "low".
    """
    tiers = ((1, 10, "top"), (11, 30, "high"), (31, 60, "mid"))
    for lowest, highest, tier in tiers:
        if lowest <= rank <= highest:
            return COLORS[tier]
    return COLORS["low"]

def main():
    """Print the numbered popularity ranking, one tier-colored name per line."""
    print("Language Popularity Ranking (1 = most popular, 75 = least)")
    print("-" * 56)
    # Right-align the numbers to the width of the largest rank.
    width = len(str(len(RANKED_LANGUAGES)))
    for rank, name in enumerate(RANKED_LANGUAGES, start=1):
        print(f"{rank:>{width}}. {_tier_color(rank)}{name}{Style.RESET_ALL}")

# Print the ranking only when this code runs as the main program, not when
# it is imported as a module.
if __name__ == "__main__":
    main()


Language Popularity Ranking (1 = most popular, 75 = least)
--------------------------------------------------------
 1. Python
 2. JavaScript
 3. Java
 4. C++
 5. C
 6. TypeScript
 7. C#
 8. HTML
 9. CSS
10. SQL
11. Go
12. Shell
13. PHP
14. Rust
15. Kotlin
16. Swift
17. Visual Basic
18. PowerShell
19. R
20. MATLAB
21. Ruby
22. Objective-C
23. Dart
24. Scala
25. Lua
26. Perl
27. GraphQL
28. JSON
29. YAML
30. XML
31. Markdown
32. Groovy
33. Gradle
34. Julia
35. Terraform
36. HCL
37. Protobuf
38. Thrift
39. CUDA
40. OpenCL
41. GLSL
42. HLSL
43. QML
44. F#
45. Clojure
46. Haskell
47. Elixir
48. Erlang
49. OCaml
50. Solidity
51. Zig
52. D
53. Assembly
54. VHDL
55. Verilog
56. SystemVerilog
57. SCSS
58. SASS
59. Less
60. Batch
61. CoffeeScript
62. Jinja2
63. Handlebars
64. Mustache
65. Pascal
66. Fortran
67. COBOL
68. Prolog
69. Common Lisp
70. Scheme
71. Crystal
72. Nim
73. Puppet
74. LaTeX
75. Elm
