# In [None]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import os
import re
import sys
import colorama
from colorama import Fore, Style
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor, as_completed

# Startup notice. This runs at module top level, so it is printed on import
# as well as when the file is executed as a script.
print("NOTE - THIS WILL NOT DAMAGE ANY FILES. IT MAY TRIGGER ANTIVIRUS SOFTWARES\n")

# ---------- Setup ----------
# Initialise colorama for coloured terminal output. autoreset=True asks
# colorama to reset styling after each print, per the colorama documentation.
colorama.init(autoreset=True)

# ---------- Language patterns (75 total) ----------
LANGUAGE_PATTERNS = {
    'Python': {'extensions': ['.py'], 'patterns': [r'\bdef\b', r'\bimport\b']},
    'JavaScript': {'extensions': ['.js', '.mjs', '.cjs'], 'patterns': [r'\bfunction\b', r'console\.log']},
    'Java': {'extensions': ['.java'], 'patterns': [r'public\s+static\s+void\s+main', r'\bclass\s+\w+']},
    'C++': {'extensions': ['.cpp', '.cxx', '.cc', '.hpp', '.h'], 'patterns': [r'#include', r'\bint\s+main\b']},
    'C': {'extensions': ['.c', '.h'], 'patterns': [r'#include', r'\bint\s+main\b']},
    'TypeScript': {'extensions': ['.ts'], 'patterns': [r'\binterface\s+\w+', r':\s*\w+;']},
    'C#': {'extensions': ['.cs'], 'patterns': [r'\busing\s+System', r'namespace\s+\w+']},
    'HTML': {'extensions': ['.html', '.htm'], 'patterns': [r'<html', r'<!DOCTYPE html>']},
    'CSS': {'extensions': ['.css'], 'patterns': [r'\{', r'\}']},
    'SQL': {'extensions': ['.sql'], 'patterns': [r'\bSELECT\b', r'\bINSERT\s+INTO\b']},
    'Go': {'extensions': ['.go'], 'patterns': [r'\bpackage\s+main', r'\bfunc\s+main']},
    'Shell': {'extensions': ['.sh'], 'patterns': [r'#!/bin/(bash|sh)', r'\bcase\b.+\bin\b']},
    'PHP': {'extensions': ['.php', '.phtml'], 'patterns': [r'<\?php']},
    'Rust': {'extensions': ['.rs'], 'patterns': [r'\bfn\s+main', r'\buse\s+\w+::']},
    'Kotlin': {'extensions': ['.kt', '.kts'], 'patterns': [r'\bfun\s+main', r'\bdata\s+class\b']},
    'Swift': {'extensions': ['.swift'], 'patterns': [r'\bimport\s+Foundation', r'\bstruct\s+\w+']},
    'Visual Basic': {'extensions': ['.vb'], 'patterns': [r'\bImports\s+\w+', r'\bSub\s+Main\b']},
    'PowerShell': {'extensions': ['.ps1', '.psm1'], 'patterns': [r'\bParam\(', r'\bWrite-Host\b']},
    'R': {'extensions': ['.r', '.R', '.Rscript'], 'patterns': [r'<-', r'\blibrary\(']},
    'MATLAB': {'extensions': ['.m'], 'patterns': [r'\bfunction\b', r'^\s*end\s*$']},
    'Ruby': {'extensions': ['.rb'], 'patterns': [r'\bdef\b', r'\bend\b', r'\bputs\b']},
    'Objective-C': {'extensions': ['.m', '.mm', '.h'], 'patterns': [r'@interface', r'#import']},
    'Dart': {'extensions': ['.dart'], 'patterns': [r'\bvoid\s+main\s*\(', r'\bimport\s+\'']},
    'Scala': {'extensions': ['.scala', '.sc'], 'patterns': [r'\bobject\s+\w+', r'\bdef\s+main']},
    'Lua': {'extensions': ['.lua'], 'patterns': [r'\bfunction\s+\w+', r'\bend\b']},
    'Perl': {'extensions': ['.pl', '.pm'], 'patterns': [r'\buse\s+strict\b', r'print\s+"']},
    'GraphQL': {'extensions': ['.graphql', '.gql'], 'patterns': [r'\btype\s+\w+\s*\{', r'\bquery\s*\{']},
    'JSON': {'extensions': ['.json'], 'patterns': [r'^\s*\{', r'\}\s*$']},
    'YAML': {'extensions': ['.yaml', '.yml'], 'patterns': [r'^\s*\w+:\s', r'^\s*-\s+\w+']},
    'XML': {'extensions': ['.xml'], 'patterns': [r'<\?xml', r'<\w+>']},
    'Markdown': {'extensions': ['.md', '.markdown'], 'patterns': [r'^#\s', r'\*\*.+\*\*']},
    'Groovy': {'extensions': ['.groovy', '.gvy'], 'patterns': [r'\bclass\s+\w+', r'\bdef\s+\w+']},
    'Gradle': {'extensions': ['.gradle', '.gradle.kts'], 'patterns': [r'\bplugins\s*\{', r'\bdependencies\s*\{']},
    'Julia': {'extensions': ['.jl'], 'patterns': [r'\bfunction\s+\w+', r'\bend\b', r'\busing\s+\w+']},
    'Terraform': {'extensions': ['.tf', '.tfvars'], 'patterns': [r'\bresource\s+"', r'\bprovider\s+"']},
    'HCL': {'extensions': ['.hcl'], 'patterns': [r'\bvariable\s+"', r'\w+\s*=\s*\{']},
    'Protobuf': {'extensions': ['.proto'], 'patterns': [r'\bsyntax\s*=\s*"proto', r'\bmessage\s+\w+']},
    'Thrift': {'extensions': ['.thrift'], 'patterns': [r'\bstruct\s+\w+', r'\bservice\s+\w+']},
    'CUDA': {'extensions': ['.cu', '.cuh'], 'patterns': [r'__global__\s+void', r'#include\s*<cuda']},
    'OpenCL': {'extensions': ['.cl'], 'patterns': [r'__kernel', r'get_global_id\s*\(']},
    'GLSL': {'extensions': ['.glsl', '.vert', '.frag', '.geom'], 'patterns': [r'#version\s+\d+', r'\bvoid\s+main\s*\(']},
    'HLSL': {'extensions': ['.hlsl', '.fx', '.fxh'], 'patterns': [r'\bfloat4\s+\w+\s*:\s*SV_Target', r'\bcbuffer\s+\w+']},
    'QML': {'extensions': ['.qml'], 'patterns': [r'\bimport\s+QtQuick', r'\bItem\s*\{']},
    'F#': {'extensions': ['.fs', '.fsx'], 'patterns': [r'^\s*(namespace|module)\s+\w+', r'^\s*open\s+[A-Z]\w+(?:\.[A-Z]\w+)*', r'^\s*let\s+[a-zA-Z_][\w\']*\s*=']},
    'Clojure': {'extensions': ['.clj', '.cljs', '.cljc'], 'patterns': [r'\(ns\b', r'\(defn\b']},
    'Haskell': {'extensions': ['.hs'], 'patterns': [r'^\s*module\s+[A-Z][\w\.]*\s+where\b', r'^\s*[a-zA-Z_][\w\']*\s*::\s*[A-Z][\w\.\[\]\(\)\s\-\>]*$']},
    'Elixir': {'extensions': ['.ex', '.exs'], 'patterns': [r'\bdefmodule\s+\w+', r'\bdef\s+\w+']},
    'Erlang': {'extensions': ['.erl', '.hrl'], 'patterns': [r'-module\(\w+\)\.', r'-export\(']},
    'OCaml': {'extensions': ['.ml', '.mli'], 'patterns': [r'\blet\s+rec\b', r'\bmodule\b']},
    'Solidity': {'extensions': ['.sol'], 'patterns': [r'\bpragma\s+solidity\b', r'\bcontract\s+\w+']},
    'Zig': {'extensions': ['.zig'], 'patterns': [r'\bpub\s+fn\s+main', r'\bconst\s+\w+\s*=']},
    'D': {'extensions': ['.d'], 'patterns': [r'\bimport\s+\w+;', r'\bvoid\s+main\s*\(']},
    'Assembly': {'extensions': ['.asm', '.s'], 'patterns': [r'\bglobal\b', r'\bsection\b', r'\bmov\b']},
    'VHDL': {'extensions': ['.vhdl', '.vhd'], 'patterns': [r'\bentity\s+\w+\s+is\b', r'\barchitecture\s+\w+\s+of\b']},
    'Verilog': {'extensions': ['.v', '.vh'], 'patterns': [r'\bmodule\s+\w+', r'\bendmodule\b']},
    'SystemVerilog': {'extensions': ['.sv', '.svh'], 'patterns': [r'\binterface\s+\w+', r'\bclass\s+\w+']},
    'SCSS': {'extensions': ['.scss'], 'patterns': [r'\$\w+:\s', r'@mixin\b']},
    'SASS': {'extensions': ['.sass'], 'patterns': [r'^:\w+\s', r'^\s+\w+:\s']},
    'Less': {'extensions': ['.less'], 'patterns': [r'@\w+:\s', r'\.\w+\s*\{']},
    'Batch': {'extensions': ['.bat', '.cmd'], 'patterns': [r'@echo\s+off', r'\bgoto\s+\w+']},
    'CoffeeScript': {'extensions': ['.coffee'], 'patterns': [r'->\s*$', r'\bclass\s+\w+']},
    'Jinja2': {'extensions': ['.j2', '.jinja2', '.jinja'], 'patterns': [r'\{\%[^%]*\%\}', r'\{\{\s*[^}]+\s*\}\}']},
    'Handlebars': {'extensions': ['.hbs', '.handlebars'], 'patterns': [r'\{\{\s*#\w+', r'\{\{\s*/\w+\s*\}\}']},
    'Mustache': {'extensions': ['.mustache'], 'patterns': [r'\{\{\s*\w+\s*\}\}', r'\{\{\s*#\w+']},
    'Pascal': {'extensions': [''.replace('', '.pas')][0:1], 'patterns': [r'^\s*program\s+\w+;', r'^\s*uses\s+\w+(,\s*\w+)*;', r'^\s*begin\s*$']},  # keep .pas
    'Fortran': {'extensions': ['.f', '.for', '.f90', '.f95'], 'patterns': [r'\bPROGRAM\s+\w+', r'\bEND\s+PROGRAM\b']},
    'COBOL': {'extensions': ['.cob', '.cbl'], 'patterns': [r'IDENTIFICATION\s+DIVISION', r'PROCEDURE\s+DIVISION']},
    'Prolog': {'extensions': ['.pl', '.pro'], 'patterns': [r':-\s*(module\(|dynamic\b|use_module\b|op\()', r'\?\-\s*\w+\(']},
    'Common Lisp': {'extensions': ['.lisp', '.lsp', '.cl'], 'patterns': [r'\(defun\b', r'\(defvar\b']},
    'Scheme': {'extensions': ['.scm', '.ss'], 'patterns': [r'\(define\b', r'\(lambda\b']},
    'Crystal': {'extensions': ['.cr'], 'patterns': [r'\bclass\s+\w+', r'\bdef\s+\w+']},
    'Nim': {'extensions': ['.nim'], 'patterns': [r'\bproc\s+\w+', r'\bimport\s+\w+']},
    'Puppet': {'extensions': ['.pp'], 'patterns': [r'\bclass\s+\w+\s*\{', r'\bnode\s+default\b']},
    'LaTeX': {'extensions': ['.tex'], 'patterns': [r'\\begin\{document\}', r'\\section\{']},
    'Elm': {'extensions': ['.elm'], 'patterns': [r'\bmodule\s+\w+\s+exposing\b', r'\bmain\s*=']}
}

# ---------- Colors ----------
# Foreground colour used for each language in the printed report.
# Languages missing from this table fall back to a default at the call site.
BASE_COLOR_MAP = {
    'Python': Fore.GREEN,
    'JavaScript': Fore.YELLOW,
    'Java': Fore.MAGENTA,
    'C++': Fore.BLUE,
    'C': Fore.CYAN,
    'TypeScript': Fore.LIGHTYELLOW_EX,
    'C#': Fore.LIGHTRED_EX,
    'HTML': Fore.LIGHTBLUE_EX,
    'CSS': Fore.LIGHTCYAN_EX,
    'SQL': Fore.YELLOW,
    'Go': Fore.LIGHTBLACK_EX,
    'Shell': Fore.LIGHTGREEN_EX,
    'PHP': Fore.LIGHTMAGENTA_EX,
    'Rust': Fore.WHITE,
    'Kotlin': Fore.LIGHTWHITE_EX,
    'Swift': Fore.LIGHTYELLOW_EX,
    'Visual Basic': Fore.LIGHTWHITE_EX,
    'PowerShell': Fore.LIGHTGREEN_EX,
    'R': Fore.LIGHTGREEN_EX,
    'MATLAB': Fore.LIGHTRED_EX,
    'Ruby': Fore.RED,
    'Objective-C': Fore.LIGHTWHITE_EX,
    'Dart': Fore.LIGHTBLUE_EX,
    'Scala': Fore.LIGHTCYAN_EX,
    'Lua': Fore.LIGHTCYAN_EX,
    'Perl': Fore.LIGHTBLUE_EX,
    'GraphQL': Fore.MAGENTA,
    'JSON': Fore.LIGHTBLACK_EX,
    'YAML': Fore.WHITE,
    'XML': Fore.CYAN,
    'Markdown': Fore.MAGENTA,
    'Groovy': Fore.LIGHTCYAN_EX,
    'Gradle': Fore.LIGHTCYAN_EX,
    'Julia': Fore.GREEN,
    'Terraform': Fore.GREEN,
    'HCL': Fore.GREEN,
    'Protobuf': Fore.LIGHTBLUE_EX,
    'Thrift': Fore.LIGHTBLUE_EX,
    'CUDA': Fore.CYAN,
    'OpenCL': Fore.CYAN,
    'GLSL': Fore.CYAN,
    'HLSL': Fore.CYAN,
    'QML': Fore.LIGHTBLUE_EX,
    'F#': Fore.LIGHTWHITE_EX,
    'Clojure': Fore.LIGHTGREEN_EX,
    'Haskell': Fore.LIGHTMAGENTA_EX,
    'Elixir': Fore.MAGENTA,
    'Erlang': Fore.RED,
    'OCaml': Fore.WHITE,
    'Solidity': Fore.LIGHTYELLOW_EX,
    'Zig': Fore.YELLOW,
    'D': Fore.WHITE,
    'Assembly': Fore.WHITE,
    'VHDL': Fore.CYAN,
    'Verilog': Fore.CYAN,
    'SystemVerilog': Fore.CYAN,
    'SCSS': Fore.LIGHTCYAN_EX,
    'SASS': Fore.LIGHTCYAN_EX,
    'Less': Fore.LIGHTCYAN_EX,
    'Batch': Fore.LIGHTBLACK_EX,
    'CoffeeScript': Fore.YELLOW,
    'Jinja2': Fore.LIGHTMAGENTA_EX,
    'Handlebars': Fore.MAGENTA,
    'Mustache': Fore.MAGENTA,
    'Pascal': Fore.WHITE,
    'Fortran': Fore.WHITE,
    'COBOL': Fore.WHITE,
    'Prolog': Fore.WHITE,
    'Common Lisp': Fore.WHITE,
    'Scheme': Fore.WHITE,
    'Crystal': Fore.WHITE,
    'Nim': Fore.WHITE,
    'Puppet': Fore.WHITE,
    'LaTeX': Fore.WHITE,
    'Elm': Fore.WHITE,
}

# ---------- Regex compile & indices ----------
# COMPILED: language -> its patterns pre-compiled in multiline mode, so the
#           '^' / '$' anchors in the table match at every line boundary.
# EXT2LANG: lower-cased extension -> set of languages that declare it.
COMPILED = {}
EXT2LANG = defaultdict(set)
for lang, spec in LANGUAGE_PATTERNS.items():
    COMPILED[lang] = [re.compile(source, re.MULTILINE) for source in spec['patterns']]
    for ext in spec['extensions']:
        EXT2LANG[ext.lower()].add(lang)

# (keyword looked for in a '#!' line, language it suggests)
SHEBANG_HINTS = [
    ('python', 'Python'),
    ('bash', 'Shell'),
    ('sh', 'Shell'),
    ('ruby', 'Ruby'),
    ('node', 'JavaScript'),
    ('perl', 'Perl'),
    ('php', 'PHP'),
]

# Only this many leading bytes of a file are read for detection (64 KB).
MAX_READ = 1 << 16

def candidate_langs_for(path, head_bytes):
    """Return the set of language names that could describe a file.

    The extension is tried first; the shebang line is consulted only when the
    extension yields nothing.

    Args:
        path: Path of the file; only its base name is inspected.
        head_bytes: The leading bytes of the file, used to read a '#!' line.

    Returns:
        A new set of language names, possibly empty.
    """
    name = os.path.basename(path).lower()
    stem, ext = os.path.splitext(name)

    # os.path.splitext only yields the last suffix, so a two-part extension
    # such as '.gradle.kts' must be rebuilt and looked up explicitly. When it
    # is registered it wins over the shorter suffix because it is more specific.
    inner_ext = os.path.splitext(stem)[1]
    cands = set(EXT2LANG.get(inner_ext + ext, ())) if inner_ext else set()
    if not cands:
        cands = set(EXT2LANG.get(ext, ()))
    if cands or not head_bytes.startswith(b'#!'):
        return cands

    first = head_bytes.splitlines()[0].decode('utf-8', 'ignore').lower()
    tokens = first[2:].split()
    # Match hints against the interpreter's program name only. Searching the
    # whole line would let a directory name (e.g. '/home/josh/bin/python')
    # trigger an unrelated hint such as 'sh'.
    program = tokens[0].rsplit('/', 1)[-1] if tokens else ''
    if program == 'env':
        # '#!/usr/bin/env [-S] [VAR=value ...] program [args]': skip env's own
        # options and variable assignments to reach the real program.
        program = next(
            (tok.rsplit('/', 1)[-1] for tok in tokens[1:]
             if not tok.startswith('-') and '=' not in tok),
            '',
        )
    for key, lang in SHEBANG_HINTS:
        # Substring match so versioned or variant names ('python3', 'zsh') still hit.
        if key in program:
            cands.add(lang)
    return cands

def detect_language_for_file(file_path):
    """Guess the language of one file from its name and leading content.

    At most MAX_READ bytes are read. Each candidate language from
    candidate_langs_for() is scored by how many of its compiled patterns match
    the decoded text, and the highest score wins.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        The detected language name, or None when the file cannot be read,
        contains a NUL byte (treated as binary), or has no candidate language.
    """
    try:
        with open(file_path, 'rb') as f:
            chunk = f.read(MAX_READ)
    except (OSError, ValueError):
        # Unreadable file (permissions, vanished, invalid path): skip it
        # rather than abort the whole scan.
        return None
    if b'\x00' in chunk:  # likely binary
        return None

    candidates = candidate_langs_for(file_path, chunk)
    if not candidates:
        return None

    text = chunk.decode('utf-8', 'ignore')

    def hits(lang):
        return sum(1 for pat in COMPILED[lang] if pat.search(text))

    # Rank in sorted order: max() returns the first maximal item, so ties
    # (including "no pattern matched at all") resolve to the alphabetically
    # first candidate. Iterating the raw set would make the result depend on
    # string-hash randomisation and differ between runs.
    return max(sorted(candidates), key=hits)

def collect_file_paths(directory, ignore_dirs):
    """Recursively list every file under *directory*, skipping ignored folders.

    Args:
        directory: Root directory to walk. Symlinked directories are not followed.
        ignore_dirs: Iterable of directory names to skip. Each entry is
            compared case-insensitively against whole directory names below
            the root, so 'build' skips 'build/' but not 'rebuild/'. The root
            itself is never skipped.

    Returns:
        A list of file paths, each joined onto the directory it was found in.
    """
    ignored = {name.lower() for name in ignore_dirs}
    file_paths = []
    for root, dirs, files in os.walk(directory, followlinks=False):
        if ignored:
            # Prune in place so os.walk never descends into an ignored tree.
            dirs[:] = [d for d in dirs if d.lower() not in ignored]
        file_paths.extend(os.path.join(root, name) for name in files)
    return file_paths

# ---------- Name normalization for query ----------
def _norm(s: str) -> str:
    """Reduce a language name or user query to a lowercase alphanumeric key.

    '+' and '#' are spelled out before punctuation is stripped. Without that,
    'C', 'C++' and 'C#' would all collapse to the same key 'c' and could not
    be told apart in a lookup table.

    Args:
        s: A language name or free-form query, e.g. 'C++' or 'objective-c'.

    Returns:
        The normalised key, e.g. 'cplusplus' or 'objectivec'.
    """
    spelled_out = s.lower().replace('+', 'plus').replace('#', 'sharp')
    return re.sub(r'[^a-z0-9]+', '', spelled_out)

# Common synonyms
SYNONYMS = {
    'js': 'JavaScript',
    'ts': 'TypeScript',
    'csharp': 'C#',
    'fsharp': 'F#',
    'cplusplus': 'C++',
    'cpp': 'C++',
    'objc': 'Objective-C',
    'objectivec': 'Objective-C',
    'vb': 'Visual Basic',
    'bash': 'Shell',
    'powershell': 'PowerShell',
}

# Normalised key -> canonical language name. The canonical names go in first;
# the synonyms are layered on top and overwrite any key they share.
NAME_INDEX = {_norm(canonical): canonical for canonical in LANGUAGE_PATTERNS}
NAME_INDEX.update((_norm(alias), target) for alias, target in SYNONYMS.items())

# ---------- Main ----------
def _prompt(message):
    """Read one stripped line from stdin; return '' when stdin is closed."""
    try:
        return input(message).strip()
    except EOFError:
        # Piped or closed stdin: behave as if the user pressed Enter.
        print()
        return ''


def _scan_files(file_paths):
    """Detect each file's language concurrently; return {language: sorted paths}."""
    total_files = len(file_paths)
    language_files = defaultdict(list)
    workers = min(64, max(8, (os.cpu_count() or 8) * 2))
    with ThreadPoolExecutor(max_workers=workers) as executor:
        futures = {executor.submit(detect_language_for_file, fp): fp for fp in file_paths}
        for processed, future in enumerate(as_completed(futures), start=1):
            lang = future.result()
            if lang:
                language_files[lang].append(futures[future])
            if processed % 1000 == 0 or processed == total_files:
                print(f"Processed {processed}/{total_files} files...", end='\r', flush=True)
    # Futures finish in arbitrary order; sort so listings are reproducible.
    return {lang: sorted(paths) for lang, paths in language_files.items()}


def _print_report(language_files):
    """Print per-language file counts, then the languages found exactly once."""
    print("\n\nDetected Programming Languages Report:")
    if not language_files:
        print("No programming languages detected in the scanned path.")
        return

    languages = sorted(language_files)
    for language in languages:
        color = BASE_COLOR_MAP.get(language, Fore.WHITE)
        print(f"{color}{language}: {len(language_files[language])} files{Style.RESET_ALL}")

    singles = [language for language in languages if len(language_files[language]) == 1]
    if singles:
        print("\nLanguages occurring exactly once (with filepath):")
        for language in singles:
            color = BASE_COLOR_MAP.get(language, Fore.WHITE)
            print(f"{color}{language}{Style.RESET_ALL} -> {language_files[language][0]}")


def _query_loop(language_files):
    """Repeatedly ask for a language and list its files until blank input or EOF."""
    while True:
        q = _prompt("\nEnter a language to list ALL filepaths (press Enter to exit): ")
        if not q:
            break
        lang = NAME_INDEX.get(_norm(q))
        if not lang:
            print(f"Unknown language: '{q}'. Try again (e.g., lua, js, csharp, objc, kotlin).")
            continue
        paths = language_files.get(lang, [])
        if not paths:
            print(f"No files detected for {lang}.")
            continue

        print(f"\nListing {len(paths)} file(s) for {lang}:")
        for p in paths:
            print(p)


def main():
    """Interactive entry point: ask for a path, scan it, report, then answer queries.

    Exits with status 1 when the entered path is neither an existing file nor
    an existing directory. That includes the empty answer produced when stdin
    is closed.
    """
    target_path = os.path.expanduser(_prompt("Enter a file or directory to scan: "))

    if os.path.isdir(target_path):
        ignore_dirs = []  # customize if needed
        print(f"\nCollecting files from: {target_path}")
        file_paths = collect_file_paths(target_path, ignore_dirs)
    elif os.path.isfile(target_path):
        print(f"\nCollecting single file: {target_path}")
        file_paths = [target_path]
    else:
        print("Invalid path. Please enter an existing file or directory.")
        sys.exit(1)

    print(f"Total files found: {len(file_paths)}\n")

    language_files = _scan_files(file_paths)
    _print_report(language_files)

    # ---------- Interactive query for filepaths by language ----------
    _query_loop(language_files)

# Start the interactive scanner only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
