# Statistical Analysis


In [6]:
from collections import Counter
import os

ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZ,.-"
MOD = len(ALPHABET)


def read_ciphertext(filename):
    with open(filename, "r", encoding="utf-8") as f:
        text = f.read().strip()
    return "".join(c for c in text if c in ALPHABET)


def ngrams(text, n):
    return [text[i:i+n] for i in range(len(text) - n + 1)]


def frequency_table(items):
    total = len(items)
    counter = Counter(items)
    return counter, total


def index_of_coincidence(text):
    freq = Counter(text)
    N = len(text)
    return sum(f * (f - 1) for f in freq.values()) / (N * (N - 1))


def format_table(counter, total, limit=15):
    lines = []
    for i, (k, v) in enumerate(counter.most_common()):
        if i >= limit:
            break
        perc = v / total * 100
        lines.append(f"{k:>4} : {v:>6} ({perc:5.2f}%)")
    return "\n".join(lines)


In [15]:
def analyze_cipher_to_file(cipher_id, base_path="177-Student"):
    input_file = os.path.join(base_path, f"{cipher_id}.txt")
    output_dir = os.path.join(base_path, "analysis_results")
    os.makedirs(output_dir, exist_ok=True)

    output_file = os.path.join(output_dir, f"analysis_{cipher_id}.txt")

    text = read_ciphertext(input_file)

    mono_counter, mono_total = frequency_table(text)
    di_counter, di_total = frequency_table(ngrams(text, 2))
    tri_counter, tri_total = frequency_table(ngrams(text, 3))

    ic = index_of_coincidence(text)

    with open(output_file, "w", encoding="utf-8") as f:
        f.write(f"Statistical Analysis – Cipher {cipher_id}\n")
        f.write("=" * 40 + "\n\n")

        f.write(f"Text length: {len(text)}\n")
        f.write(f"Index of Coincidence: {ic:.5f}\n\n")

        f.write("1-grams (characters)\n")
        f.write("-" * 25 + "\n")
        f.write(format_table(mono_counter, mono_total) + "\n\n")

        f.write("2-grams (digrams)\n")
        f.write("-" * 25 + "\n")
        f.write(format_table(di_counter, di_total) + "\n\n")

        f.write("3-grams (trigrams)\n")
        f.write("-" * 25 + "\n")
        f.write(format_table(tri_counter, tri_total) + "\n")

    print(f"✔ Analysis for cipher {cipher_id} saved to {output_file}")


In [16]:
for i in range(4):
    analyze_cipher_to_file(i)


✔ Analysis for cipher 0 saved to 177-Student\analysis_results\analysis_0.txt
✔ Analysis for cipher 1 saved to 177-Student\analysis_results\analysis_1.txt
✔ Analysis for cipher 2 saved to 177-Student\analysis_results\analysis_2.txt
✔ Analysis for cipher 3 saved to 177-Student\analysis_results\analysis_3.txt


The statistical analysis of all four ciphertexts was performed using a custom Python notebook.
For each ciphertext, frequency distributions of characters, digrams and trigrams, as well as the index of coincidence, were computed and saved to individual analysis files

# Classification

In [5]:
ENGLISH_FREQ = {
    'E': 11.93, 'T': 8.80, 'A': 7.90, 'O': 7.52, 'I': 6.82, 'H': 6.42,
    'N': 6.23, 'S': 5.99, 'R': 5.54, 'D': 4.36, 'L': 4.00,
    'U': 2.83, 'M': 2.64, 'W': 2.37, 'Y': 2.24, 'F': 2.11,
    'C': 2.10, 'G': 1.89, ',': 1.70, '.': 1.50, 'P': 1.47,
    'B': 1.37, 'V': 0.92, 'K': 0.75, '-': 0.21, 'X': 0.14,
    'Q': 0.11, 'J': 0.09, 'Z': 0.04
}


In [21]:
def chi_square_stat(text):
    N = len(text)
    freq = Counter(text)
    chi2 = 0
    for c in ALPHABET:
        observed = freq.get(c, 0)
        expected = ENGLISH_FREQ.get(c, 0) * N / 100
        if expected > 0:
            chi2 += (observed - expected) ** 2 / expected
    return chi2


In [22]:
def avg_ic_for_period(text, period):
    ics = []
    for i in range(period):
        stream = text[i::period]
        if len(stream) > 1:
            ics.append(index_of_coincidence(stream))
    return sum(ics) / len(ics)


In [23]:
def classify_cipher_robust(text):
    ic = index_of_coincidence(text)

    if ic > 0.055:
        # Cesare vs Sostituzione
        chi2 = chi_square_stat(text)
        if chi2 < 150:
            return "Caesar"
        else:
            return "Substitution"

    else:
        # Vigenere vs Hill
        ic5 = avg_ic_for_period(text, 5)
        if ic5 > 0.055:
            return "Vigenere"
        else:
            return "Hill"


In [24]:
results = {}
for i in range(4):
    text = read_ciphertext(f"177-Student/{i}.txt")
    results[i] = classify_cipher_robust(text)

for k, v in results.items():
    print(f"Cipher {k}: {v}")


Cipher 0: Substitution
Cipher 1: Substitution
Cipher 2: Hill
Cipher 3: Vigenere


The identification of the cipher types was partially automated by extracting statistical features such as the index of coincidence, frequency variance, and dominant n-gram distributions using a Python script.
Based on these features, each ciphertext was classified and subsequently decrypted using appropriate tools.
The automatic classification was used as a support tool; final decisions were validated manually through frequency analysis.
An automatic classification based on statistical features was initially used to support the identification of the cipher types.
Since Caesar ciphers are a special case of monoalphabetic substitution, the final classification was refined manually by comparing the frequency distributions with the reference English statistics.

# CESAR

In [2]:
def caesar_decrypt(text, shift):
    result = ""
    for c in text:
        idx = ALPHABET.index(c)
        result += ALPHABET[(idx - shift) % MOD]
    return result


def brute_force_caesar(filename):
    text = read_ciphertext(filename)
    for shift in range(MOD):
        candidate = caesar_decrypt(text, shift)
        print(f"\nSHIFT = {shift}")
        print(candidate[:400])


In [12]:
brute_force_caesar("177-Student/1.txt")



SHIFT = 0
J,XKIKTBXYYXVJEYJTA.DZTMTOTBBWXI.HXYEHVECCKD.VTJ.EDUOCTA.DZJ,X.HMEHWIIEKDWJ,.DTDWICTBBTDWQTYJXHMTBA.DZHEKDWJ,XWXVAJ,HXXEHYEKHJ.CXIQJ,XOVBKIJXHXWJEZXJ,XHQOTMD.DZWXXFBOQTDWBEEA.DZTJJ,XITCXIFEJEYWXXFZBEECEDJ,XUTDAIRCKHCKH.DZLXHOBEM.DJ,XH,OJ,C.VTBJEDXEYEDXEFFHXIIXWUOJ,XT.HQCHIRYBKI,.DZUXZTDJEMEDWXHM,XHXJ,XOMXHXJEIBXXFQYEHJ,XOVEKBWDEJIBXXFWEMDIJT.HIQJ,XOVEKBWDEJIBXXF.DTWEZ,EBXICXBB.DZEYE.BQJ,XOVEKBWDEJ

SHIFT = 1
IZWJHJSAWXXWUIDXIS-,CYSLSNSAAVWH,GWXDGUDBBJC,USI,DCTNBS-,CYIZW,GLDGVHHDJCVIZ,CSCVHBSAASCVPSXIWGLSA-,CYGDJCVIZWVWU-IZGWWDGXDJGI,BWHPIZWNUAJHIWGWVIDYWIZWGPNSLC,CYVWWEANPSCVADD-,CYSIIZWHSBWHEDIDXVWWEYADDBDCIZWTSC-HQBJGBJG,CYKWGNADL,CIZWGZNIZB,USAIDCWDXDCWDEEGWHHWVTNIZWS,GPBGHQXAJHZ,CYTWYSCIDLDCVWGLZWGWIZWNLWGWIDHAWWEPXDGIZWNUDJAVCDIHAWWEVDLCHIS,GHPIZWNUDJAVCDIHAWWE,CSVDYZDAWHBWAA,CYDXD,APIZWNUDJAVCDI

SHIFT = 2
HYVIGIR-VWWVTHCWHR.ZBXRKRMR--UVGZFVWCFTCAAIBZTRHZCBSMAR.ZBXHYVZFKCFUGGCIBUHYZBRBUGAR--RBUORWHVFKR-.ZBXFCIBUHYVUVT.HYFVVCFWCIFHZAVGOHYVMT-IGHVFVUHCXVHYVFOMRKBZBXUVVD-

THE USUAL EFFECT OF TAKING AWAY ALL DESIRE FOR COMMUNICATION BY MAKING THEIR WORDS SOUND THIN AND SMALL AND, AFTER WALKING ROUND THE DECK THREE OR FOUR TIMES, THEY CLUSTERED TOGETHER, YAWNING DEEPLY, AND LOOKING AT THE SAME SPOT OF DEEP GLOOM ON THE BANKS. MURMURING VERY LOW IN THE RHYTHMICAL TONE OF ONE OPPRESSED BY THE AIR, MRS. FLUSHING BEGAN TO WONDER WHERE THEY WERE TO SLEEP, FOR THEY COULD NOT SLEEP DOWNSTAIRS, THEY COULD NOT SLEEP IN A DOG-HOLE SMELLING OF OIL, THEY COULD NOT

## Vigenère

In [45]:
ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZ.,-"
MOD = len(ALPHABET)
from collections import Counter

ENGLISH_FREQ = {
    'A': 0.08167, 'B': 0.01492, 'C': 0.02782, 'D': 0.04253,
    'E': 0.12702, 'F': 0.02228, 'G': 0.02015, 'H': 0.06094,
    'I': 0.06966, 'J': 0.00153, 'K': 0.00772, 'L': 0.04025,
    'M': 0.02406, 'N': 0.06749, 'O': 0.07507, 'P': 0.01929,
    'Q': 0.00095, 'R': 0.05987, 'S': 0.06327, 'T': 0.09056,
    'U': 0.02758, 'V': 0.00978, 'W': 0.02360, 'X': 0.00150,
    'Y': 0.01974, 'Z': 0.00074
}

def chi_squared_letters_only(text):
    letters = [c for c in text if c in ENGLISH_FREQ]
    N = len(letters)
    if N == 0:
        return float('inf')
    freq = Counter(letters)
    return sum(
        (freq.get(c, 0) - ENGLISH_FREQ[c] * N) ** 2 / (ENGLISH_FREQ[c] * N)
        for c in ENGLISH_FREQ
    )


In [58]:
text = read_ciphertext("177-Student/3.txt")
guess_key_length(text)


k  |  IC
--------
 1 | 0.0395
 2 | 0.0395
 3 | 0.0389
 4 | 0.0399
 5 | 0.0643
 6 | 0.0390
 7 | 0.0393
 8 | 0.0394
 9 | 0.0388
10 | 0.0646
11 | 0.0385
12 | 0.0404
13 | 0.0400
14 | 0.0387
15 | 0.0628
16 | 0.0398
17 | 0.0389
18 | 0.0376
19 | 0.0403
20 | 0.0658


In [52]:
def caesar_decrypt(text, shift):
    return ''.join(
        ALPHABET[(ALPHABET.index(c) - shift) % MOD] for c in text
    )

def find_key(ciphertext, key_len):
    key = ""
    for i in range(key_len):
        slice_text = ciphertext[i::key_len]
        scores = [
            (chi_squared_letters_only(caesar_decrypt(slice_text, s)), s)
            for s in range(MOD)
        ]
        key += ALPHABET[min(scores)[1]]
    return key

def vigenere_decrypt(ciphertext, key):
    return ''.join(
        ALPHABET[
            (ALPHABET.index(c) - ALPHABET.index(key[i % len(key)])) % MOD
        ]
        for i, c in enumerate(ciphertext)
    )


In [53]:
def try_key_lengths(ciphertext, k_min=4, k_max=17, preview=300):
    results = []
    for k in range(k_min, k_max + 1):
        key = find_key(ciphertext, k)
        plaintext = vigenere_decrypt(ciphertext, key)
        results.append((k, key, plaintext))
        print("=" * 60)
        print(f"Key length: {k}")
        print(f"Recovered key: {key}")
        print("Plaintext preview:")
        print(plaintext[:preview])
        print()
    return results


In [57]:
results = try_key_lengths(text, 2, 20)


Key length: 2
Recovered key: UU
Plaintext preview:
NBLTPBIHN,ZHRGVAVLSVBT,NUBICMCZOPIHPT.-ZMFJIVDFRGVNPJOI-T,RVBICRVLSCWZWENERNPAKHIS,CYPMFANLSBEUJBLDALSTE-WPSST-F,TI-FQIBBICWRBFP.YPMCNRATCRGPEZOBPNHHYBEHSTZWCRRYFURV.UHLVIS,CYPMAOBBJLUVO-RGV-FQCV-U,IBBP-ER-FTO-CUHOBIJKTCWEYGVWFLUFRFB,GSFCFWPDROWBICSVAV-TV-HSHVAX,SRWJRT-PE,SYPE-YFTEJEMIXFOOLHREF-FEAF

Key length: 3
Recovered key: HHU
Plaintext preview:
.OLD-BVUNLJHBTVNFLCFBDLNEOIPZCJ,PVUPDK-JZFWVVQSRTFN-WOVMTLBVOVCBFLCPWJGE.RR.-AXUICLCI-MSNNYCBREJOYDNYSDR-G-SCD-SLTVMFAVBOVCGBBS-.I-MP.RNDCBTPRJOO-NUUYORHCDZGPRBIFEBVKEHYFICLCI-MN,BOWLEFOMBGFMFAPVME,VOB-MEBMFD,-PEH,OIWXTPGEITVGSLESRSO,TCFPSW-QR,GBVPSFNVMDVMUSUFAHLSBGJBD--R,CIPRMYSDEWRMVHF,,LUBESMFRNF

Key length: 4
Recovered key: URHU
Plaintext preview:
NEYTPEVHNBJHRJFAVOCVBWLNUEVCMFJOPLUPTAMZMIWIVGSRGY.PJRV-TBBVBLPRVOCCW-GENHBNPDXHIVLCYSZFAQYSBHEJBOQALVDE-Z-SSWMF,WV-FTVBBLPWRESP.,-MCQBATFBGPHJOBS.HH,OEHVDZWFBRYIERVAEHLYVS,FIPMD,BBMYUVRMRGYMFQFF-UBVBBSMERCSTOCPUHROIJNDCWHIGVZSLUIBFBBTSFFSW

In [72]:
from collections import Counter

# Alfabeto dato
ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZ.,-"
MOD = len(ALPHABET)

# Frequenze standard INGLESE (approssimative)
# A-Z standard + stime per . , -
# Nota: Assegno a '-' una frequenza alta (simil-spazio)
ENG_FREQS = {
    'A': 0.0817, 'B': 0.0149, 'C': 0.0278, 'D': 0.0425, 'E': 0.1270, 'F': 0.0223,
    'G': 0.0202, 'H': 0.0609, 'I': 0.0697, 'J': 0.0015, 'K': 0.0077, 'L': 0.0403,
    'M': 0.0241, 'N': 0.0675, 'O': 0.0751, 'P': 0.0193, 'Q': 0.0010, 'R': 0.0599,
    'S': 0.0633, 'T': 0.0906, 'U': 0.0276, 'V': 0.0098, 'W': 0.0236, 'X': 0.0015,
    'Y': 0.0197, 'Z': 0.0007,
    '.': 0.0100, ',': 0.0100, '-': 0.1500 # Assumo che '-' sia lo spazio
}

def index_of_coincidence(text):
    N = len(text)
    if N <= 1: return 0
    freq = Counter(text)
    return sum(f * (f - 1) for f in freq.values()) / (N * (N - 1))

def average_ic(text, k):
    parts = [''.join(text[i::k]) for i in range(k)]
    return sum(index_of_coincidence(p) for p in parts) / k

def vigenere_decrypt(text, key):
    res = ""
    for i, c in enumerate(text):
        t = ALPHABET.index(c)
        k = ALPHABET.index(key[i % len(key)])
        res += ALPHABET[(t - k) % MOD]
    return res

# --- NUOVA LOGICA DI CRACKING ---

def chi_squared_score(text_segment):
    """
    Calcola quanto il testo assomiglia all'inglese.
    Più basso è il punteggio, più è probabile che sia inglese.
    """
    count = Counter(text_segment)
    length = len(text_segment)
    score = 0
    
    for char in ALPHABET:
        observed = count[char]
        expected = length * ENG_FREQS.get(char, 0)
        # Formula Chi-Quadro: (Osservato - Atteso)^2 / Atteso
        if expected > 0:
            score += ((observed - expected) ** 2) / expected
        else:
            # Penalità se compare un carattere che non dovrebbe esistere (raro)
            if observed > 0: score += 100 
            
    return score

def recover_key_chi_squared(text, k):
    key = ""
    # Per ogni posizione nella chiave (da 0 a k-1)
    for i in range(k):
        column = ''.join(text[i::k]) # Prendi la fetta i-esima
        best_shift = 0
        min_chi = float('inf')
        
        # Prova tutte le 29 lettere possibili come parte della chiave
        for shift_idx in range(MOD):
            # Decifra la colonna con questa lettera
            char_key = ALPHABET[shift_idx]
            # Nota: Decifrare una colonna con Cesare significa spostare indietro
            shifted_col = []
            for c in column:
                c_idx = ALPHABET.index(c)
                p_idx = (c_idx - shift_idx) % MOD
                shifted_col.append(ALPHABET[p_idx])
            
            current_text = "".join(shifted_col)
            
            # Calcola quanto assomiglia all'inglese
            chi = chi_squared_score(current_text)
            
            if chi < min_chi:
                min_chi = chi
                best_shift = shift_idx
        
        key += ALPHABET[best_shift]
    return key

# --- ESECUZIONE ---

text = "EVCKGV-,ESQ,I.MUMCJMVKSELV-WDWQFG-,GKRTQDZA-MXZI.MEGAF-TKSIMV-WIMCJWNQNYEYIEGUB,-JSWPGDZUECJVYLAVCXUCJKYTNGJJKTZSK-TZH-VV-WNIVZGRPGDWEIUKWI.GYQFVGE,,PVY,JKQNWIIPZLIMRL,CM-JSWPGDUFVVACLMFTI.MTZHWMTLS-VVGTYITZKFTWL,FV-ABKWNYP.MNZCLZIZVS.JZWZNGXIFNV-WJMUMTKMT,J,MUOSJINAIKTGYSJPGYTPZKYAYD-OZFFC,IYZTZYUZFAC,XGHEYZHGGJWOZBFUGFIJFUAY.MFVAF-FSEFWT,WCTQOSELKFLUZFDNJXGXJCIVZVLXQFI.MWF--VFFWJ,Q,LFUGFQ.MULSPMFSZFBGNWIFKFSGXCJWE.EGCKMPLBYVV,DI,KPVUDUSECI,AC,BKLZUUKXGF,EGEYIPYSEWVZTFWMACFVGGXKPGESEDUHSI,GDNZ-TF,JPGYH-.VAC,HTGDD,-WJKWPLZYMXZC-VIGXKPGKWMMPLZXI,SSJ.JZNJIVVIXQPFWIFJZSGXGVGYLOGGYZGKICMUKI.IPMHLINTI.MFACEMTUIUJNZLU,UZIVMVOWYVVODCWP.L-VFGLJBJAU.BGJWCMHLJEKWJIUQPZVVDJZAYVUGGXMTKQXITCCY,U,WCTCKH.ITHAPIUV-EQHZ,E.JAHWTKESKM-VCX.JZIFBPLZYVUHGUVIGJKQPX,IKNZHUVFD,EMUGXVZK.ZKLQLHVMPZSKPV-WDGDM,CLKFYJBJAU.VGNWI,JGLYLDQVUDU-DNMFWNEQI-IRIPYI.MUZSZTQOWXZK.ZKWXZGKPGDSELLMV,QP.TP.JZBFAKFYCQI-IJWHLZY,VZSDMTKQKPGK,,PV,JCNKDAYLV-WJIOZELZRGHYIUVCFZE-WJ.TV,EINGCXWPJWJ.CMGUVVSSELUAAYVEZZULKLHJMVL,EOAO,CTKVBGMRHWIWDKWIAGY,KNQJHFUGL,DMJZEL.QFZ-,UHWW.CXAY,VGUFVVZBGTCLWKPGKUYVGT"

# 1. Impostiamo lunghezza chiave a 5 (visto il picco a 5, 10, 15)
key_len = 5
print(f"Tentativo con lunghezza chiave: {key_len}")

# 2. Recupera la chiave
recovered_key = recover_key_chi_squared(text, key_len)
print("Key trovata:", recovered_key)

# 3. Decifra
pt = vigenere_decrypt(text, recovered_key)
print("\nTesto Decifrato (primi 200 char):")
print(pt[:200])

Tentativo con lunghezza chiave: 5
Key trovata: CVXUI

Testo Decifrato (primi 200 char):
CAIT,THENKOGOGESUISETSYNDTH-MOONMITESX-IBEGIEVEOGECOGOURSYRETH-REAR-WILDKEACO.KT.RYCHELCAZAR,ED,AIDMARQELLOPSCREYTURENINTH-WATEM.HEL-NASS-RSEDWONEMEHHTDESCOV-RANERREPTELE.RYCHEL.ONTIIUED,OGERENCERTYIN


In [73]:
from collections import Counter

ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZ.,-"
MOD = len(ALPHABET)

# Frequenze ottimizzate: '-' è lo SPAZIO (altissima frequenza)
ENG_FREQS = {
    'A': 0.08, 'B': 0.015, 'C': 0.028, 'D': 0.043, 'E': 0.13, 'F': 0.022,
    'G': 0.02, 'H': 0.06, 'I': 0.07, 'J': 0.002, 'K': 0.008, 'L': 0.04,
    'M': 0.024, 'N': 0.067, 'O': 0.075, 'P': 0.019, 'Q': 0.001, 'R': 0.06,
    'S': 0.063, 'T': 0.091, 'U': 0.028, 'V': 0.01, 'W': 0.024, 'X': 0.002,
    'Y': 0.02, 'Z': 0.001,
    '.': 0.01, ',': 0.01, '-': 0.18  # IL TRATTINO È LO SPAZIO
}

def chi_squared_score(text_segment):
    count = Counter(text_segment)
    length = len(text_segment)
    score = 0
    for char in ALPHABET:
        observed = count[char]
        expected = length * ENG_FREQS.get(char, 0)
        if expected > 0:
            score += ((observed - expected) ** 2) / expected
    return score

def inspect_key_candidates(text, k):
    print(f"--- Analisi Candidati Chiave (Lunghezza {k}) ---")
    final_key = ""
    
    for i in range(k):
        column = ''.join(text[i::k])
        candidates = []
        
        for shift in range(MOD):
            # Decifra colonna con shift
            char_key = ALPHABET[shift]
            shifted_col = []
            for c in column:
                c_idx = ALPHABET.index(c)
                p_idx = (c_idx - shift) % MOD
                shifted_col.append(ALPHABET[p_idx])
            
            # Calcola score (minore è meglio)
            score = chi_squared_score("".join(shifted_col))
            candidates.append((char_key, score))
        
        # Ordina per score migliore (più basso)
        candidates.sort(key=lambda x: x[1])
        
        # Mostra i top 3 candidati per questa posizione
        print(f"Posizione {i+1}: 1° '{candidates[0][0]}' (score {candidates[0][1]:.2f}) | 2° '{candidates[1][0]}' | 3° '{candidates[2][0]}'")
        final_key += candidates[0][0]
    
    return final_key

def vigenere_decrypt(text, key):
    res = ""
    for i, c in enumerate(text):
        t = ALPHABET.index(c)
        k = ALPHABET.index(key[i % len(key)])
        res += ALPHABET[(t - k) % MOD]
    return res

# INSERISCI QUI IL TUO TESTO
text = "EVCKGV-,ESQ,I.MUMCJMVKSELV-WDWQFG-,GKRTQDZA-MXZI.MEGAF-TKSIMV-WIMCJWNQNYEYIEGUB,-JSWPGDZUECJVYLAVCXUCJKYTNGJJKTZSK-TZH-VV-WNIVZGRPGDWEIUKWI.GYQFVGE,,PVY,JKQNWIIPZLIMRL,CM-JSWPGDUFVVACLMFTI.MTZHWMTLS-VVGTYITZKFTWL,FV-ABKWNYP.MNZCLZIZVS.JZWZNGXIFNV-WJMUMTKMT,J,MUOSJINAIKTGYSJPGYTPZKYAYD-OZFFC,IYZTZYUZFAC,XGHEYZHGGJWOZBFUGFIJFUAY.MFVAF-FSEFWT,WCTQOSELKFLUZFDNJXGXJCIVZVLXQFI.MWF--VFFWJ,Q,LFUGFQ.MULSPMFSZFBGNWIFKFSGXCJWE.EGCKMPLBYVV,DI,KPVUDUSECI,AC,BKLZUUKXGF,EGEYIPYSEWVZTFWMACFVGGXKPGESEDUHSI,GDNZ-TF,JPGYH-.VAC,HTGDD,-WJKWPLZYMXZC-VIGXKPGKWMMPLZXI,SSJ.JZNJIVVIXQPFWIFJZSGXGVGYLOGGYZGKICMUKI.IPMHLINTI.MFACEMTUIUJNZLU,UZIVMVOWYVVODCWP.L-VFGLJBJAU.BGJWCMHLJEKWJIUQPZVVDJZAYVUGGXMTKQXITCCY,U,WCTCKH.ITHAPIUV-EQHZ,E.JAHWTKESKM-VCX.JZIFBPLZYVUHGUVIGJKQPX,IKNZHUVFD,EMUGXVZK.ZKLQLHVMPZSKPV-WDGDM,CLKFYJBJAU.VGNWI,JGLYLDQVUDU-DNMFWNEQI-IRIPYI.MUZSZTQOWXZK.ZKWXZGKPGDSELLMV,QP.TP.JZBFAKFYCQI-IJWHLZY,VZSDMTKQKPGK,,PV,JCNKDAYLV-WJIOZELZRGHYIUVCFZE-WJ.TV,EINGCXWPJWJ.CMGUVVSSELUAAYVEZZULKLHJMVL,EOAO,CTKVBGMRHWIWDKWIAGY,KNQJHFUGL,DMJZEL.QFZ-,UHWW.CXAY,VGUFVVZBGTCLWKPGKUYVGT"

# Forza lunghezza 5
best_key = inspect_key_candidates(text, 5)
print(f"\nChiave suggerita: {best_key}")

pt = vigenere_decrypt(text, best_key)
print(f"Testo (primi 100): {pt[:100]}")

--- Analisi Candidati Chiave (Lunghezza 5) ---
Posizione 1: 1° 'C' (score 88.10) | 2° 'H' | 3° 'Q'
Posizione 2: 1° 'V' (score 59.11) | 2° 'G' | 3° 'H'
Posizione 3: 1° 'X' (score 358.81) | 2° 'D' | 3° 'H'
Posizione 4: 1° 'U' (score 72.07) | 2° 'F' | 3° 'C'
Posizione 5: 1° 'I' (score 60.37) | 2° 'W' | 3° 'N'

Chiave suggerita: CVXUI
Testo (primi 100): CAIT,THENKOGOGESUISETSYNDTH-MOONMITESX-IBEGIEVEOGECOGOURSYRETH-REAR-WILDKEACO.KT.RYCHELCAZAR,ED,AIDM


In [5]:
from collections import Counter

# Definizione dell'alfabeto e delle frequenze attese (Inglese + Spazio/Punteggiatura)
ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZ.,-"
MOD = len(ALPHABET)

# Frequenze medie della lingua inglese (con '-' come spazio al 15%)
ENGLISH_FREQS = {
    'A': 0.0817, 'B': 0.0149, 'C': 0.0278, 'D': 0.0425, 'E': 0.1270, 'F': 0.0223,
    'G': 0.0202, 'H': 0.0609, 'I': 0.0697, 'J': 0.0015, 'K': 0.0077, 'L': 0.0403,
    'M': 0.0241, 'N': 0.0675, 'O': 0.0751, 'P': 0.0193, 'Q': 0.0010, 'R': 0.0599,
    'S': 0.0633, 'T': 0.0906, 'U': 0.0276, 'V': 0.0098, 'W': 0.0236, 'X': 0.0015,
    'Y': 0.0197, 'Z': 0.0007, '.': 0.0100, ',': 0.0100, '-': 0.1800 # Trattino come spazio
}

def vigenere_decrypt(text, key):
    res = ""
    for i, c in enumerate(text):
        t_idx = ALPHABET.index(c)
        k_idx = ALPHABET.index(key[i % len(key)])
        res += ALPHABET[(t_idx - k_idx) % MOD]
    return res

def chi_squared(text_segment):
    """Calcola lo score Chi-Quadro: più basso è, meglio è."""
    count = Counter(text_segment)
    total = len(text_segment)
    score = 0
    for char, expected_prob in ENGLISH_FREQS.items():
        observed = count.get(char, 0)
        expected = total * expected_prob
        if expected > 0:
            score += ((observed - expected) ** 2) / expected
    return score

def crack_vigenere(ciphertext, key_len):
    best_key = ""
    
    # Analizziamo ogni posizione della chiave in modo indipendente
    for i in range(key_len):
        column = ciphertext[i::key_len]
        best_char = ''
        min_score = float('inf')
        
        # Proviamo ognuna delle 29 lettere per questa posizione della chiave
        for char_idx in range(MOD):
            # Decifra la colonna con questa lettera (spostamento Cesare)
            decoded_col = "".join(ALPHABET[(ALPHABET.index(c) - char_idx) % MOD] for c in column)
            score = chi_squared(decoded_col)
            
            if score < min_score:
                min_score = score
                best_char = ALPHABET[char_idx]
        
        best_key += best_char
        
    return best_key

# --- ESECUZIONE ---

ciphertext = "EVCKGV-,ESQ,I.MUMCJMVKSELV-WDWQFG-,GKRTQDZA-MXZI.MEGAF-TKSIMV-WIMCJWNQNYEYIEGUB,-JSWPGDZUECJVYLAVCXUCJKYTNGJJKTZSK-TZH-VV-WNIVZGRPGDWEIUKWI.GYQFVGE,,PVY,JKQNWIIPZLIMRL,CM-JSWPGDUFVVACLMFTI.MTZHWMTLS-VVGTYITZKFTWL,FV-ABKWNYP.MNZCLZIZVS.JZWZNGXIFNV-WJMUMTKMT,J,MUOSJINAIKTGYSJPGYTPZKYAYD-OZFFC,IYZTZYUZFAC,XGHEYZHGGJWOZBFUGFIJFUAY.MFVAF-FSEFWT,WCTQOSELKFLUZFDNJXGXJCIVZVLXQFI.MWF--VFFWJ,Q,LFUGFQ.MULSPMFSZFBGNWIFKFSGXCJWE.EGCKMPLBYVV,DI,KPVUDUSECI,AC,BKLZUUKXGF,EGEYIPYSEWVZTFWMACFVGGXKPGESEDUHSI,GDNZ-TF,JPGYH-.VAC,HTGDD,-WJKWPLZYMXZC-VIGXKPGKWMMPLZXI,SSJ.JZNJIVVIXQPFWIFJZSGXGVGYLOGGYZGKICMUKI.IPMHLINTI.MFACEMTUIUJNZLU,UZIVMVOWYVVODCWP.L-VFGLJBJAU.BGJWCMHLJEKWJIUQPZVVDJZAYVUGGXMTKQXITCCY,U,WCTCKH.ITHAPIUV-EQHZ,E.JAHWTKESKM-VCX.JZIFBPLZYVUHGUVIGJKQPX,IKNZHUVFD,EMUGXVZK.ZKLQLHVMPZSKPV-WDGDM,CLKFYJBJAU.VGNWI,JGLYLDQVUDU-DNMFWNEQI-IRIPYI.MUZSZTQOWXZK.ZKWXZGKPGDSELLMV,QP.TP.JZBFAKFYCQI-IJWHLZY,VZSDMTKQKPGK,,PV,JCNKDAYLV-WJIOZELZRGHYIUVCFZE-WJ.TV,EINGCXWPJWJ.CMGUVVSSELUAAYVEZZULKLHJMVL,EOAO,CTKVBGMRHWIWDKWIAGY,KNQJHFUGL,DMJZEL.QFZ-,UHWW.CXAY,VGUFVVZBGTCLWKPGKUYVGT"

key = crack_vigenere(ciphertext, 5)
plaintext = vigenere_decrypt(ciphertext, key)

print(f"Chiave recuperata: {key}")
print("-" * 30)
print(f"Testo decifrato:\n{plaintext}")

Chiave recuperata: CVXUI
------------------------------
Testo decifrato:
CAIT,THENKOGOGESUISETSYNDTH-MOONMITESX-IBEGIEVEOGECOGOURSYRETH-REAR-WILDKEACO.KT.RYCHELCAZAR,ED,AIDMARQELLOPSCREYTURENINTH-WATEM.HEL-NASS-RSEDWONEMEHHTDESCOV-RANERREPTELE.RYCHEL.ONTIIUED,OGERENCERTYINTOZEAREQOLUTEON.IHTOLDVGELEIURGE,,SHE-FFECOOFTH-SESUZTERGPHESWYSALIOTLEDYSHEDZYRIDGEY.WCO.AGOERREBARDIIHPEPKERFOMSOMEHOMENOS.SIBGEDAGOUD.KOORG-LLOWYNDINRARDLTSPECPLATE,UPONOGEUNFINDN-STOGROMENWGESTYYED.COWEV-R.INYPPAR-NSCOITENTHENTGJRTIX,AYS.KLAZIIHWITCAMICMOTCOKEANDYNOTEZOOKIIONEOATHEMYNYSPYRTELTFURNESHEDNISTIIH-ROJMT.BPTONTCEEVEIINGOATHES-VENTCDAZ.YSSHETSATAODINN-R.HEYPPEAMEDMOMERESOLESSOGANUNUAL,OGEDIINER-OABLERATSEOBETW-ENTWJLONFRINDORSWHI.GWER-LEFTPNCUROAINE,BYHEGENSOMDERSWDARKIETSG-LLASNGARPGYASAFNIFEENSHINCLIMYTE.AIDSHEOOWNTCENSPMANGOPTINCERCLENANDLENESOABRIFCTDOTNBENEYTHTH-M,BUELDINBSWHI.GNEV-RTHOREDBY,AYSHJWEDBTNIGHO.ANDOGESEYFLOW-DRIFCTOVEMTHELYNDJU,HINFZYSHEHOVINBLIGHOSOFTCETTEYMERSWTHESEHHTGPLFILGEDTH-SAME

In [2]:
def decifra_vigenere(testo, chiave):
    """
    Funzione accademica per la decifrazione di Vigenere.
    Supporta un alfabeto personalizzato di 29 caratteri.
    """
    ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZ.,-"
    MOD = len(ALPHABET)
    testo_decifrato = ""
    
    # Rimuoviamo eventuali spazi bianchi accidentali dalla chiave
    chiave = chiave.upper().strip()
    
    for i, carattere in enumerate(testo):
        if carattere in ALPHABET:
            # Indice del carattere nel testo
            idx_t = ALPHABET.index(carattere)
            # Indice del carattere nella chiave (ciclico)
            idx_k = ALPHABET.index(chiave[i % len(chiave)])
            
            # Formula di decifrazione: (C - K) mod 29
            nuovo_idx = (idx_t - idx_k) % MOD
            testo_decifrato += ALPHABET[nuovo_idx]
        else:
            # Se il carattere non è nell'alfabeto, lo lascia invariato
            testo_decifrato += carattere
            
    return testo_decifrato

# --- CONFIGURAZIONE ---
testo_cifrato = "EVCKGV-,ESQ,I.MUMCJMVKSELV-WDWQFG-,GKRTQDZA-MXZI.MEGAF-TKSIMV-WIMCJWNQNYEYIEGUB,-JSWPGDZUECJVYLAVCXUCJKYTNGJJKTZSK-TZH-VV-WNIVZGRPGDWEIUKWI.GYQFVGE,,PVY,JKQNWIIPZLIMRL,CM-JSWPGDUFVVACLMFTI.MTZHWMTLS-VVGTYITZKFTWL,FV-ABKWNYP.MNZCLZIZVS.JZWZNGXIFNV-WJMUMTKMT,J,MUOSJINAIKTGYSJPGYTPZKYAYD-OZFFC,IYZTZYUZFAC,XGHEYZHGGJWOZBFUGFIJFUAY.MFVAF-FSEFWT,WCTQOSELKFLUZFDNJXGXJCIVZVLXQFI.MWF--VFFWJ,Q,LFUGFQ.MULSPMFSZFBGNWIFKFSGXCJWE.EGCKMPLBYVV,DI,KPVUDUSECI,AC,BKLZUUKXGF,EGEYIPYSEWVZTFWMACFVGGXKPGESEDUHSI,GDNZ-TF,JPGYH-.VAC,HTGDD,-WJKWPLZYMXZC-VIGXKPGKWMMPLZXI,SSJ.JZNJIVVIXQPFWIFJZSGXGVGYLOGGYZGKICMUKI.IPMHLINTI.MFACEMTUIUJNZLU,UZIVMVOWYVVODCWP.L-VFGLJBJAU.BGJWCMHLJEKWJIUQPZVVDJZAYVUGGXMTKQXITCCY,U,WCTCKH.ITHAPIUV-EQHZ,E.JAHWTKESKM-VCX.JZIFBPLZYVUHGUVIGJKQPX,IKNZHUVFD,EMUGXVZK.ZKLQLHVMPZSKPV-WDGDM,CLKFYJBJAU.VGNWI,JGLYLDQVUDU-DNMFWNEQI-IRIPYI.MUZSZTQOWXZK.ZKWXZGKPGDSELLMV,QP.TP.JZBFAKFYCQI-IJWHLZY,VZSDMTKQKPGK,,PV,JCNKDAYLV-WJIOZELZRGHYIUVCFZE-WJ.TV,EINGCXWPJWJ.CMGUVVSSELUAAYVEZZULKLHJMVL,EOAO,CTKVBGMRHWIWDKWIAGY,KNQJHFUGL,DMJZEL.QFZ-,UHWW.CXAY,VGUFVVZBGTCLWKPGKUYVGT"

# PROVA QUESTE CHIAVI:
# "LVWUI" (Statistica)
# "LIVES" (Parola senso compiuto)
# "LIVID" (Parola senso compiuto)
chiave_test = "CVSUI" 

risultato = decifra_vigenere(testo_cifrato, chiave_test)

print(f"Chiave usata: {chiave_test}")
print(f"Risultato (primi 300 char):\n{risultato[:300]}")

Chiave usata: CVSUI
Risultato (primi 300 char):
CANT,THJNKOGTGESUNSETSANDTHEMOONRITES--IBELIEVETGECOLOURSARETHEREAREWILDPEACOCKT.RACHELHAZARDED,ANDMARVELLOUSCREATURESINTHEWATER.HELENASSERSED,ONEMJHHTDJSCOVERANEWREPTJLE.RACHELCONTINUED,TGERESCERTAINTOBEAREVOLUTJON.IMTOLD.GELENURGED,SHEEFFECTOFTHESESUBTERGUHESWASALITTLEDASHEDBYRIDLEY.WHO.AGTERREGAR


In [3]:
ciphertext = "EVCKGV-,ESQ,I.MUMCJMVKSELV-WDWQFG-,GKRTQDZA-MXZI.MEGAF-TKSIMV-WIMCJWNQNYEYIEGUB,-JSWPGDZUECJVYLAVCXUCJKYTNGJJKTZSK-TZH-VV-WNIVZGRPGDWEIUKWI.GYQFVGE,,PVY,JKQNWIIPZLIMRL,CM-JSWPGDUFVVACLMFTI.MTZHWMTLS-VVGTYITZKFTWL,FV-ABKWNYP.MNZCLZIZVS.JZWZNGXIFNV-WJMUMTKMT,J,MUOSJINAIKTGYSJPGYTPZKYAYD-OZFFC,IYZTZYUZFAC,XGHEYZHGGJWOZBFUGFIJFUAY.MFVAF-FSEFWT,WCTQOSELKFLUZFDNJXGXJCIVZVLXQFI.MWF--VFFWJ,Q,LFUGFQ.MULSPMFSZFBGNWIFKFSGXCJWE.EGCKMPLBYVV,DI,KPVUDUSECI,AC,BKLZUUKXGF,EGEYIPYSEWVZTFWMACFVGGXKPGESEDUHSI,GDNZ-TF,JPGYH-.VAC,HTGDD,-WJKWPLZYMXZC-VIGXKPGKWMMPLZXI,SSJ.JZNJIVVIXQPFWIFJZSGXGVGYLOGGYZGKICMUKI.IPMHLINTI.MFACEMTUIUJNZLU,UZIVMVOWYVVODCWP.L-VFGLJBJAU.BGJWCMHLJEKWJIUQPZVVDJZAYVUGGXMTKQXITCCY,U,WCTCKH.ITHAPIUV-EQHZ,E.JAHWTKESKM-VCX.JZIFBPLZYVUHGUVIGJKQPX,IKNZHUVFD,EMUGXVZK.ZKLQLHVMPZSKPV-WDGDM,CLKFYJBJAU.VGNWI,JGLYLDQVUDU-DNMFWNEQI-IRIPYI.MUZSZTQOWXZK.ZKWXZGKPGDSELLMV,QP.TP.JZBFAKFYCQI-IJWHLZY,VZSDMTKQKPGK,,PV,JCNKDAYLV-WJIOZELZRGHYIUVCFZE-WJ.TV,EINGCXWPJWJ.CMGUVVSSELUAAYVEZZULKLHJMVL,EOAO,CTKVBGMRHWIWDKWIAGY,KNQJHFUGL,DMJZEL.QFZ-,UHWW.CXAY,VGUFVVZBGTCLWKPGKUYVGT"

# L'alfabeto corretto per questo ciphertext
ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZ,.-"
MOD = len(ALPHABET)

def decrypt(text, key):
    res = []
    key_idx = 0
    for char in text:
        if char in ALPHABET:
            c_val = ALPHABET.index(char)
            k_val = ALPHABET.index(key[key_idx % len(key)])
            p_val = (c_val - k_val) % MOD
            res.append(ALPHABET[p_val])
            key_idx += 1
        else:
            res.append(char)
    return "".join(res)

key = "CVSUI"
plaintext = decrypt(ciphertext, key)

print(f"CHIAVE: {key}")
print("-" * 60)
# Aggiungo spazi per leggibilità basandomi sulla punteggiatura inglese
formatted_text = plaintext.replace(".", ". ").replace(",", ", ")
print(formatted_text)

CHIAVE: CVSUI
------------------------------------------------------------
CANT. THINKOFTHESUNSETSANDTHEMOONRISES--IBELIEVETHECOLOURSARETHEREAREWILDPEACOCKS, RACHELHAZARDED. ANDMARVELLOUSCREATURESINTHEWATER, HELENASSERTED. ONEMIGHTDISCOVERANEWREPTILE, RACHELCONTINUED. THERESCERTAINTOBEAREVOLUTION, IMTOLD, HELENURGED. THEEFFECTOFTHESESUBTERFUGESWASALITTLEDASHEDBYRIDLEY, WHO, AFTERREGARDINGPEPPERFORSOMEMOMENTS, SIGHEDALOUD, POORFELLOWANDINWARDLYSPECULATEDUPONTHEUNKINDNESSOFWOMEN. HESTAYED, HOWEVER, INAPPARENTCONTENTMENTFORSIXDAYS, PLAYINGWITHAMICROSCOPEANDANOTEBOOKINONEOFTHEMANYSPARSELYFURNISHEDSITTING-ROOMS, BUTONTHEEVENINGOFTHESEVENTHDAY, ASTHEYSATATDINNER, HEAPPEAREDMORERESTLESSTHANUSUAL. THEDINNER-TABLEWASSETBETWEENTWOLONGWINDOWSWHICHWERELEFTUNCURTAINEDBYHELENSORDERS. DARKNESSFELLASSHARPLYASAKNIFEINTHISCLIMATE, ANDTHETOWNTHENSPRANGOUTINCIRCLESANDLINESOFBRIGHTDOTSBENEATHTHEM. BUILDINGSWHICHNEVERSHOWEDBYDAYSHOWEDBYNIGHT, ANDTHESEAFLOWEDRIGHTOVERTHELANDJUDGINGBYTHEMOVINGLIGHTSOFTHESTEAM

## SUBSTITUTION

In [12]:
import collections
import string

class CipherSolver:
    def __init__(self, ciphertext):
        self.ciphertext = ciphertext
        self.alphabet = sorted(list(set(ciphertext))) # Rileva tutti i caratteri unici
        self.mapping = {char: char for char in self.alphabet} # Mappa iniziale identità

    def analyze_frequencies(self):
        """Calcola e stampa le frequenze dei caratteri."""
        counter = collections.Counter(self.ciphertext)
        total = len(self.ciphertext)
        
        print("\n--- Analisi Frequenze ---")
        print(f"{'Char':<6} {'Count':<8} {'Freq (%)'}")
        print("-" * 25)
        for char, count in counter.most_common():
            freq = (count / total) * 100
            print(f"'{char}'    {count:<8} {freq:.2f}%")
        print("-" * 25)

    def set_substitution(self, cipher_char, plain_char):
        """Imposta una sostituzione. Usa plain_char=None per rimuovere."""
        if cipher_char in self.mapping:
            # Memorizziamo la sostituzione in minuscolo per distinguerla dal cifrato
            self.mapping[cipher_char] = plain_char.lower() if plain_char else cipher_char
        else:
            print(f"Attenzione: Il carattere '{cipher_char}' non è nel testo cifrato.")

    def decrypt(self):
        """Restituisce il testo decifrato con la mappa corrente."""
        return "".join([self.mapping.get(c, c) for c in self.ciphertext])

    def print_state(self):
        """Stampa il testo con le sostituzioni correnti evidenziate."""
        decrypted = self.decrypt()
        print("\n--- Anteprima Decifrazione ---")
        # Stampa solo i primi 300 caratteri per brevità
        preview = decrypted[:300] + "..."
        print(preview)
        
        # Mostra la mappa corrente
        print("\n--- Chiave Corrente ---")
        key_str = " | ".join([f"{k}->{v}" for k, v in self.mapping.items() if k != v])
        print(key_str if key_str else "Nessuna sostituzione impostata.")

# --- UTILIZZO ---

# Inserisci qui il tuo testo cifrato completo
ciphertext = read_ciphertext("177-Student/0.txt")
solver = CipherSolver(ciphertext)
solver.analyze_frequencies()

# APPLICAZIONE DELLE IPOTESI (Modifica qui per testare)
# Formato: solver.set_substitution('LETTERA_CIFRATA', 'lettera_in_chiaro')

solver.set_substitution('C', 'e') # Ipotesi frequenza massima
solver.set_substitution('U', 't') # Ipotesi trigramma UIC -> THE
solver.set_substitution('I', 'h')
solver.set_substitution('T', 'i') # Ipotesi TKJ -> ING
solver.set_substitution('K', 'n')
solver.set_substitution('J', 'g')

# Visualizza risultato
solver.print_state()


--- Analisi Frequenze ---
Char   Count    Freq (%)
-------------------------
'C'    127      12.12%
'U'    87       8.30%
'E'    85       8.11%
'I'    79       7.54%
'K'    75       7.16%
'L'    71       6.77%
'T'    68       6.49%
'N'    68       6.49%
','    64       6.11%
'Q'    38       3.63%
'S'    37       3.53%
'J'    30       2.86%
'V'    27       2.58%
'A'    27       2.58%
'W'    26       2.48%
'Z'    23       2.19%
'M'    20       1.91%
'Y'    18       1.72%
'-'    18       1.72%
'P'    16       1.53%
'D'    12       1.15%
'.'    11       1.05%
'G'    11       1.05%
'R'    8        0.76%
'B'    1        0.10%
'X'    1        0.10%
-------------------------

--- Anteprima Decifrazione ---
e.enVhiQetheYhESGeenLEYingANZZNnMQEAethingLLWLEnhESGeenANnLAiNWLN-theeBAiteZentN-intiZEAYPVhiAhLeeZeSnNtNnQYtNQEYGE,eLNZethinginhe,PGWtinthet,eeLEnStheLRYPEnStheM,Ng,eLLN-hiLLMeeAhVhiAhLeeZeSine.itEGQeVELMNLiti.eQYMEin-WQtNhe,P-N,nNhWZEnGeinghESe.e,ANZeLNAQNLetNhe,Ge-N,eDLheVELLt,WARZNtiNnQ

In [19]:
import collections
import string

class CipherSolver:
    def __init__(self, ciphertext):
        self.ciphertext = ciphertext
        self.alphabet = sorted(list(set(ciphertext))) # Rileva tutti i caratteri unici
        self.mapping = {char: char for char in self.alphabet} # Mappa iniziale identità

    def analyze_frequencies(self):
        """Calcola e stampa le frequenze dei caratteri."""
        counter = collections.Counter(self.ciphertext)
        total = len(self.ciphertext)
        
        print("\n--- Analisi Frequenze ---")
        print(f"{'Char':<6} {'Count':<8} {'Freq (%)'}")
        print("-" * 25)
        for char, count in counter.most_common():
            freq = (count / total) * 100
            print(f"'{char}'    {count:<8} {freq:.2f}%")
        print("-" * 25)

    def set_substitution(self, cipher_char, plain_char):
        """Imposta una sostituzione. Usa plain_char=None per rimuovere."""
        if cipher_char in self.mapping:
            # Memorizziamo la sostituzione in minuscolo per distinguerla dal cifrato
            self.mapping[cipher_char] = plain_char.lower() if plain_char else cipher_char
        else:
            print(f"Attenzione: Il carattere '{cipher_char}' non è nel testo cifrato.")

    def decrypt(self):
        """Restituisce il testo decifrato con la mappa corrente."""
        return "".join([self.mapping.get(c, c) for c in self.ciphertext])

    def print_state(self):
        """Stampa il testo con le sostituzioni correnti evidenziate."""
        decrypted = self.decrypt()
        print("\n--- Anteprima Decifrazione ---")
        # Stampa solo i primi 300 caratteri per brevità
        preview = decrypted[:300] + "..."
        print(preview)
        
        # Mostra la mappa corrente
        print("\n--- Chiave Corrente ---")
        key_str = " | ".join([f"{k}->{v}" for k, v in self.mapping.items() if k != v])
        print(key_str if key_str else "Nessuna sostituzione impostata.")

# --- UTILIZZO ---

# Inserisci qui il tuo testo cifrato completo
ciphertext = read_ciphertext("177-Student/0.txt")
solver = CipherSolver(ciphertext)
solver.analyze_frequencies()

# APPLICAZIONE DELLE IPOTESI (Modifica qui per testare)
# Formato: solver.set_substitution('LETTERA_CIFRATA', 'lettera_in_chiaro')
# --- APPLICAZIONE DELLE IPOTESI ---

# --- APPLICAZIONE DELLE IPOTESI ---

# 1. Conferme solide
solver.set_substitution('C', 'e') 
solver.set_substitution('U', 't') 
solver.set_substitution('I', 'h')
solver.set_substitution('T', 'i')
solver.set_substitution('K', 'n')
solver.set_substitution('J', 'g') # da 'ing'

# 2. Conferme da "had been"
solver.set_substitution('E', 'a')
solver.set_substitution('S', 'd')
solver.set_substitution('G', 'b')

# 3. Conferme da "same thing" e "even"
solver.set_substitution('Q', 's')
solver.set_substitution('A', 'm')
solver.set_substitution('.', 'v') 
solver.set_substitution('M', ' ') # Ipotesi: M è lo spazio

# Visualizza risultato
solver.print_state()


--- Analisi Frequenze ---
Char   Count    Freq (%)
-------------------------
'C'    127      12.12%
'U'    87       8.30%
'E'    85       8.11%
'I'    79       7.54%
'K'    75       7.16%
'L'    71       6.77%
'T'    68       6.49%
'N'    68       6.49%
','    64       6.11%
'Q'    38       3.63%
'S'    37       3.53%
'J'    30       2.86%
'V'    27       2.58%
'A'    27       2.58%
'W'    26       2.48%
'Z'    23       2.19%
'M'    20       1.91%
'Y'    18       1.72%
'-'    18       1.72%
'P'    16       1.53%
'D'    12       1.15%
'.'    11       1.05%
'G'    11       1.05%
'R'    8        0.76%
'B'    1        0.10%
'X'    1        0.10%
-------------------------

--- Anteprima Decifrazione ---
evenVhisetheYhadbeenLaYingmNZZNn samethingLLWLanhadbeenmNnLmiNWLN-theeBmiteZentN-intiZamYPVhimhLeeZednNtNnsYtNsaYba,eLNZethinginhe,PbWtinthet,eeLandtheLRYPandthe ,Ng,eLLN-hiLL eemhVhimhLeeZedinevitabseVaL NLitivesY ain-WstNhe,P-N,nNhWZanbeinghadeve,mNZeLNmsNLetNhe,be-N,eDLheVaLLt,WmRZNtiNns

In [30]:
import collections
import string
import sys

class MonoalphabeticCipherAnalyzer:
    """
    A tool for analyzing and decrypting Monoalphabetic Substitution Ciphers.
    """

    def __init__(self, ciphertext):
        # Remove any accidental whitespace from input to treat it as a pure stream
        self.ciphertext = ciphertext.replace("\n", "").replace(" ", "").strip()
        self.alphabet_size = 26
        # Final Key Mapping derived from cryptanalysis
        self.key_map = {
            'A': 'c', 'B': 'x', 'C': 'e', 'D': 'A', 'E': 'a', 
            'G': 'b', 'I': 'h', 'J': 'g', 'K': 'n', 'L': 's', 
            'M': 'p', 'N': 'o', 'P': 'w', 'Q': 'l', 'R': 'k', 
            'S': 'd', 'T': 'i', 'U': 't', 'V': 'w', 'W': 'u', 
            'X': 'z', 'Y': 'y', 'Z': 'm', 
            '.': 'v', ',': 'r', '-': 'f' 
        }

    def calculate_ic(self):
        """
        Calculates the Index of Coincidence (IC).
        IC for random text is ~0.038.
        IC for English text is ~0.066.
        """
        N = len(self.ciphertext)
        if N <= 1: return 0
        
        counts = collections.Counter(self.ciphertext)
        numerator = sum(n * (n - 1) for n in counts.values())
        denominator = N * (N - 1)
        
        return numerator / denominator

    def decrypt(self):
        """
        Applies the substitution key to the ciphertext.
        """
        plaintext = []
        for char in self.ciphertext:
            # If char is in our key, swap it; otherwise keep as is
            decrypted_char = self.key_map.get(char, char)
            plaintext.append(decrypted_char)
        return "".join(plaintext)

    def print_report(self):
        ic = self.calculate_ic()
        plaintext = self.decrypt()
        
        print("="*60)
        print(f"{'CRYPTANALYSIS REPORT':^60}")
        print("="*60)
        print(f"Ciphertext Length: {len(self.ciphertext)}")
        print(f"Index of Coincidence (IC): {ic:.5f}")
        print("-" * 60)
        print("ANALYSIS:")
        if 0.060 <= ic <= 0.070:
            print("The IC suggests a standard Monoalphabetic Substitution Cipher")
            print("preserving the underlying frequency distribution of English.")
        else:
            print("The IC suggests polyalphabetic or non-standard encipherment.")
        print("-" * 60)
        print("DECRYPTED STREAM (Scriptio Continua):")
        print(plaintext)
        print("="*60)

# --- EXECUTION ---

if __name__ == "__main__":
    # Raw ciphertext
    raw_cipher = """C.CKVITQCUICYIESGCCKLEYTKJANZZNKMQEACUITKJLLWLEKIESGCCKANKLATNWLN-UICCBATUCZCKUN-TKUTZEAYPVITAILCCZCSKNUNKQYUNQEYGE,CLNZCUITKJTKIC,PGWUTKUICU,CCLEKSUICLRYPEKSUICM,NJ,CLLN-ITLLMCCAIVITAILCCZCSTKC.TUEGQCVELMNLTUT.CQYMETK-WQUNIC,P-N,KNIWZEKGCTKJIESC.C,ANZCLNAQNLCUNIC,GC-N,CDLICVELLU,WARZNUTNKQCLLELITLLMCCAIVCKUNKPEKSIC,ICE,UJE.CJ,CEULCME,EUCQCEMLEUUICQELUVN,SLDLICLEUVTUIIC,-TKJC,LAW,QCS,NWKSELUNKCPQNNRTKJLU,ETJIUTK-,NKUN-IC,SNVKUICZNWKUETKN.C,UICMQETKDLNUICKPTUIESEAUWEQQYIEMMCKCSUNIC,PEM,NMNLEQN-E,UIW,QNNRCS,NWKSEUIC,ITL-EACVELNSSQYUVTLUCSDLICVELS,EVTKJIC,G,CEUIVTUILWAIST--TAWQUYUIEULICANWQSIE,SQYEKLVC,DYNWZTJIUIE.CRKNVKDICLCTXCSIC,TKITLE,ZLEJETKEKSEJETKEKSEJETKUICYAQELMCSCEAINUIC,PZW,ZW,TKJTKE,UTAWQEUCQYDVCQQPLTJICSE,UIW,PLTKRTKJGEARNKUICJ,NWKSPUIEULUICZNLUVNKSC,-WQUITKJUIEULC.C,IEMMCKCSUNZCDICQNNRCSELT-ICVC,CU,YTKJUNMWUUITKJLLCCKTKES,CEZGCLTSC,CEQUITKJLDUIC,CVELEQNKJLTQCKACDTULUICZNLUMC,-CAUUITKJTKUICVN,QSPLWLEKLUEUCSP.C,YJCKUQYEKSVTUIJ,CEUANK.TAUTNKDTUVELKNQNKJC,ZC,CQYEM,NMNLEQN-ZE,,TEJCPGWUN-ZE,,TEJCVTUIE,UIW,PVTUIVINZLICVELTKQN."""

    analyzer = MonoalphabeticCipherAnalyzer(raw_cipher)
    analyzer.print_report()

                    CRYPTANALYSIS REPORT                    
Ciphertext Length: 1046
Index of Coincidence (IC): 0.06220
------------------------------------------------------------
ANALYSIS:
The IC suggests a standard Monoalphabetic Substitution Cipher
preserving the underlying frequency distribution of English.
------------------------------------------------------------
DECRYPTED STREAM (Scriptio Continua):
evenwhiletheyhadbeensayingcommonplacethingssusanhadbeenconsciousoftheexcitementofintimacywwhichseemednotonlytolaybaresomethinginherwbutinthetreesandtheskywandtheprogressofhisspeechwhichseemedinevitablewaspositivelypainfultoherwfornohumanbeinghadevercomesoclosetoherbeforeAshewasstruckmotionlessashisspeechwentonwandherheartgavegreatseparateleapsatthelastwordsAshesatwithherfingerscurledroundastonewlookingstraightinfrontofherdownthemountainovertheplainAsothenwithadactuallyhappenedtoherwaproposalofarthurlookedroundatherhisfacewasoddlytwistedAshewasdrawingherbreathwithsuchdifficultythat

Cryptanalysis Report: Cipher 0
1. Statistical Analysis & Identification The initial statistical analysis of the ciphertext revealed an Index of Coincidence (IC) of approximately 0.062. Since the standard IC for the English language is roughly 0.066, and random text is near 0.038, this strongly suggested a Monoalphabetic Substitution Cipher. The high frequency of the character 'C' (12.12%) and 'U' (8.30%) further indicated a direct mapping to common English letters such as 'e' and 't'.

2. Decryption Methodology The decryption process followed a heuristic approach combining frequency analysis and pattern recognition (crib dragging):

Frequency Mapping: The trigram UIC appeared frequently, suggesting the word "THE". This established the mapping: U→t, I→h, C→e.

Suffix Identification: The pattern TKJ (appearing at the end of sequences) was identified as the common suffix "-ING", mapping T→i, K→n, J→g.

Contextual Refinement: Initial assumptions treated punctuation (dots, commas) as nulls or separators. However, sequence analysis (e.g., C.CK decoding to e.en) revealed that punctuation marks were part of the substitution alphabet (e.g., . mapped to v to form "even").

Correction of Anomalies: A significant breakthrough occurred by correcting the hypothesis for the letter 'M'. Initially thought to be a space, the sequence monsmious (from CANKLATNWLN) suggested the word "conscious", correcting the mapping to M→p and A→c. This confirmed the text was a scriptio continua (continuous script without spaces).

3. Final Decrypted Text The resulting plaintext appears to be an excerpt from a literary narrative (likely The Voyage Out by Virginia Woolf). Below is the text with word spacing and punctuation restored for readability:

"Even while they had been saying commonplace things, Susan had been conscious of the excitement of intimacy which seemed not only to lay bare something in her, but in the trees and the sky and the progress of his speech which seemed inevitable was positively painful to her. For no human being had ever come so close to her before. As he was struck motionless as his speech went on, and her heart gave great separate leaps at the last words. As he sat with his fingers curled round a stone, looking straight in front of her down the mountain over the plain, so then it had actually happened to her: a proposal of Arthur looked round at her. His face was oddly twisted as he was drawing her breath with such difficulty that she could hardly answer. As you might have known, as he seized her in his arms again and again and again, they clasped each other, murmuring inarticulately. 'Well,' sighed Arthur, sinking back on the ground, 'that's the most wonderful thing that's ever happened to me.' He looked as if he were trying to put things seen in a dream beside real things. There was a long silence. 'It's the most perfect thing in the world,' Susan stated, very gently and with great conviction. It was no longer merely a proposal of marriage, but of marriage with Arthur, with whom she was in love."