# Jupyter Notebook mit den erforderlichen Snippets, um eine Vigenère-Chiffre zu brechen

## Chiffretext einlesen

In [1]:
import string
import pandas as pd

In [7]:
def text_reader(path: str) -> str:
    """Read a UTF-8 text file and reduce it to the lowercase letters a-z.

    The text is lowercased, German umlauts and ``ß`` are transliterated
    (``ä`` -> ``ae``, ``ß`` -> ``ss``, ...), a set of other accented letters
    is mapped to its base letter, and every remaining character that is not
    an ASCII lowercase letter (digits, whitespace, punctuation, ...) is
    dropped.

    Args:
        path: Path of the text file to read.

    Returns:
        The cleaned text, consisting only of characters from ``a`` to ``z``.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Local import so this cell does not depend on an edit to the import cell.
    import unicodedata

    with open(path, 'r', encoding='utf-8') as file:
        text = file.read()

    # Compose decomposed sequences (e.g. 'a' + U+0308 combining diaeresis)
    # into single code points first. Otherwise they would miss the table
    # below and the final filter would silently turn "ä" into "a" instead
    # of "ae".
    text = unicodedata.normalize('NFC', text).lower()

    # Transliteration table: German specific characters plus common accents.
    replacements = {
        'ä': 'ae', 'ö': 'oe', 'ü': 'ue', 'ß': 'ss',
        'á': 'a', 'à': 'a', 'â': 'a', 'é': 'e',
        'è': 'e', 'ê': 'e', 'ë': 'e', 'í': 'i',
        'ì': 'i', 'î': 'i', 'ï': 'i', 'ó': 'o',
        'ò': 'o', 'ô': 'o', 'ú': 'u', 'ù': 'u',
        'û': 'u', 'ç': 'c'
    }

    # One pass over the text instead of one full ``replace`` scan per entry.
    text = text.translate(str.maketrans(replacements))

    # Keep only a-z; everything else is irrelevant for the cryptanalysis.
    allowed = set(string.ascii_lowercase)
    return ''.join(char for char in text if char in allowed)

In [8]:
# Load the ciphertext file and normalise it with text_reader.
chiffre_text = text_reader('chiffre_text.txt')

## Suche nach der Schlüssellänge

In [3]:
KD = 0.0773  # Friedman characteristic (index of coincidence) of the German language

In [11]:
def text_teiler(text: str, splitter: int) -> pd.DataFrame:
    """Split ``text`` into ``splitter`` interleaved sub-texts.

    Character ``i`` of ``text`` goes into column ``i % splitter``, so column
    ``j`` holds the characters at positions ``j, j + splitter,
    j + 2 * splitter, ...``.

    Args:
        text: The text to split.
        splitter: Number of columns to distribute the characters over.

    Returns:
        A DataFrame with ``splitter`` columns, one sub-text per column and
        one character per cell. If ``len(text)`` is not a multiple of
        ``splitter``, the shorter columns end with missing values.
    """
    # A strided slice collects each column directly, so every character is
    # visited once instead of being tested against every column index.
    teiltexte = [list(text[start::splitter]) for start in range(splitter)]

    # The list holds one sub-text per row; transpose so each sub-text
    # becomes a column.
    return pd.DataFrame(teiltexte).transpose()

In [12]:
# Split the ciphertext into 8 interleaved columns.
df = text_teiler(chiffre_text, 8)

In [13]:
# Show the split columns as the notebook cell output.
df

Unnamed: 0,0,1,2,3,4,5,6,7
0,l,j,s,j,w,f,q,x
1,y,z,r,r,i,w,n,s
2,l,y,n,s,i,n,j,x
3,y,f,f,y,x,g,n,g
4,q,n,t,y,m,j,p,j
...,...,...,...,...,...,...,...,...
1663,m,n,s,j,m,w,j,s
1664,i,f,l,j,l,j,s,b
1665,n,q,q,n,h,m,s,n
1666,h,m,y,x,l,j,x,f


In [14]:
def calc_friedman_characteristics(df):
    """
    Calculate the Friedman characteristic for each column in the dataframe.

    The Friedman characteristic (Kappa) measures the index of coincidence:
    Kappa = Sum[(f_i * (f_i - 1)) / (N * (N - 1))]

    Where:
    - f_i is the frequency of the i-th letter in the column
    - N is the total number of letters in the column

    Missing values in a column are ignored. A column with fewer than two
    letters gets a characteristic of 0.0.

    Args:
        df: DataFrame with one sub-text per column and one letter per cell.

    Returns:
        pandas Series (dtype float64), indexed like ``df.columns``, with the
        Friedman characteristic of each column.
    """
    kappas = []

    for col in df.columns:
        # Padding cells of shorter columns must not count as letters.
        letters = df[col].dropna()
        total = len(letters)

        if total > 1:  # Avoid division by zero
            counts = letters.value_counts()
            kappa = float((counts * (counts - 1)).sum()) / (total * (total - 1))
        else:
            kappa = 0.0

        kappas.append(kappa)

    # Explicit dtype so the result is float64 even for an empty frame and
    # does not depend on the default pandas picks for a Series without data.
    return pd.Series(kappas, index=df.columns, dtype=float)

In [16]:
# Friedman characteristic of each of the columns in df.
friedmann = calc_friedman_characteristics(df)

In [17]:
# Show the per-column characteristics as the notebook cell output.
friedmann

0    0.074963
1    0.069812
2    0.077582
3    0.071724
4    0.076267
5    0.075079
6    0.074543
7    0.075048
dtype: float64

In [19]:
def find_best_key_length(ciphertext, max_length=20, reference_kappa=None):
    """
    Find the most likely key length for a Vigenère cipher by finding the number
    of columns that produces Friedman characteristics closest to a reference
    value (by default that of German text).

    For every candidate length the ciphertext is split into that many
    columns, the Friedman characteristic of each column is computed and the
    mean over all columns is compared with the reference value. The result
    and the difference for every candidate are printed.

    Args:
        ciphertext (str): The encrypted text
        max_length (int): Maximum key length to try
        reference_kappa (float, optional): Friedman characteristic of the
            expected plaintext language. Defaults to the module-level
            constant ``KD``.

    Returns:
        int: The candidate length with the smallest difference; on ties the
        smallest such length. Returns 1 if ``max_length`` is below 1.

    NOTE(review): in the recorded run all candidates differ by only about
    0.003, so inspect the printed list rather than trusting the minimum
    blindly.
    """
    # Resolve the default at call time, so re-assigning KD in a later cell
    # still takes effect, just as with the former direct global lookup.
    if reference_kappa is None:
        reference_kappa = KD

    min_diff = float('inf')
    best_length = 1
    differences = {}

    for length in range(1, max_length + 1):
        # One column per assumed key position.
        columns = text_teiler(ciphertext, length)

        # Mean characteristic over all columns for this candidate length.
        avg_characteristic = calc_friedman_characteristics(columns).mean()

        diff = abs(avg_characteristic - reference_kappa)
        differences[length] = diff

        # Strict comparison: the first (smallest) length wins on ties.
        if diff < min_diff:
            min_diff, best_length = diff, length

    print(f"Best key length: {best_length} with difference: {min_diff:.6f}")
    print("All differences:")
    for length, diff in differences.items():
        print(f"Length {length}: {diff:.6f}")

    return best_length

In [20]:
# Estimate the key length of the ciphertext (tries lengths 1..20 by default).
key_length = find_best_key_length(chiffre_text)

Best key length: 12 with difference: 0.002573
All differences:
Length 1: 0.003009
Length 2: 0.002964
Length 3: 0.002940
Length 4: 0.003015
Length 5: 0.003025
Length 6: 0.002858
Length 7: 0.003139
Length 8: 0.002923
Length 9: 0.002765
Length 10: 0.003038
Length 11: 0.003151
Length 12: 0.002573
Length 13: 0.002898
Length 14: 0.003114
Length 15: 0.002943
Length 16: 0.002916
Length 17: 0.002913
Length 18: 0.002660
Length 19: 0.003098
Length 20: 0.003120
