In [1]:
import numpy as np

# --- 1. Przygotowanie danych (Wariant 2) ---
# Training corpus: a single sentence that the network learns character by character.
data = "Example tasks in which this is done include speech recognition, computer vision, translation between (natural) languages, as well as other mappings of inputs"

# The vocabulary is sorted so that the character <-> index mapping is the same
# on every run; iterating a bare set of strings yields an order that can change
# between interpreter sessions because of string hash randomization.
chars = sorted(set(data))
data_size, X_size = len(data), len(chars)
print(f"Dane mają {data_size} znaków, {X_size} unikalnych.")

# Lookup tables used to one-hot encode inputs and to decode sampled indices.
char_to_idx = {ch: i for i, ch in enumerate(chars)}
idx_to_char = dict(enumerate(chars))

# --- 2. Stałe i Hiperparametry ---
# H_size was raised from 10 to 64 so that the network can reach the 0.1
# target on the longer training text.
H_size = 64                # length of the hidden/cell state (rows of every gate matrix)
T_steps = 25               # number of characters in one training window
learning_rate = 0.1        # step size applied in update_parameters
weight_sd = 0.1            # scale applied to the random initial weights
z_size = X_size + H_size   # length of the stacked [h_prev; x] vector fed to the gates

# --- 3. Funkcje aktywacji ---
def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)) element-wise.

    The value is computed piecewise so that ``np.exp`` is only ever called on
    non-positive arguments; the direct formula overflows (and emits a
    RuntimeWarning) once ``-x`` exceeds roughly 709.

    Args:
        x: scalar or NumPy array of pre-activations.

    Returns:
        NumPy array with the same shape as ``x`` and values in [0, 1].
    """
    # exp(-|x|) lies in (0, 1], so it can underflow to 0 but never overflow.
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) -- the same function.
    return np.where(np.greater_equal(x, 0), 1 / (1 + decay), decay / (1 + decay))

def dsigmoid(y):
    """Return the sigmoid derivative expressed through its output ``y``.

    ``y`` is expected to already be ``sigmoid(x)``; the result is y * (1 - y).
    """
    complement = 1 - y
    return y * complement

def tanh(x):
    """Return the element-wise hyperbolic tangent of ``x`` (thin wrapper over NumPy)."""
    squashed = np.tanh(x)
    return squashed

def dtanh(y):
    """Return the tanh derivative expressed through its output ``y``.

    ``y`` is expected to already be ``tanh(x)``; the result is 1 - y**2.
    """
    y_squared = y * y
    return 1 - y_squared

# --- 4. Inicjalizacja Parametrów ---
class Param:
    """A named trainable array bundled with its gradient and optimizer state."""

    def __init__(self, name, value):
        # v: current value of the parameter
        # d: gradient accumulator, same shape as v, starts at zero
        # m: running sum of squared gradients used by the update rule, starts at zero
        self.name, self.v = name, value
        self.d, self.m = np.zeros_like(value), np.zeros_like(value)

class Parameters:
    """Container for every trainable array of the LSTM and its output layer.

    Gate weights have shape (H_size, z_size) and act on the stacked
    [h_prev; x] vector; the output projection W_v has shape (X_size, H_size).
    All biases start at zero.
    """

    def __init__(self):
        def gate_weights(name, shift=None):
            # Random normal weights scaled by weight_sd, optionally shifted.
            values = np.random.randn(H_size, z_size) * weight_sd
            if shift is not None:
                values = values + shift
            return Param(name, values)

        def zero_bias(name, rows):
            return Param(name, np.zeros((rows, 1)))

        # Random draws happen in the order f, i, C, o, v.
        # The forget, input and output gate weights are shifted by +0.5;
        # the candidate weights W_C are not.
        self.W_f = gate_weights('W_f', shift=0.5)
        self.W_i = gate_weights('W_i', shift=0.5)
        self.W_C = gate_weights('W_C')
        self.W_o = gate_weights('W_o', shift=0.5)
        self.W_v = Param('W_v', np.random.randn(X_size, H_size) * weight_sd)

        self.b_f = zero_bias('b_f', H_size)
        self.b_i = zero_bias('b_i', H_size)
        self.b_C = zero_bias('b_C', H_size)
        self.b_o = zero_bias('b_o', H_size)
        self.b_v = zero_bias('b_v', X_size)

    def all(self):
        """Return every Param as a list: the five weight matrices, then the five biases."""
        ordered_names = ('W_f', 'W_i', 'W_C', 'W_o', 'W_v',
                         'b_f', 'b_i', 'b_C', 'b_o', 'b_v')
        return [getattr(self, attr) for attr in ordered_names]

# Single shared parameter set. The functions below take it as the default value
# of their `p` / `params` argument, so the default is bound to this instance
# when each function is defined.
parameters = Parameters()

# --- 5. Forward Pass ---
def forward(x, h_prev, C_prev, p=parameters):
    """Run one LSTM time step followed by the softmax output layer.

    Args:
        x: input vector for this step (callers in this file pass a one-hot
            column of shape (X_size, 1)).
        h_prev: hidden state from the previous step.
        C_prev: cell state from the previous step.
        p: parameter set to use.

    Returns:
        Tuple ``(z, f, i, C_bar, C, o, h, v, y)``: the stacked gate input, the
        forget/input gate activations, the candidate cell state, the new cell
        state, the output gate activation, the new hidden state, the output
        logits and the softmax probabilities.
    """
    # Every gate sees the previous hidden state stacked on top of the input.
    z = np.vstack((h_prev, x))

    f = sigmoid(np.dot(p.W_f.v, z) + p.b_f.v)
    i = sigmoid(np.dot(p.W_i.v, z) + p.b_i.v)
    C_bar = tanh(np.dot(p.W_C.v, z) + p.b_C.v)

    C = f * C_prev + i * C_bar
    o = sigmoid(np.dot(p.W_o.v, z) + p.b_o.v)
    h = o * tanh(C)

    v = np.dot(p.W_v.v, h) + p.b_v.v
    # Softmax with the maximum logit subtracted first: mathematically the same
    # distribution, but exp() can no longer overflow for large logits, and the
    # exponentials are computed only once.
    exp_shifted = np.exp(v - np.max(v))
    y = exp_shifted / np.sum(exp_shifted)

    return z, f, i, C_bar, C, o, h, v, y

# --- 6. Backward Pass ---
def backward(target, dh_next, dC_next, C_prev, z, f, i, C_bar, C, o, h, v, y, p=parameters):
    """Backpropagate one time step and accumulate gradients into ``p``.

    Args:
        target: index of the correct next character for this step.
        dh_next: gradient flowing into ``h`` from the following time step.
        dC_next: gradient flowing into ``C`` from the following time step.
        C_prev: cell state that entered this step in the forward pass.
        z, f, i, C_bar, C, o, h, v, y: values returned by ``forward`` for this
            step (``v`` is accepted but not used here).
        p: parameter set whose ``.d`` accumulators are incremented in place.

    Returns:
        Tuple ``(dh_prev, dC_prev)``: gradients with respect to the previous
        hidden and cell state.
    """
    # Gradient at the logits: softmax output minus 1 at the target index.
    grad_v = np.copy(y)
    grad_v[target] -= 1
    p.W_v.d += np.dot(grad_v, h.T)
    p.b_v.d += grad_v

    # Hidden-state gradient: contribution of this step's output plus the future.
    grad_h = np.dot(p.W_v.v.T, grad_v)
    grad_h += dh_next

    tanh_C = tanh(C)

    # Output gate.
    grad_o = dsigmoid(o) * (grad_h * tanh_C)

    # Cell state: gradient carried back from the next step plus the path through h.
    grad_C = np.copy(dC_next)
    grad_C += grad_h * o * dtanh(tanh_C)

    # Candidate, input gate and forget gate.
    grad_C_bar = dtanh(C_bar) * (grad_C * i)
    grad_i = dsigmoid(i) * (grad_C * C_bar)
    grad_f = dsigmoid(f) * (grad_C * C_prev)

    # Each gate shares the same input z, so the accumulation has one shape.
    gate_updates = (
        (p.W_o, p.b_o, grad_o),
        (p.W_C, p.b_C, grad_C_bar),
        (p.W_i, p.b_i, grad_i),
        (p.W_f, p.b_f, grad_f),
    )
    for weight, bias, grad in gate_updates:
        weight.d += np.dot(grad, z.T)
        bias.d += grad

    # Gradient with respect to the stacked input z = [h_prev; x].
    grad_z = (np.dot(p.W_f.v.T, grad_f) +
              np.dot(p.W_i.v.T, grad_i) +
              np.dot(p.W_C.v.T, grad_C_bar) +
              np.dot(p.W_o.v.T, grad_o))

    # The first H_size rows of z are h_prev; the rest belong to x.
    return grad_z[:H_size, :], f * grad_C

# --- 7. Funkcje pomocnicze ---
def clear_gradients(params=parameters):
    """Zero the gradient accumulator ``d`` of every parameter, in place."""
    for param in params.all():
        param.d[...] = 0

def clip_gradients(params=parameters):
    """Clamp every gradient entry to the range [-1, 1], in place."""
    limit = 1
    for param in params.all():
        grad = param.d
        np.clip(grad, -limit, limit, out=grad)

def update_parameters(params=parameters):
    """Apply one in-place update step to every parameter.

    Each parameter's ``m`` accumulates the squared gradient, and the step for
    each entry is the learning rate times the gradient divided by the square
    root of that running sum (plus 1e-8 to avoid division by zero).
    """
    for param in params.all():
        grad = param.d
        param.m += grad * grad
        step = learning_rate * grad / np.sqrt(param.m + 1e-8)
        param.v -= step

# --- 8. Wrapper Forward-Backward ---
def forward_backward(inputs, targets, h_prev, C_prev):
    """Run a full forward and backward pass over one window of characters.

    Gradients are accumulated in the module-level ``parameters`` (the default
    used by ``forward``/``backward``); they are cleared before the backward
    pass and clipped after it.

    Args:
        inputs: character indices fed to the network, one per time step.
        targets: character indices the network should predict at each step.
        h_prev: hidden state entering the window.
        C_prev: cell state entering the window.

    Returns:
        Tuple ``(loss, h, C)``: the summed cross-entropy loss over the window
        and the hidden and cell state after the last step.
    """
    steps = len(inputs)

    # Key -1 holds the state that enters the window, so step t reads t - 1.
    h_s = {-1: np.copy(h_prev)}
    C_s = {-1: np.copy(C_prev)}
    cache = {}
    loss = 0

    for t, char_idx in enumerate(inputs):
        one_hot = np.zeros((X_size, 1))
        one_hot[char_idx] = 1

        z, f, i, C_bar, C, o, h, v, y = forward(one_hot, h_s[t - 1], C_s[t - 1])
        h_s[t], C_s[t] = h, C
        cache[t] = (z, f, i, C_bar, o, v, y)

        # Cross-entropy: negative log-probability assigned to the target.
        loss -= np.log(y[targets[t], 0])

    clear_gradients()
    dh_next = np.zeros_like(h_s[0])
    dC_next = np.zeros_like(C_s[0])

    # Walk the window backwards, threading the state gradients through time.
    for t in reversed(range(steps)):
        z, f, i, C_bar, o, v, y = cache[t]
        dh_next, dC_next = backward(
            target=targets[t], dh_next=dh_next, dC_next=dC_next,
            C_prev=C_s[t - 1], z=z, f=f, i=i, C_bar=C_bar, C=C_s[t], o=o,
            h=h_s[t], v=v, y=y,
        )

    clip_gradients()
    return loss, h_s[steps - 1], C_s[steps - 1]

# --- 9. Generowanie tekstu ---
def sample(h_prev, C_prev, first_char_idx, sentence_length):
    """Generate character indices by repeatedly sampling from the network.

    Args:
        h_prev: hidden state to start from.
        C_prev: cell state to start from.
        first_char_idx: index of the character fed in at the first step (it is
            not itself included in the result).
        sentence_length: number of indices to generate.

    Returns:
        List of sampled character indices of length ``sentence_length``.
    """
    def one_hot(index):
        vec = np.zeros((X_size, 1))
        vec[index] = 1
        return vec

    hidden, cell = h_prev, C_prev
    current = one_hot(first_char_idx)
    generated = []

    for _ in range(sentence_length):
        _, _, _, _, cell, _, hidden, _, probs = forward(current, hidden, cell)
        # Draw the next character from the predicted distribution and feed it back in.
        choice = np.random.choice(range(X_size), p=probs.ravel())
        current = one_hot(choice)
        generated.append(choice)

    return generated

# --- 10. Pętla treningowa ---
print("Rozpoczynam trening...")
iter_num = 0
pointer = 0
# Starting value for the smoothed loss: the loss of a uniform guess over the
# vocabulary, summed over one window of T_steps characters.
smooth_loss = -np.log(1.0/X_size) * T_steps

# Recurrent state carried from one training window to the next.
g_h_prev = np.zeros((H_size, 1))
g_C_prev = np.zeros((H_size, 1))

while True:
    # Restart from the beginning of the text (with a fresh state) on the first
    # iteration and whenever the next window would not have a full set of targets.
    window_overruns = pointer + T_steps >= len(data)
    if window_overruns or iter_num == 0:
        g_h_prev = np.zeros((H_size, 1))
        g_C_prev = np.zeros((H_size, 1))
        pointer = 0

    # Targets are the inputs shifted one character to the right.
    window_end = pointer + T_steps
    inputs = [char_to_idx[ch] for ch in data[pointer:window_end]]
    targets = [char_to_idx[ch] for ch in data[pointer + 1:window_end + 1]]

    loss, g_h_prev, g_C_prev = forward_backward(inputs, targets, g_h_prev, g_C_prev)
    # Exponential moving average of the per-window loss.
    smooth_loss = 0.999 * smooth_loss + 0.001 * loss

    update_parameters()

    # Every 1000 iterations: print a sample and check the stopping criterion.
    if iter_num % 1000 == 0:
        sample_idx = sample(g_h_prev, g_C_prev, inputs[0], 200)
        txt = ''.join(idx_to_char[idx] for idx in sample_idx)
        print(f"\n--- Iteracja {iter_num}, Loss: {smooth_loss:.4f} ---")
        print(f"Generowany tekst: {txt}")

        # Stopping condition from the assignment: smoothed loss below 0.1.
        if smooth_loss < 0.1:
            print("\nOsiągnięto wymaganą dokładność (Loss < 0.1). Koniec treningu.")
            break

    pointer += T_steps
    iter_num += 1

Dane mają 157 znaków, 27 unikalnych.
Rozpoczynam trening...

--- Iteracja 0, Loss: 82.3958 ---
Generowany tekst: mElhcnsvclaseeeeuiochmhlhmec ths shkcce)ka,hhuhc nhEctrich hlhawi ile)wuehnxhx(hick mwiwiacpcnhhcceweohchhaohlhlcu ciihch wehhch siwthcefmnchw dtciehhashh ehnihh b hchhk,chhedow achwhlcwwewsishwhhchwh

--- Iteracja 1000, Loss: 35.5367 ---
Generowany tekst: pee, as well as other mappings of cotion, translation between (natural) languages, as well as other mappings of bettransision between is of betweec (nas tin donurecgs vis ee (natural) languages, as we

--- Iteracja 2000, Loss: 13.2453 ---
Generowany tekst: ion, translation betwell ation between (natural) doneecnssplecof iomeudr s other mappings of eetion tramplat s oncer mamwet nn(oneechision, translation between (natural) languagea, as well as other ma

--- Iteracja 3000, Loss: 4.9259 ---
Generowany tekst: tion  is, in well as other mappings of bettranslation, torel is well as other mappings of betnratural) languages, as 