In [1]:
import csv
from utils import *

In [2]:
# Read the whole training corpus. The context manager guarantees the file
# handle is closed as soon as the text has been read (the bare open().read()
# form left the handle open until garbage collection).
with open('combined.txt', 'r', encoding='utf8') as corpus_file:
    data = corpus_file.read()

# Work on lower-cased text so upper/lower case variants share one symbol.
data = data.lower()

# Unique characters of the corpus; set order is arbitrary, so any code that
# needs a stable order has to sort this list itself.
chars = list(set(data))
data_size, vocab_size = len(data), len(chars)
print('There are %d total characters and %d unique characters in your data.' % (data_size, vocab_size))

There are 75971 total characters and 39 unique characters in your data.


In [3]:
# Index -> character lookup over the alphabetically sorted vocabulary.
ix_to_char = dict(enumerate(sorted(chars)))
# Character -> index lookup, obtained by inverting the table above.
char_to_ix = {symbol: position for position, symbol in ix_to_char.items()}
print(ix_to_char)

{0: '\n', 1: ' ', 2: '.', 3: '0', 4: '1', 5: '2', 6: '3', 7: '4', 8: '5', 9: '6', 10: '7', 11: '8', 12: '9', 13: 'a', 14: 'b', 15: 'c', 16: 'd', 17: 'e', 18: 'f', 19: 'g', 20: 'h', 21: 'i', 22: 'j', 23: 'k', 24: 'l', 25: 'm', 26: 'n', 27: 'o', 28: 'p', 29: 'q', 30: 'r', 31: 's', 32: 't', 33: 'u', 34: 'v', 35: 'w', 36: 'x', 37: 'y', 38: 'z'}


In [4]:
### GRADED FUNCTION: clip

def clip(gradients, maxValue):
    '''
    Limit every gradient entry to the closed interval [-maxValue, maxValue].

    Arguments:
    gradients -- a dictionary containing the gradients "dWaa", "dWax", "dWya", "db", "dby"
    maxValue -- clipping threshold: entries above it become maxValue, entries below -maxValue become -maxValue

    Returns:
    gradients -- a new dictionary holding the five clipped gradients. The arrays
                 themselves are clipped in place, so the arrays stored in the
                 input dictionary are modified as well.
    '''

    # Pull out the five gradients first so a missing key fails before anything is modified.
    selected = {key: gradients[key] for key in ("dWaa", "dWax", "dWya", "db", "dby")}

    lower_bound, upper_bound = -maxValue, maxValue

    # Clip in place (out= points at the same array) to mitigate exploding gradients.
    for key in ("dWax", "dWaa", "dWya", "db", "dby"):
        target = selected[key]
        np.clip(target, lower_bound, upper_bound, out=target)

    return selected

In [5]:
# GRADED FUNCTION: sample

def sample(parameters, char_to_ix, max_length=50):
    """
    Sample a sequence of character indices from the RNN, one time-step at a time.

    Generation starts from an all-zero input vector and an all-zero hidden
    state. At every step the sampled character is fed back as the next input.
    Sampling stops when the newline index is drawn or when max_length
    characters have been produced.

    Arguments:
    parameters -- python dictionary containing the parameters Waa, Wax, Wya, by, and b.
    char_to_ix -- python dictionary mapping each character to an index; must contain the newline character.
    max_length -- maximum number of characters to sample before the sequence is cut off (default 50).
                  The cap prevents an infinite loop when the model never emits a newline.

    Returns:
    indices -- a list containing the indices of the sampled characters. It always ends
               with exactly one newline index: either the sampled one or, when the
               sequence was cut off, one appended here.
    """

    # Retrieve parameters and relevant shapes from "parameters" dictionary
    Waa, Wax, Wya, by, b = parameters['Waa'], parameters['Wax'], parameters['Wya'], parameters['by'], parameters['b']
    vocab_size = by.shape[0]
    n_a = Waa.shape[1]

    # Initial input is an all-zero vector (no previous character), hidden state starts at zero.
    x = np.zeros((vocab_size, 1))
    a_prev = np.zeros((n_a, 1))

    indices = []

    # idx holds the most recently sampled index; -1 means "nothing sampled yet".
    idx = -1
    counter = 0
    newline_character = char_to_ix['\n']

    while idx != newline_character and counter < max_length:

        # Forward propagate one step: hidden state, output scores, output distribution.
        a = np.tanh(np.matmul(Wax, x) + np.matmul(Waa, a_prev) + b)
        z = np.matmul(Wya, a) + by
        y = softmax(z)

        # Draw the next character from the distribution. The candidate range is
        # taken from vocab_size (not a hard-coded 39) so it always matches len(y).
        idx = np.random.choice(np.arange(vocab_size), p=y.ravel())
        indices.append(idx)

        # Feed the sampled character back in as a one-hot input for the next step.
        x = np.zeros((vocab_size, 1))
        x[idx] = 1

        a_prev = a
        counter += 1

    # Terminate a cut-off sequence with a newline. Checking the last sampled
    # index (instead of the counter) avoids appending a second newline when the
    # final allowed character happened to be the newline itself.
    if idx != newline_character:
        indices.append(newline_character)

    return indices

In [6]:
def optimize(X, Y, a_prev, parameters, learning_rate=0.01):
    """
    Run one training step: forward pass, backward pass, gradient clipping, parameter update.

    Arguments:
    X -- input sequence passed to rnn_forward / rnn_backward
    Y -- target sequence passed to rnn_forward / rnn_backward
    a_prev -- hidden state handed to rnn_forward as the starting state
    parameters -- parameter dictionary handed to the forward, backward and update helpers
    learning_rate -- step size forwarded to update_parameters

    Returns:
    loss -- the loss returned by rnn_forward
    gradients -- the gradients after clipping to [-5, 5]
    a[len(X)-1] -- the entry of rnn_backward's second result at the last time-step index
    """
    loss, cache = rnn_forward(X, Y, a_prev, parameters)
    raw_gradients, hidden_states = rnn_backward(X, Y, parameters, cache)

    # Clip with a fixed threshold of 5 before applying the update.
    clipped_gradients = clip(raw_gradients, 5)

    # The value returned by update_parameters is not handed back to the caller,
    # so the update only takes effect if the helper modifies `parameters` in
    # place -- presumably it does; verify against utils.
    update_parameters(parameters, clipped_gradients, learning_rate)

    final_step = len(X) - 1
    return loss, clipped_gradients, hidden_states[final_step]

In [9]:
def model(data, ix_to_char, char_to_ix, num_iterations=1000000, n_a=50, a_names=7, vocab_size=39):
    """
    Train the character-level model on the lines of combined.txt and print samples along the way.

    Arguments:
    data -- accepted for interface compatibility; not used inside this function
            (the training examples are read from "combined.txt" instead)
    ix_to_char -- dictionary mapping an index to its character, used when printing samples
    char_to_ix -- dictionary mapping a character to its index, used to encode the examples
    num_iterations -- number of training steps (one example per step)
    n_a -- size passed to initialize_parameters and used for the initial hidden state
    a_names -- number of samples printed at every report; also passed to get_initial_loss
    vocab_size -- vocabulary size, used for both the input and the output dimension

    Returns:
    parameters -- the parameter dictionary created by initialize_parameters
    """
    n_x = n_y = vocab_size
    parameters = initialize_parameters(n_a, n_x, n_y)
    loss = get_initial_loss(vocab_size, a_names)

    # One training example per line, lower-cased and stripped of surrounding whitespace.
    with open("combined.txt", encoding='utf8') as f:
        raw_lines = f.readlines()
    examples = [line.lower().strip() for line in raw_lines]
    np.random.shuffle(examples)

    # The hidden state starts at zero and is carried over from one example to the next.
    a_prev = np.zeros((n_a, 1))

    for j in range(num_iterations):
        # Cycle through the shuffled examples.
        example = examples[j % len(examples)]

        # Input starts with None (no previous character); the target is the input
        # shifted by one position and terminated by the newline index.
        X = [None]
        X.extend(char_to_ix[ch] for ch in example)
        Y = X[1:] + [char_to_ix["\n"]]

        curr_loss, gradients, a_prev = optimize(X, Y, a_prev, parameters, learning_rate=0.01)
        loss = smooth(loss, curr_loss)

        # Only report every 2000 iterations.
        if j % 2000 != 0:
            continue

        print('Iteration: %d, Loss %f' % (j, loss) + '\n')
        for _ in range(a_names):
            print_sample(sample(parameters, char_to_ix), ix_to_char)
        print('\n')

    return parameters

In [10]:
# Train with model()'s default settings and keep the returned parameter
# dictionary; progress and sampled lines are printed every 2000 iterations.
parameters=model(data,ix_to_char,char_to_ix)

Iteration: 0, Loss 25.648594

Ownsd44my
Rc89pvj01mk1zfed.rcgjd
Mw43z22
Brcg3p63559th4v11ehhqwhqarvinwn69at2pawg.kus71h6
P68ig5r8fb2nc5.radtqu4ev6nfjb6pb5xe
Pixbk6
Bzoiz.e4bd71qqif5.w7ajk ljfcm92mplccwod


Iteration: 2000, Loss 37.488695

Yicbote er sesteotdu.deag laa
Rimidt
Bewdodtewtirk
Gasgbein
Hhescarabe lkek
E lal n
Vrar kfirirts


Iteration: 4000, Loss 35.698545

Biejyis
Vocleod hore
Marknem
 daston
Sichry
Addrevyinnerw kdiingkootgety bed arobe
Ledur


Iteration: 6000, Loss 34.163457

Gnexondander
Plat
Alaran
Cooca papkee 0pppindene peridte fbanf
Dlerke
Phetreme
Bwy san me5mamker afrima grckuf


Iteration: 8000, Loss 33.701684

Tomp hinto
Janigh
Uofler
Zaray
Oorinn
Exa kex
Arprotomsrey naller cayebo


Iteration: 10000, Loss 32.839182

Hova
Leshlin
Geronker
Rivstise
Suce
Dr cat cenche
Aura


Iteration: 12000, Loss 32.681816

Bean
Orisich wouclor
Bidch  gald
3as phepore
Herf
Mand
Mwiini


Iteration: 14000, Loss 32.489583

Annida
Menjiens sexy sexy stcl gotet
Illess
Letresk doll
Haet

Iteration: 126000, Loss 28.881354

Sppo
N thammst
Siper anmir s4omanid
Spourth acentia
Vothta theenteric ich
Schee
Srada


Iteration: 128000, Loss 29.249254

Chesce
Pulny pennyoshron
Zamperer
Seddlesqoge
Keddl the zareme
Phich ean
Pirowersenslalde cduttlet


Iteration: 130000, Loss 29.455819

Penny numons
Cottaus
Bunn
Pink
Buss
Ther
Twist


Iteration: 132000, Loss 29.213103

Shicgar staluspcrrois
Pan
Zorderron
Doniek whinziboon
Mad meter quertiing
The cizzer
Scape giol


Iteration: 134000, Loss 30.012584

Glocops
Pradeen lick and frasondury
Tedter fotter
Homes wigwogor
Bilby divard
Starewol
Princlerbers


Iteration: 136000, Loss 28.989955

Gook  oostong
Teuchon
Runger
Shorner
Sad worrod pony the ghirlemder oft
Dampy wold oots
Petele


Iteration: 138000, Loss 29.279317

Collie of wizzo
The fall whre
Jen
Bldend
The veelles tlenst
Pein
Pan


Iteration: 140000, Loss 29.497925

Sexy suremy witch nun cape phing beth sancler
Janled dattast
Sixy
Cat  quit
Zobblost
Redead minky scubek
Princepoy

Iteration: 248000, Loss 28.989333

Godd
The woba
Devey
Beall
Player werle surthers
Werehirghnoake
5ossterc


Iteration: 250000, Loss 29.171861

Pitter
Pactant
Vambi
Peter
Wolf s crasmine
Binde
Caphar


Iteration: 252000, Loss 28.422471

Arm
Parpit
Shosta
Hail lelisr
Meer
Proahtry
Pith


Iteration: 254000, Loss 28.458204

Scarea kinghernaun
Jlelladizoga baaness
Pawnio ded
Spider
The angilzata punk marter
Jumty girbedd
Weeanctybing gangman


Iteration: 256000, Loss 29.428313

Twarken
Pacry tumc pirate  fighty panen can anssa
Popepesex
Teb cosh sexy cicewbavi
Womokie berss
Terton putereric thedon
Thost


Iteration: 258000, Loss 28.368372

Pringer
Coge wotron queed brim rld
Male
Pefitor
Drintant
Maper
Pentelf man


Iteration: 260000, Loss 28.702100

Coin a bleandhels
Peake chinle
Pbrho main hobotut
Prince umust
Sexy coghn sic
Cloin schgll
Midpennerss indel


Iteration: 262000, Loss 28.930227

The vamp s srai
Parl
Fartords
Gharmurg ost
Twi
Plipth spavain krughocap siidaur
Coop


Iteration: 

Iteration: 374000, Loss 28.584912

Captain
Therefssen
Sexy the cleany martor
Ter wibertd
Pigare ginsburl
Humper
Jegmed


Iteration: 376000, Loss 28.586948

Fint
Prack raprypet
Firdal sexy ind beypermorri
Ckapesstt
Sampunter
Fretter
Girl


Iteration: 378000, Loss 28.256485

The fonald prish of gols
Doli man
Saden snaceloot
Sheech ssoman
Sexy shargs of toman lolit
Sex  foory s maidion
Jellers bugical pank witch


Iteration: 380000, Loss 28.669621

Giderberty
Salat
Bettanleg
Bat
Gransbightschabbuis
Gait
Ggerth


Iteration: 382000, Loss 28.944156

Justroworanaid garase
Sexy dom wica
Doft k0bro
Pilather
The jan
The
Jolk bridtuan


Iteration: 384000, Loss 28.292547

Pantus
Vethars sclencor
Ppefale and dicerair ank girt crimpiry puth princm
Zome
Pirncroal bratcro
Sexy k beat
Taro


Iteration: 386000, Loss 28.293598

Sail
Zacuplar
Temen
Patain in thumen
Zonching
Gidnan
Pwing basber


Iteration: 388000, Loss 29.093304

Printcmimer
Nwols kddy
Pichker
Hadress cimen caridre candy clont
The grinish

Iteration: 500000, Loss 28.719107

Gother baronde
Gist
Thrudry darrins cheellen ticure sharetramo
Sut  pinkurisebelly fooner
Worce
Pemry pinker fola
Zam truserinongingea


Iteration: 502000, Loss 28.703605

Dond falling
Chashion ganows
Gwanett
Pimo
Path hadza
Malder cat jace pineen
 pank rex


Iteration: 504000, Loss 28.466445

Thack mariold
Jomerbothdery trughes man mamgarder
Maghonkarr corashy
Crids
Gadyon
Maght
Fizus


Iteration: 506000, Loss 28.660031

Tung hission
Thackay banning zombie percescicirspufmcalerele me
Preacoe tree
Sexy with a touggans
Tibool wrdermes
Coher apetiirerd chow berdes
Prinnig 80enchey pires


Iteration: 508000, Loss 28.700261

Bog gra dedelic powny
Sarmas whutie cobe toud alostrel
Weaver
Costant melatwos uniens
Pocs pupes
Jolar tombick laly potex  fairy queack senakal  ne
Gilxy puraur


Iteration: 510000, Loss 28.293696

Sucoorowingon
Jack skimbigle hermas
Liud
Ffrl
Spey dog
Wish
Qun the roosburg


Iteration: 512000, Loss 28.992576

Phongher worper
Pastan
S

Iteration: 620000, Loss 29.134966

Mursion doctevie creenit mgump ponulsuse
Mordist
Lest
The lowaid in wits older wobestuc
Pet
Wizzomors
1lunke


Iteration: 622000, Loss 29.208486

Pegheri
Heustrans conk
Jack s slow ae toop linaurfabe
Preep
Jeen spad iragop
Bot
Wob


Iteration: 624000, Loss 28.828753

The spader
Clownit
Weaghiler
Suicion
Fhore
Clacket
Jeacer


Iteration: 626000, Loss 29.113635

Weapedestman poezfientorbax altwiles man
Flow
Tolly
The
Papoffarger
Dolyfu
Teme


Iteration: 628000, Loss 29.223361

Juss eretor guns
Giradce leadler rafperin gire
Jocker
Rockira
Red woay
Ss. zo lie
Thinchumina


Iteration: 630000, Loss 28.833879

Mydie
Veacuiticuin
The the
Zomo
Ssoxy oniuls woman
Mothch
Vellauze


Iteration: 632000, Loss 29.167437

Pather
The paman
A skapplk
Moger  firi shoryhlederdant
Clom hip poclowerson purgecher ge bost besezeviran
Terlen in worse
10xeper


Iteration: 634000, Loss 29.018966

Revie mad
Punk unca
Metrea gaten
Mattet the
Phutatatt pit
Sacdpty alocbone
Jumppin


Iteration: 744000, Loss 28.828520

The
Jomsey
Coburmuithin
Tamp
Pirwiced coo
Zarmse of racens corppow marify
Jue


Iteration: 746000, Loss 28.500419

Gins
Drunker hoster
Thentme
Growre mmall
Gille
Vem doctars
The


Iteration: 748000, Loss 29.576078

The licery hob se creding oc tordbie
Thoman
Mecha darkuc
Shitth lipred maniu matigovin starila
Caldoal doved in litra covizn s doll
Hendres
Vavins


Iteration: 750000, Loss 28.787374

Chobider boy lideret ken ranien
Tomoxanigle click
Skelungrif dape
Mandermermeron in
Fur
Jombare
Roboher


Iteration: 752000, Loss 28.832789

Docteir primar box
Zombie chahess spitce rong id panta pirean triman 
Vikce
Pan
Thdzars borse
Thuncaned bear kirgon
Viggg


Iteration: 754000, Loss 28.916294

Zhe ungi parrie punteringtermerus
Paster
Bavan in wisy prink the berrongire
Vim trop
Belly
Theer borge brisimian marr omk ft elfaf bail
Vigg


Iteration: 756000, Loss 28.546910

Pan
Bumbar plawja divewin
Ssonsheal
F pumpkin
Sexy babole
Sexyra
Buw bason


Iteration: 

Iteration: 866000, Loss 28.375023

Plhiack kentirl
Naptist
Pampion ilovensha
Thatlesbie wimokaik fary
Proctsstore gistin
Playor
Zin


Iteration: 868000, Loss 28.221511

Poor
8 sexy grom r ward
Shiher ampion
Reveld car witco proda
19cah pank lllenerd carchgoon womby fal swoldsha s
Vamburd girl booda
Pirate


Iteration: 870000, Loss 28.853734

Pimal striet
Mamader
Hoods kith catewerey ssuprevirepus cattor a unbi
Jau garr
Caurt
Duper
Corter fuiters  frie engel sharl me


Iteration: 872000, Loss 28.259017

Vex
Slugusbus
Frildishnacor
Bo
19cer
Truth
Mam morsence


Iteration: 874000, Loss 28.698203

Blana herirgit
Peait 
Princess
Pweerfpemenianda
Speermlus
Deak teap
10asone


Iteration: 876000, Loss 28.510067

Moonh
Bat card ofwim
Buechake
Fiply
Jaim
Pack dist witchures stew
Ghost


Iteration: 878000, Loss 28.432056

Miss hongoner
Zhatcadonis
Dov
Placounskoot
The toco nighter
Dog
Tira


Iteration: 880000, Loss 29.355438

Jembla tericelibard lets
She ly mamad
Zel che
Celette
Bustlent the calk

Iteration: 992000, Loss 31.177742

Witcla pudch jaston
Giobbat
Gandse tat
Didinnit lray ardet hiin isesry wrichsifacs
Sexy porach sexlice stody
Zo peffis pigy jakie piking
Gint goon


Iteration: 994000, Loss 33.035770

Dor mpi cobistirl
Jowaber
1aru
Zestwlester
Gindeantcor
Poniskyar
Zarshut orow


Iteration: 996000, Loss 35.463218

Qhop
Doxs
Thed pelst ieced gogmstr tibar opiccmiichat.ffina

Fats
Wnons bimlon aridcud cicersushgwina ared eustred d
Guc typyofdead chow r vany2sawie doaporet r2tista 
Par


Iteration: 998000, Loss 37.268053

Joeveot leoe actoyhqer gexs
Rodete
Wueand eiatgexyddinerssdapereruywe ceresapcyernzod
Wichreboti
Qir
Wioschen
Ver


