In [1]:
from typing import List, Tuple
import pandas as pd
import numpy as np
import random
import os

In [39]:
df = pd.DataFrame()
for file in sorted(os.listdir("./vocab_database")):
    if file.endswith('.csv') and not file.startswith('Kahoot') and not file.startswith('Master') and not file.startswith('VERB'):
        print(file)
        temp_df = pd.read_csv(f"./vocab_database/{file}", names = ['English', 'French', 'Antonym'], dtype = str)
        temp_df['Category'] = file.replace('.csv', '')
        df = df.append(temp_df, ignore_index = True)
df['Antonym'] = df['Antonym'].replace(np.nan, "")
df

New.csv
body.csv
careers.csv
chores.csv
directions.csv
dishes_and_silverware.csv
drinks.csv
feelings.csv
food.csv
house.csv
military.csv
nature.csv
personalities.csv
phys_description.csv
prepositions.csv
relatives.csv
school.csv
verbs.csv
weather.csv


Unnamed: 0,English,French,Antonym,Category
0,non-alcoholic drinks,boissons non alcoolisées,,New
1,coffee,café,,New
2,espresso,express;café,,New
3,hot chocolate,chocolat (chaud),,New
4,juice,jus,,New
...,...,...,...,...
741,foggy,du brouillard,,weather
742,raining,pleut,,weather
743,pouring,pleut à verse,,weather
744,snowing,neige,,weather


In [40]:
df['French'] = df['French'].apply(lambda s:set(s.split('; '))-{''})
df['Antonym'] = df['Antonym'].apply(lambda s:set(s.split('; '))-{''})
df

Unnamed: 0,English,French,Antonym,Category
0,non-alcoholic drinks,{boissons non alcoolisées},{},New
1,coffee,{café},{},New
2,espresso,{express;café},{},New
3,hot chocolate,{chocolat (chaud) },{},New
4,juice,{jus},{},New
...,...,...,...,...
741,foggy,{du brouillard},{},weather
742,raining,{pleut},{},weather
743,pouring,{pleut à verse},{},weather
744,snowing,{neige},{},weather


In [41]:
# Merge duplicate keys

duplicate_keys = df[df.duplicated('English')]['English']
for i, s in duplicate_keys.iteritems():
    print(f"Found duplicate key: {s}")
    sli = df[df['English'] == s]
    for idx, row in list(sli.tail(len(sli)-1).iterrows()):
        sli.iloc[0]['French'] |= row['French']
        sli.iloc[0]['Antonym'] |= row['Antonym']
    df.drop(index = sli.tail(len(sli)-1).index, inplace = True)

Found duplicate key: to do the dishes
Found duplicate key: non-alcoholic drinks
Found duplicate key: coffee
Found duplicate key: espresso
Found duplicate key: hot chocolate
Found duplicate key: juice
Found duplicate key: lemonade
Found duplicate key: milk
Found duplicate key: pop, soda, soft drink
Found duplicate key: citrus soda
Found duplicate key: cola
Found duplicate key: lemon soda (eg, 7-Up)
Found duplicate key: tea
Found duplicate key: iced tea
Found duplicate key: herbal tea
Found duplicate key: water
Found duplicate key: still / plain water
Found duplicate key: sparkling / mineral water
Found duplicate key: tap water
Found duplicate key: alcoholic drinks
Found duplicate key: after-dinner drink
Found duplicate key: anise-flavored apéritif
Found duplicate key: beer
Found duplicate key: beer on tap
Found duplicate key: champagne
Found duplicate key: cocktail, pre-dinner drink
Found duplicate key: hard cider
Found duplicate key: wine
Found duplicate key: milk
Found duplicate key: 

In [42]:
df = df.set_index('English')

In [43]:
df['Category'].unique()

array(['New', 'body', 'careers', 'chores', 'directions',
       'dishes_and_silverware', 'feelings', 'food', 'house', 'military',
       'nature', 'personalities', 'phys_description', 'prepositions',
       'relatives', 'school', 'verbs', 'weather'], dtype=object)

In [44]:
all_vocab = set()
all_french_words = set()
for l in df['French']:
    all_vocab |= l
    all_french_words |= l
for l in df['Antonym']:
    all_vocab |= l
print(len(all_vocab))
print(len(all_french_words))

914
867


In [45]:
cat_to_vocab = df.groupby('Category').apply(lambda x: set.union(*x.French, *x.Antonym))
cat_to_french = df.groupby('Category').apply(lambda x: set.union(*x.French))
cat_to_vocab.loc['all'] = all_vocab
cat_to_french.loc['all'] = all_french_words

In [46]:
cats_with_synonyms = df['Category'][df['French'].apply(lambda x: len(x)>1)] # More than 1 translation
cats_with_synonyms

English
mouth             body
manager        careers
teacher        careers
to cook         chores
amazed        feelings
               ...    
husband      relatives
binder          school
to build         verbs
to reopen        verbs
to shine         verbs
Name: Category, Length: 99, dtype: object

In [47]:
cats_with_antonyms = df['Category'][df['Antonym'].apply(lambda x:len(x)>0)]
cats_with_antonyms

English
north             directions
south             directions
east              directions
west              directions
left              directions
                    ...     
for             prepositions
without         prepositions
according to    prepositions
under           prepositions
on              prepositions
Name: Category, Length: 108, dtype: object

In [48]:
def generate_eng_to_french(category:str = None, num_of_answers = 4)->Tuple[str, List[str], str]:
    if category is None:
        category = random.choice(df['Category'])
    word = random.choice(df[df['Category'] == category].index)
    chosen_french_def = random.sample(df.loc[word]['French'], 1)[0]
    incorrect_answers = random.sample(cat_to_vocab[df.loc[word, 'Category']]-df.loc[word, 'French'], k = num_of_answers-1)
    return word, random.sample(incorrect_answers+[chosen_french_def], num_of_answers), chosen_french_def

In [49]:
def generate_french_to_eng(category:str = None, num_of_answers = 4):
    if category is None:
        category = random.choice(df['Category'])
    word = random.sample(cat_to_french[category], 1)[0]
    possible_eng_defs = df[df.apply(lambda x: word in x['French'], axis = 1)].index
    chosen_eng_def = random.choice(possible_eng_defs)
    incorrect_answers = random.sample(set(df[df['Category'] == df.loc[chosen_eng_def, 'Category']].index)-set(possible_eng_defs), k = num_of_answers-1)
    return word, random.sample(incorrect_answers+[chosen_eng_def], num_of_answers), chosen_eng_def

In [50]:
def generate_synonym(category:str = None, num_of_answers = 4, answers_in_same_category = True):
    if category is None:
        category = random.choice(cats_with_synonyms)
    series_with_synonyms = cats_with_synonyms[cats_with_synonyms == category].index
    word = random.choice(series_with_synonyms)
    french_synonym_chosen = random.sample(df.loc[word, "French"], 1)[0]
    synonyms = set()
    for poss in df[df['Category'] == category]['French']:
        if french_synonym_chosen in poss:
            synonyms |= poss
    incorrect_answers = random.sample((cat_to_vocab.loc[category] if answers_in_same_category else all_vocab)-synonyms, num_of_answers-1)
    synonyms.remove(french_synonym_chosen)
    french_synonym_chosen_2=random.sample(synonyms, 1)[0]
    return french_synonym_chosen, random.sample(incorrect_answers+[french_synonym_chosen_2], num_of_answers), french_synonym_chosen_2

In [51]:
def generate_antonym(category:str = None, num_of_answers = 4):
    if category is None:
        category = random.choice(cats_with_antonyms)
    chosen_english_word = random.choice(cats_with_antonyms[cats_with_antonyms == category].index)
    chosen_french_word = random.sample(df.loc[chosen_english_word, 'French'], 1)[0]
    chosen_antonym = random.sample(df.loc[chosen_english_word, 'Antonym'], 1)[0]
    synonyms = set()
    for poss in df['French']:
        if chosen_french_word in poss:
            synonyms |= poss
    for poss in df['Antonym']:
        if chosen_antonym in poss:
            synonyms |= poss
    incorrect_answers = random.sample(cat_to_vocab.loc[category]-synonyms, num_of_answers-1)
    return chosen_french_word, random.sample(incorrect_answers+[chosen_antonym], num_of_answers), chosen_antonym

In [52]:
def format_tuple(tup):
    return f'{tup[0]}\n{"    ".join(tup[1])}'


In [53]:
def print_tup(f, **kwargs):
    tup = f(**kwargs)
    print(format_tuple(tup))
    print(tup[2])


In [54]:
def print_eng_to_french(category = None):
    print_tup(generate_eng_to_french, category = category)

In [55]:
def print_french_to_eng(category = None):
    print_tup(generate_french_to_eng, category = category)

In [56]:
def print_different_cat_synonym():
    print_tup(generate_synonym, answers_in_same_category = False)

In [57]:
def print_same_cat_synonym():
    print_tup(generate_synonym, answers_in_same_category = True)

In [58]:
def print_antonym():
    print_tup(generate_antonym)

In [59]:
print_eng_to_french()
print_eng_to_french()
print_eng_to_french()

impatient
vif    habile    raisonnable    enthousiaste
vif
west
haut    loin    droite    ouest
ouest
apartment
grenier    plafond    armoire    appartement
appartement


In [60]:
print_french_to_eng()
print_french_to_eng()
print_french_to_eng()

avoir faim
plum    garlic    to be hungry    cream
to be hungry
orange
orange    vanilla    blueberry    veal
orange
véranda
porch    hall    floor    ceiling
porch


In [61]:
print_different_cat_synonym()
print_different_cat_synonym()
print_different_cat_synonym()

chaleureux
salle de séjour    glisser    cordial    être
cordial
pressé
retenir    salle de classe    impatient    banal
impatient
miroir
gâteau    taille-crayon    glace    introverti
glace


In [62]:
print_same_cat_synonym()
print_same_cat_synonym()
print_same_cat_synonym()

éducateur(e)
avocat(e)    charpentier(ère)    soldat    enseignant(e)
enseignant(e)
dessous
sur    parmi    sous    en
sous
entrée
tomate    vanille    sucre    hors d'œuvre
hors d'œuvre


In [63]:
print_antonym()
print_antonym()
print_antonym()

horrifié
seul    posé    isolé    nerveux
posé
affligé
actif    pressé    posé    énergique
posé
fort
petit    joli    dégarni    faible
faible


In [64]:
class AnswerModifier:
    def __init__(self, condition, action):
        self.condition = condition
        self.action = action

def ca(c, a):
    return AnswerModifier(c, a)

def replace_some(old:str, news:list):
    def replacer(s:str):
        return random.choice(news).join(s.split(old, random.randint(1, s.count(old))) if random.choice([True, False]) else s.rsplit(old, random.randint(1, s.count(old))))
    return replacer

answer_modifiers = [
    ca(lambda x:'e' in x, replace_some('e', ['é', 'è', 'ê', 'es', 'ee'])), 
    ca(lambda x:'é' in x, replace_some('é', ['e', 'è', 'ê', 'es', 'ee'])), 
    ca(lambda x:'è' in x, replace_some('è', ['é', 'e', 'ê', 'es', 'ee'])), 
    ca(lambda x:'ê' in x, replace_some('ê', ['é', 'è', 'e', 'es', 'ee'])), 
    ca(lambda x:'es' in x, replace_some('es', ['é', 'è', 'ê', 'e', 'ee'])), 
    ca(lambda x:'ee' in x, replace_some('ee', ['é', 'è', 'ê', 'es', 'e'])), 
    ca(lambda x:'a' in x, replace_some('a', ['à', 'â'])), 
    ca(lambda x:'à' in x, replace_some('à', ['a', 'â'])), 
    ca(lambda x:'â' in x, replace_some('â', ['à', 'a'])), 
    ca(lambda x:'c' in x, replace_some('c', ['ç'])), 
    ca(lambda x:'ç' in x, replace_some('ç', ['c'])), 
    ca(lambda x:'s' in x, replace_some('s', ['ss', 'se'])), 
    ca(lambda x:'ss' in x, replace_some('ss', ['s', 'se'])), 
    ca(lambda x:'se' in x, replace_some('se', ['ss', 's'])), 
    ca(lambda x:'i' in x, replace_some('i', ['is', 'iss'])), 
    ca(lambda x:'is' in x, replace_some('is', ['i', 'iss'])), 
    ca(lambda x:'iss' in x, replace_some('iss', ['is', 'i'])), 
    ca(lambda x:'n' in x, replace_some('n', ['gn'])), 
    ca(lambda x:'gn' in x, replace_some('gn', ['n'])), 
    
]
#todo cartesian matrix stuff
answer_modifiers[0].condition('pendant')

True

In [67]:
irregular_verb_df = pd.read_csv('VERB - irregular.csv', names = ['Verb', 'Meaning', 'je', 'tu', 'il', 'nous', 'vous', 'ils'], dtype = str)
irregular_verb_df.set_index('Verb', inplace = True)
irregular_verb_df.head()
short_ir_verb_series = pd.read_csv('VERB - short ir verbs.csv', names = ['Verb', 'Meaning'], dtype = str).set_index('Verb')
verb_series = pd.read_csv('./vocab_database/verbs.csv', names = ['Meaning', 'Verb'], dtype = str).set_index('Verb')
short_ir_verb_series.head()

Unnamed: 0_level_0,Meaning
Verb,Unnamed: 1_level_1
partir,to leave
consentir,to consent
départir,to accord
dormir,to sleep
endormir,to put/send to sleep


In [68]:
class Rule:
    a = 1
    def __init__(self, condition, conjugate, condition_for_mess_up = lambda pronoun, verb:False):
        self.condition = condition
        self.conjugate = conjugate
        self.condition_for_mess_up = condition_for_mess_up
        self.a = Rule.a
        Rule.a+=1
    def __str__(self):
        return f"{self.a}"

rules = [
    Rule(lambda pronoun, verb:verb in irregular_verb_df.index, lambda pronoun, verb:irregular_verb_df.loc[verb, pronoun]), 
    Rule(lambda pronoun, verb:verb.endswith('crire') and pronoun in ['nous', 'vous', 'ils'], lambda pronoun, verb:verb[:-2]+{'nous':'vons', 'vous':'vez', 'ils':'vent'}[pronoun], lambda pronoun, verb:verb.endswith('re') and pronoun in ['nous', 'vous', 'ils']), 
    Rule(lambda pronoun, verb:verb[-4:] in ['uire', 'dire', 'fire', 'lire'] and pronoun in ['il', 'nous', 'vous', 'ils'], lambda pronoun, verb:verb[:-2]+{'il':'t', 'nous':'sons', 'vous':'sez', 'ils':'sent'}[pronoun], lambda pronoun, verb:verb.endswith('re') and pronoun in ['il', 'nous', 'vous', 'ils']), 
    Rule(lambda pronoun, verb:verb in short_ir_verb_series.index, lambda pronoun, verb:verb[:-3]+{'je':'s', 'tu':'s', 'il':'t', 'nous':f'{verb[-3]}ons', 'vous':f'{verb[-3]}ez', 'ils':f'{verb[-3]}ent'}[pronoun], lambda pronoun, verb:verb.endswith('ir')), 
    Rule(lambda pronoun, verb:verb.endswith('rompre') and pronoun=='il', lambda pronoun, verb:verb[:-2]+'t', lambda pronoun, verb:verb.endswith('re') and pronoun=='il'), 
    Rule(lambda pronoun, verb:verb.endswith('enir'), lambda pronoun, verb:verb[:-4]+{'je':'iens', 'tu':'iens', 'il':'ient', 'nous':'enons', 'vous':'enez', 'ils':'iennent'}[pronoun], lambda x, y:True), 
    Rule(lambda pronoun, verb:(verb.endswith('llir') or verb.endswith('frir') or verb.endswith('vrir')), lambda pronoun, verb:verb[:-2]+{'je':'e', 'tu':'es', 'il':'e', 'nous':'ons', 'vous':'ez', 'ils':'ent'}[pronoun], lambda x, y:True), 
    Rule(lambda pronoun, verb:(verb.endswith('ayer') or verb.endswith('oyer') or verb.endswith('uyer')) and pronoun not in ["nous", 'vous'], lambda pronoun, verb:verb[:-3]+'i'+{'je':'e', 'tu':'es', 'il':'e', 'nous':'ons', 'vous':'ez', 'ils':'ent'}[pronoun], lambda x, y:True), 
    Rule(lambda pronoun, verb:(verb.endswith('eler') or verb.endswith('eter')) and pronoun not in ["nous", 'vous'], lambda pronoun, verb:verb[:-3]+verb[-3]*2+{'je':'e', 'tu':'es', 'il':'e', 'nous':'ons', 'vous':'ez', 'ils':'ent'}[pronoun], lambda x, y:True), 
    Rule(lambda pronoun, verb:verb.endswith('er') and verb[-4] in ['é', 'e'] and pronoun not in ["nous", 'vous'], lambda pronoun, verb:verb[:-4]+'è'+verb[-3]+{'je':'e', 'tu':'es', 'il':'e', 'nous':'ons', 'vous':'ez', 'ils':'ent'}[pronoun], lambda x, y:True), 
    Rule(lambda pronoun, verb:verb.endswith('cer') and pronoun=='nous', lambda pronoun, verb:verb[:-3]+'çons', lambda pronoun, verb:verb.endswith('er') and pronoun=='nous'), 
    Rule(lambda pronoun, verb:verb.endswith('ger') and pronoun=='nous', lambda pronoun, verb:verb[:-2]+'eons', lambda pronoun, verb:verb.endswith('er') and pronoun=='nous'), 
    Rule(lambda pronoun, verb:verb.endswith('er'), lambda pronoun, verb:verb[:-2]+{'je':'e', 'tu':'es', 'il':'e', 'nous':'ons', 'vous':'ez', 'ils':'ent'}[pronoun], lambda x, y:True), 
    Rule(lambda pronoun, verb:verb.endswith('ir'), lambda pronoun, verb:verb[:-2]+{'je':'is', 'tu':'is', 'il':'it', 'nous':'issons', 'vous':'issez', 'ils':'issent'}[pronoun], lambda x, y:True), 
    Rule(lambda pronoun, verb:verb.endswith('re'), lambda pronoun, verb:verb[:-2]+{'je':'s', 'tu':'s', 'il':'', 'nous':'ons', 'vous':'ez', 'ils':'ent'}[pronoun], lambda x, y:True), 
]
#todo ir verbs

In [69]:
def generate_irregular_verb(num_of_answers = 4):
    verb = random.choice(irregular_verb_df.index)
    pronoun = random.choice(irregular_verb_df.columns[2:])
    conjugation = irregular_verb_df.loc[verb, pronoun]
    incorrects = random.sample(set(irregular_verb_df.loc[verb][1:])-{conjugation}, 3)
    return f"{verb}, {pronoun}", random.sample(incorrects+[conjugation], num_of_answers), conjugation

In [70]:
def generate_verb(verb = None, pronoun = None, num_of_answers = 4):
    verb = verb if verb else random.choice(verb_series.index)
    pronoun = pronoun if pronoun else random.choice(['je', 'tu', 'il', 'nous', 'vous', 'ils'])
    # print(next(filter(lambda rule:rule.condition(pronoun, verb), rules)))
    conjugation = next(filter(lambda rule:rule.condition(pronoun, verb), rules)).conjugate(pronoun, verb)
    potential_incorrects = set(map(lambda rule:rule.conjugate(pronoun, verb), filter(lambda rule:rule.condition_for_mess_up(pronoun, verb), rules)))-{conjugation}
    chosen_incorrects = random.sample(potential_incorrects, 3)
    return f"{verb}, {pronoun}", random.sample(chosen_incorrects+[conjugation], num_of_answers), conjugation
generate_verb('reconduire', 'il')

('reconduire, il',
 ['reconduiie', 'reconduiit', 'reconduit', 'recondui'],
 'reconduit')

In [71]:
ALL_CORRECT='1, 2, 3, 4'
generate_instructional_question = lambda **kwargs:(kwargs['instruction'], ['ok']*4, ALL_CORRECT)
question_entries = [
    #ENFR
    (generate_instructional_question, {'instruction':'English To French'}, 1, 0.4), 
    (generate_eng_to_french, {'category':'New'}, 5), 
    (generate_eng_to_french, {}, 5),
    # FREN
    (generate_instructional_question, {'instruction':'French To English'}, 1, 0),
    (generate_french_to_eng, {'category':'New'}, 5), 
    (generate_french_to_eng, {}, 5), 
    # SYN    
    (generate_instructional_question, {'instruction':'French Synonym'}, 1, 0.2), 
    (generate_synonym, {}, 5), 
    # ANT
    (generate_instructional_question, {'instruction':'French Antonym'}, 1, 0.2), 
    (generate_antonym, {}, 5), 
    # IRREG
    (generate_instructional_question, {'instruction':'Conjugate Verb'}, 1, 0), 
    (generate_verb, {}, 10)
]

qdf = pd.DataFrame(columns = ['Questions', 'Answer 1', 'Answer 2', 'Answer 3', 'Answer 4', 'Time', 'Correct Answer'])
vocab_list = set()
index = 0
mess_up_answer_chance = 0.2
chance_to_mess_up_correct = 0.5
enable_replacers = True

for question_entry in question_entries:
    func_to_call = question_entry[0]
    params = question_entry[1]
    num_of_times = question_entry[2]
    for question_num in range(num_of_times):
        while True:
            tup = func_to_call(**params)
            if tup[0] in vocab_list:
                continue
            vocab_list.add(tup[0])
            if tup[2] == ALL_CORRECT: # Instructional questions
                correct_answer = tup[2]
                time = 5
            else:
                correct_answer = tup[1].index(tup[2])+1
                time = 20
                if random.random()<=mess_up_answer_chance:
                    if random.random()<=chance_to_mess_up_correct:
                        answer_index_to_mess_up = correct_answer-1
                    else:
                        answer_index_to_mess_up = random.randrange(4)
                    answer_to_mess_up = tup[1][answer_index_to_mess_up]
                    potential_answer_modifiers = list(filter(lambda x:x.condition(answer_to_mess_up), answer_modifiers))
                    if potential_answer_modifiers:
                        # print(tup[0])
                        answer_modifier = random.choice(potential_answer_modifiers)
                        messed_up_answer = answer_modifier.action(answer_to_mess_up)
                        tup[1][random.sample(set(range(4))-{correct_answer-1, answer_index_to_mess_up}, 1)[0]] = messed_up_answer
                
            qdf.loc[index] = [tup[0], *tup[1], time, correct_answer]
            index+=1
            break
            
    if func_to_call == generate_instructional_question:
        vocab_list.clear()
        mess_up_answer_chance = question_entry[3]
    
qdf

Unnamed: 0,Questions,Answer 1,Answer 2,Answer 3,Answer 4,Time,Correct Answer
0,English To French,ok,ok,ok,ok,5,"1, 2, 3, 4"
1,alcoholic drinks,thé,apéritif;apéro,vin,boissons alcoolisées,20,4
2,wine,vin,lait,bière,express;café,20,1
3,beer on tap,eau du robinet,boissons non alcoolisées,pression,cidre,20,3
4,"lemon soda (eg, 7-Up)",soda;boisson gazeuse,cidre,bière,limonade,20,4
5,champagne,champagne,tisane;infusion,çhampagne,pression,20,1
6,"sauce, dressing, gravy",avoir faim,carotte,sauce,gâteau,20,3
7,to share,partager,correspondre,obéir,danser,20,1
8,jar,plat,argenterie,pot,vase,20,3
9,arm,bras,genou,orteisl,orteil,20,1


In [72]:
qdf.to_csv('Kahoot Generated Questions.csv', encoding='utf-8-sig')

In [73]:
df.to_csv('Master Vocab.csv', encoding='utf-8-sig')

### Random Pronunciation Words

In [133]:
import csv
import itertools

In [134]:
with open('Master Pronunciation.csv', 'r', encoding='utf-8-sig') as f:
    cr = csv.reader(f)
    l = list(itertools.chain(*cr))

In [176]:
random.choice([w for w in l if True])

'citron'

### Generating Pronunciation

In [39]:
import pickle

In [40]:
with open('top10kwords.pkl', 'rb') as f:
    top10kwordslist = pickle.load(f)
top10kwordslist[:5]

['de', 'la', 'le', 'et', 'les']

In [41]:
[w for w in top10kwordslist if w.endswith('\n')]

[]

In [42]:
frequent_series = pd.Series(top10kwordslist)
frequent_series.head()

0     de
1     la
2     le
3     et
4    les
dtype: object

In [52]:
previous_max_rows = pd.get_option('display.max_rows')
pd.set_option('display.max_rows', None)
print(frequent_series[frequent_series.str.contains("oi")])
pd.set_option('display.max_rows', previous_max_rows)

67                 moins
94                  mois
105                 soit
108                 fois
133                trois
136                 doit
166           croissance
185                point
189                avoir
229                 voir
237              pouvoir
245                droit
271            toutefois
307               points
323              parfois
342                choix
343              doivent
363                  loi
366               savoir
367                 loin
495                 quoi
533               besoin
560             pourquoi
573               droits
574               moitié
607              voiture
683                 voit
695             l'emploi
697                voire
717               soient
733                  moi
753              d'avoir
762              moindre
790                 voie
792              prévoit
794                 noir
799                 bois
812            néanmoins
892              mémoire
915             voitures


In [44]:
verb_series[verb_series.index.str.startswith('u')]

Unnamed: 0_level_0,Meaning
Verb,Unnamed: 1_level_1


In [45]:
len(top10kwordslist)

10000

In [46]:
random.choice(frequent_series)

'ouverture'