# Python: Improving functions and understanding errors

## Functions with multiple execution paths

In [1]:
# Example 1
def your_age(age):
    """Label an age: "Old!" from 60 upwards, "Young" for anything below."""
    return "Old!" if age >= 60 else "Young"

In [2]:
# 70 satisfies the `age >= 60` test, so the first branch of your_age is taken.
your_age(70)

'Old!'

In [3]:
# Example 2
def is_old(age):
    """Return "Old!" when age is 60 or more.

    Any other age yields None: this function has a single labelled path and
    produces no label when its check fails.
    """
    return "Old!" if age >= 60 else None

def is_young(age):
    """Return "Young!" when age is below 60.

    Any other age yields None: this function has a single labelled path and
    produces no label when its check fails.
    """
    return "Young!" if age < 60 else None

In [4]:
# 70 passes the `age >= 60` check inside is_old, so a label is returned.
is_old(70)

'Old!'

In [5]:
# 40 passes the `age < 60` check inside is_young, so a label is returned.
is_young(40)

'Young!'

## Arguments

In [6]:
# Read both input files fully into memory. The `with` blocks close each file
# handle as soon as its content has been read, instead of leaving it open
# for the lifetime of the kernel.
with open("dictionnaire.txt", "r", encoding="utf-8") as vocabulary_handle:
    vocabulary = vocabulary_handle.read()
with open("texte.txt", "r", encoding="utf-8") as text_handle:
    text_string = text_handle.read()

def clean_text(string, change_string_list, replacement_string):
    """Replace unwanted substrings in a text, then lowercase the result.

    Args:
        string: Text to clean.
        change_string_list: Substrings to replace, applied one after another
            in the order given.
        replacement_string: Text put in place of every substring found.

    Returns:
        The text after all replacements, converted to lowercase.
    """
    result = string
    for target in change_string_list:
        result = result.replace(target, replacement_string)
    # Lowercasing happens last, so the substrings are matched case-sensitively.
    return result.lower()

# Substrings stripped from the text before tokenizing: period, comma,
# apostrophe and newline.
change_string_list = [".", ",", "'", "\n"]
# An empty replacement means each of the substrings above is simply deleted.
replacement_string = ""

In [7]:
def tokenize(string, change_string_list, replacement_string, clean=False):
    """Split a text into tokens on single space characters.

    Args:
        string: Text to split.
        change_string_list: Substrings handed to ``clean_text``; only used
            when ``clean`` is true.
        replacement_string: Replacement handed to ``clean_text``; only used
            when ``clean`` is true.
        clean: When true, the text first goes through ``clean_text``.

    Returns:
        list: The tokens. Consecutive spaces produce empty-string tokens
        because the split is on the literal " " separator.
    """
    source = clean_text(string, change_string_list, replacement_string) if clean else string
    return source.split(" ")

In [8]:
# `clean` keeps its default (False): the text is split as-is, so punctuation
# and capital letters stay attached to the tokens.
tokenized_text = tokenize(text_string, change_string_list, replacement_string)
print(tokenized_text[:10])

['En', 'termes', 'généraux,', 'la', 'scence', 'des', 'données', 'est', "l'extraction", 'de']


In [9]:
# Same call with cleaning switched on: the listed characters are removed and
# the text is lowercased before splitting.
tokenized_text = tokenize(text_string, change_string_list, replacement_string, clean=True)
print(tokenized_text[:10])

['en', 'termes', 'généraux', 'la', 'scence', 'des', 'données', 'est', 'lextraction', 'de']


## Exercise: Improving our spell checker

In [10]:
def spell_check(vocabulary_file, text_file, special_characters=(",", ".", "'", "\n"), replacement_string=""):
    """Return the words of a text file that are absent from a vocabulary file.

    Both files are read as UTF-8, then cleaned and split by ``tokenize`` with
    ``clean=True``. Every non-empty token of the text that is not a token of
    the vocabulary is reported.

    Args:
        vocabulary_file: Path of the file holding the known words.
        text_file: Path of the file to check.
        special_characters: Substrings replaced by ``replacement_string`` in
            both files before splitting. The default is a tuple so that no
            mutable object is shared between calls.
        replacement_string: Text substituted for each special character.

    Returns:
        list: Unknown tokens in their order of appearance in the text. A word
        misspelled several times appears several times.

    Raises:
        OSError: If either file cannot be opened.
    """
    # `with` closes each file as soon as its content has been read.
    with open(vocabulary_file, "r", encoding="utf-8") as vocabulary_handle:
        vocabulary = vocabulary_handle.read()
    with open(text_file, "r", encoding="utf-8") as text_handle:
        text = text_handle.read()

    # A set makes each membership test constant-time instead of scanning the
    # whole vocabulary list once per token of the text.
    known_words = set(tokenize(vocabulary, special_characters, replacement_string, clean=True))
    tokenized_text = tokenize(text, special_characters, replacement_string, clean=True)

    # tokenize can yield empty tokens (e.g. between two consecutive spaces);
    # those are not words and are never reported.
    return [token for token in tokenized_text if token != "" and token not in known_words]

In [11]:
# Run the checker with its default special characters and replacement string.
final_misspelled_words = spell_check("dictionnaire.txt", "texte.txt")
print(final_misspelled_words)

['scence', 'conaissance', 'téorie', 'statistiqe', 'stokage', 'dicipline', 'come']


## Error types

### Syntax errors

In [12]:
# SyntaxError
answer = 12'

SyntaxError: EOL while scanning string literal (Temp/ipykernel_4240/2962720375.py, line 2)

In [13]:
# SyntaxError
de show():
    print("Show!")

SyntaxError: invalid syntax (Temp/ipykernel_4240/141079354.py, line 2)

In [14]:
# IndentationError
def look():
    print("Hey!")
        print("Look!")

IndentationError: unexpected indent (Temp/ipykernel_4240/3354422552.py, line 4)

### Runtime errors

In [15]:
# TypeError
# `+` is not defined between an int and a str, and Python does not convert
# either operand implicitly, so the addition below raises TypeError.
nine = 9
nine + "9"

TypeError: unsupported operand type(s) for +: 'int' and 'str'

In [16]:
# ValueError
# The argument has the right type (str) but its content is not a number,
# so float() raises ValueError rather than TypeError.
float("Bye!")

ValueError: could not convert string to float: 'Bye!'

In [17]:
# IndexError
# The list holds three items, so its valid indices are 0, 1 and 2;
# asking for index 3 raises IndexError.
age = [12, 15, 18]
age[3]

IndexError: list index out of range

In [18]:
# AttributeError
# open() returns a file object, not the file's text, and file objects have
# no split() method, so the second line raises AttributeError.
# NOTE(review): `f` is never closed here; acceptable for the demonstration,
# but regular code should wrap the open() call in a `with` block.
f = open("texte.txt")
g = f.split(',')

AttributeError: '_io.TextIOWrapper' object has no attribute 'split'