# File handling
- "r" reads the file
- "w" writes to the file (creates it if needed, overwrites existing content)
- "a" appends to the end of the file
- "x" creates a new file (raises an error if it already exists)

In [10]:
# Open files with a `with` block: the file is closed automatically when the
# block ends, even if an exception is raised inside it, so no manual
# try/finally is needed. Errors such as a missing file still propagate.
# A relative path may use ".." to step up one folder.
with open("Files/quotes.txt", mode="r") as quotes_file:
    text = quotes_file.read()

# repr() exposes characters that a plain print(text) hides, e.g. the "\n" line breaks.
print(repr(text))

'  If     we     knew what it was      we were doing, it would not be called research,          would it?     - Albert Einstein\n\nTime is a drug. Too       much of it kills you.  -  Terry Pratchett\n\n\n An expert is a person who       has made all the mistakes that           can be made in a          very narrow field - Niels Bohr\n\n   Everything must be made as simple as possible. But not simpler. - Albert Einstein     \n\n\n  Nothing in life                is to be feared, it is only to be understood. Now is the time to understand more, so that we may fear less. - Marie  Curie  \n\nIf I have seen further     it is by standing on the shoulders of Giants. - Isaac Newton'


## Clean up quotes.txt
- inspect quotes
- remove leading and trailing whitespace
- remove excess whitespace between words
- add quote numbers
- extract authors

In [14]:
path = "Files/quotes.txt"

# Build a list with one entry per line of the file. Each line read from the
# file still ends with "\n", so that is stripped before the line is stored;
# blank lines therefore end up as empty strings.
with open(path, mode="r") as quotes_file:
    quotes = []
    for raw_line in quotes_file:
        quotes.append(raw_line.strip("\n"))

print(quotes)

['  If     we     knew what it was      we were doing, it would not be called research,          would it?     - Albert Einstein', '', 'Time is a drug. Too       much of it kills you.  -  Terry Pratchett', '', '', ' An expert is a person who       has made all the mistakes that           can be made in a          very narrow field - Niels Bohr', '', '   Everything must be made as simple as possible. But not simpler. - Albert Einstein     ', '', '', '  Nothing in life                is to be feared, it is only to be understood. Now is the time to understand more, so that we may fear less. - Marie  Curie  ', '', 'If I have seen further     it is by standing on the shoulders of Giants. - Isaac Newton']


In [60]:
import re
path = "Files/quotes.txt"
path_clean = "Files/quotes_cleaned.txt"

# Read the raw quotes and write a cleaned, numbered copy.
# Opening both files in a single `with` statement closes both of them when
# the block ends; "w" creates the cleaned file or overwrites an existing one.
with open(path, "r") as f1, open(path_clean, "w") as f2:
    f2.write("Famous quotes\n\n")  # Headline followed by one blank line

    # Iterate over the file line by line instead of loading it with readlines().
    # r"\s+" matches any run of whitespace (spaces, tabs, "\r", "\n"), so each
    # run is collapsed to a single space; strip() then removes what is left at
    # the start and end of the line.
    cleaned = (re.sub(r"\s+", " ", line).strip() for line in f1)

    # Blank lines are reduced to "" and filtered out here, so enumerate()
    # numbers only the real quotes, starting at 1.
    for number, quote in enumerate((q for q in cleaned if q), start=1):
        f2.write(f"{number}. {quote}\n")

## Extract authors from quotes_cleaned.txt

In [61]:
path_clean = "Files/quotes_cleaned.txt"

# Read the whole file first and close it again before appending, instead of
# keeping the same file open in "r" and "a" mode at the same time.
with open(path_clean, "r") as f1:
    lines = [line.strip("\n") for line in f1]

# Only the numbered quotes start with a digit; the headline, blank lines and
# an "Authors: " line do not. line[:1] is "" for an empty line, so this is
# safe where line[0] would raise IndexError.
quotes = [line for line in lines if line[:1].isdigit()]

# The author is the text after the last " - " in a quote. This also works for
# names that are not exactly two words long. Lines without the separator are
# skipped. A set keeps only the unique names: set([4, 4, 1, 1, 2]) -> {1, 2, 4}
authors = set()
for quote in quotes:
    _, separator, author = quote.rpartition(" - ")
    if separator and author:
        authors.add(author)
print(authors)

# Append the author line only if the file does not have one yet, so running
# this cell again does not add duplicates. Sets have no fixed order, so the
# names are sorted to get the same file content on every run, and join()
# avoids a trailing ", " after the last name.
if not any(line.startswith("Authors: ") for line in lines):
    with open(path_clean, "a") as f2:
        f2.write("\nAuthors: " + ", ".join(sorted(authors)) + "\n")

{'Marie Curie', 'Niels Bohr', 'Terry Pratchett', 'Albert Einstein', 'Isaac Newton'}
