In [3]:
# Read the UTF-8 sample file and show its text.
# The encoding is passed explicitly because the default used by open()
# depends on the platform's locale settings.
with open('Documents/example_utf8.txt', encoding='utf-8') as file:
    print(file.read())

20£



In [4]:
# Demonstration: decoding the ISO-8859-1 file as UTF-8 fails, because the
# byte 0xa3 ('£' in ISO-8859-1) is not a valid UTF-8 start byte.
# The encoding is explicit so the failure does not depend on the platform
# default, and the error is caught and printed so the rest of the notebook
# still runs under "Restart & Run All".
try:
    with open('Documents/example_iso.txt', encoding='utf-8') as file:
        print(file.read())
except UnicodeDecodeError as error:
    print(f"{type(error).__name__}: {error}")

UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa3 in position 2: invalid start byte

In [5]:
# Open the ISO-8859-1 file with the appropriate encoding

In [7]:
# Naming the file's real encoding (ISO-8859-1) lets the text decode cleanly.
with open('Documents/example_iso.txt', encoding='iso-8859-1') as iso_file:
    iso_text = iso_file.read()
print(iso_text)

20£


In [9]:
# Open the UTF-8 file and save its contents in an ISO-8859-1 file.
# The source encoding is stated explicitly instead of relying on the
# platform default, so the text is decoded as UTF-8 everywhere.

with open('Documents/example_utf8.txt', encoding='utf-8') as file:
    content = file.read()

# The text is re-encoded on write; with the default strict error handling,
# a character that ISO-8859-1 cannot represent raises UnicodeEncodeError.
with open('Documents/example_utf8_output.txt', 'w', encoding='iso-8859-1') as file:
    file.write(content)

In [10]:
# Read the converted file back as ISO-8859-1 to check the round trip.
with open('Documents/example_utf8_output.txt', encoding='iso-8859-1') as converted_file:
    converted_text = converted_file.read()
print(converted_text)

20£



#### More
If you don't know the encoding of a file, BeautifulSoup's `UnicodeDammit` class can make a guess about it.

In [13]:
from bs4 import UnicodeDammit 

# Binary mode: hand UnicodeDammit the raw, undecoded bytes of the file.
with open('Documents/example_utf8_output.txt', 'rb') as binary_file:
    content = binary_file.read()

# Let UnicodeDammit guess the encoding; the final expression displays the guess.
sugestion = UnicodeDammit(content)
sugestion.original_encoding

'iso-8859-1'

In [14]:
# The file's contents decoded to text by UnicodeDammit.
sugestion.unicode_markup

'20£\n'

In [15]:
import csv

# Print every row of the CSV as a list of strings.
# newline='' is what the csv module documentation asks for: it lets the
# reader handle line endings itself, which matters here because the
# 'Admissions\n(millions)' header has a newline inside a quoted field.
with open('Documents/top_films.csv', newline='') as file:
    data = csv.reader(file)

    for row in data:
        print(row)

['Rank', 'Admissions\n(millions)', 'Title (year) (studio)', 'Director(s)']
['1', '225.7', 'Gone With the Wind (1939)\xa0(MGM)', 'Victor Fleming, George Cukor, Sam Wood']
['2', '194.4', 'Star Wars (Ep. IV: A New Hope) (1977)\xa0(Fox)', 'George Lucas']
['3', '161.0', 'ET: The Extra-Terrestrial (1982)\xa0(Univ)', 'Steven Spielberg']
['4', '156.4', 'The Sound of Music (1965)\xa0(Fox)', 'Robert Wise']
['5', '130.0', 'The Ten Commandments (1956)\xa0(Para)', 'Cecil B. DeMille']
['6', '128.4', 'Titanic (1997)\xa0(Fox)', 'James Cameron']
['7', '126.3', 'Snow White and the Seven Dwarfs (1937)\xa0(BV)', 'David Hand']
['8', '120.7', 'Jaws (1975)\xa0(Univ)', 'Steven Spielberg']
['9', '120.1', 'Doctor Zhivago (1965)\xa0(MGM)', 'David Lean']
['10', '118.9', 'The Lion King (1994)\xa0(BV)', 'Roger Allers, Rob Minkoff']


In [20]:
# Load every row as a dict keyed by the column names from the header row.
# newline='' (as the csv module documentation recommends) leaves line-ending
# handling to the csv module, including the newline embedded in the
# 'Admissions\n(millions)' header.
with open("Documents/top_films.csv", newline='') as file:
    data = csv.DictReader(file)
    # Materialize the rows while the file is still open.
    structured_data = list(data)

In [21]:
# First data row: a dict mapping each column name to that row's value.
structured_data[0]

{'Rank': '1',
 'Admissions\n(millions)': '225.7',
 'Title (year) (studio)': 'Gone With the Wind (1939)\xa0(MGM)',
 'Director(s)': 'Victor Fleming, George Cukor, Sam Wood'}

In [23]:
# The dict keys are the column names taken from the CSV header row.
structured_data[0].keys()

dict_keys(['Rank', 'Admissions\n(millions)', 'Title (year) (studio)', 'Director(s)'])

In [29]:
# Look up a single field by column name; the value comes back as a string.
structured_data[0]['Rank']

'1'

#### More: Introducing dialects

In [35]:
# Have csv.Sniffer inspect the file's full text and deduce its dialect.
with open('Documents/top_films.csv', newline='') as sample_file:
    sample_text = sample_file.read()
dialect = csv.Sniffer().sniff(sample_text)

In [36]:
# Parse the file again, this time with the sniffed dialect, printing each row.
with open("Documents/top_films.csv", newline='') as csv_file:
    for record in csv.reader(csv_file, dialect):
        print(record)

['Rank', 'Admissions\n(millions)', 'Title (year) (studio)', 'Director(s)']
['1', '225.7', 'Gone With the Wind (1939)\xa0(MGM)', 'Victor Fleming, George Cukor, Sam Wood']
['2', '194.4', 'Star Wars (Ep. IV: A New Hope) (1977)\xa0(Fox)', 'George Lucas']
['3', '161.0', 'ET: The Extra-Terrestrial (1982)\xa0(Univ)', 'Steven Spielberg']
['4', '156.4', 'The Sound of Music (1965)\xa0(Fox)', 'Robert Wise']
['5', '130.0', 'The Ten Commandments (1956)\xa0(Para)', 'Cecil B. DeMille']
['6', '128.4', 'Titanic (1997)\xa0(Fox)', 'James Cameron']
['7', '126.3', 'Snow White and the Seven Dwarfs (1937)\xa0(BV)', 'David Hand']
['8', '120.7', 'Jaws (1975)\xa0(Univ)', 'Steven Spielberg']
['9', '120.1', 'Doctor Zhivago (1965)\xa0(MGM)', 'David Lean']
['10', '118.9', 'The Lion King (1994)\xa0(BV)', 'Roger Allers, Rob Minkoff']
