Reading Text Files

In [2]:
file_name = 'DEXUSEU.csv'

In [4]:
file = open(file_name, 'r')

In [5]:
file.name

'DEXUSEU.csv'

In [6]:
file.readable()

True

In [7]:
file.writable()

False

In [10]:
file.closed

True

In [11]:
# Read the whole file into a list of lines; each element keeps its line ending.
# NOTE: if readlines() raised, close() would never run -- the try/finally and
# `with` cells further down in this notebook address that.
f = open(file_name)
data = f.readlines()
f.close()

In [12]:
data

['John,Doe,120 jefferson st.,Riverside, NJ, 08075\n',
 'Jack,McGinnis,220 hobo Av.,Phila, PA,09119\n',
 '"John ""Da Man""",Repici,120 Jefferson St.,Riverside, NJ,08075\n',
 'Stephen,Tyler,"7452 Terrace ""At the Plaza"" road",SomeTown,SD, 91234\n',
 ',Blankman,,SomeTown, SD, 00298\n',
 '"Joan ""the bone"", Anne",Jet,"9th, at Terrace plc",Desert City,CO,00123']

In [13]:
# Iterate over the file object one line at a time instead of loading it all.
# end='' stops print() from adding a second newline after the one each line
# already carries.
# The file is left open here; it is closed by the f.close() in the next cell.
f = open(file_name)
for line in f:
    print(line, end='')

John,Doe,120 jefferson st.,Riverside, NJ, 08075
Jack,McGinnis,220 hobo Av.,Phila, PA,09119
"John ""Da Man""",Repici,120 Jefferson St.,Riverside, NJ,08075
Stephen,Tyler,"7452 Terrace ""At the Plaza"" road",SomeTown,SD, 91234
,Blankman,,SomeTown, SD, 00298
"Joan ""the bone"", Anne",Jet,"9th, at Terrace plc",Desert City,CO,00123

In [14]:
f.close()

In [15]:
f = open(file_name)
print(next(f))
print(next(f))
print(next(f))
f.close()

John,Doe,120 jefferson st.,Riverside, NJ, 08075

Jack,McGinnis,220 hobo Av.,Phila, PA,09119

"John ""Da Man""",Repici,120 Jefferson St.,Riverside, NJ,08075



In [16]:
# Demonstrate that the `finally` clause runs even when the loop body raises,
# so the file is closed before the exception propagates.
f = open(file_name)
try:
    for row in f:
        print(row)
        # Deliberately abort after printing the first row.
        raise ValueError('forcing an exception...')
finally:
    # Runs whether or not an exception occurred.
    print('closing file')
    f.close()

John,Doe,120 jefferson st.,Riverside, NJ, 08075

closing file


ValueError: forcing an exception...

In [18]:
f.closed

True

In [19]:
with open(file_name) as f:
    print(f.closed)
print(f.closed)

False
True


In [20]:
with open(file_name) as f:
    for line in f:
        print(line)

John,Doe,120 jefferson st.,Riverside, NJ, 08075

Jack,McGinnis,220 hobo Av.,Phila, PA,09119

"John ""Da Man""",Repici,120 Jefferson St.,Riverside, NJ,08075

Stephen,Tyler,"7452 Terrace ""At the Plaza"" road",SomeTown,SD, 91234

,Blankman,,SomeTown, SD, 00298

"Joan ""the bone"", Anne",Jet,"9th, at Terrace plc",Desert City,CO,00123


In [21]:
float('1.0987')

1.0987

In [22]:
float('.')

ValueError: could not convert string to float: '.'

In [24]:
# Skip the first line of the file, then print every remaining line with its
# surrounding whitespace (including the trailing newline) removed.
with open(file_name) as f:
    # next() consumes the first line, so the loop below starts at line two.
    # NOTE(review): in the output recorded for this cell, the skipped line
    # looks like a data record rather than a header row -- confirm that the
    # file really starts with a header.
    headers = next(f)

    for row in f:
        # strip() removes the newline so print() does not double-space output.
        row = row.strip()

        print(row)

Jack,McGinnis,220 hobo Av.,Phila, PA,09119
"John ""Da Man""",Repici,120 Jefferson St.,Riverside, NJ,08075
Stephen,Tyler,"7452 Terrace ""At the Plaza"" road",SomeTown,SD, 91234
,Blankman,,SomeTown, SD, 00298
"Joan ""the bone"", Anne",Jet,"9th, at Terrace plc",Desert City,CO,00123


Writing Text Files

In [26]:
f = open('test.csv', 'w')

In [27]:
f.write('abc')

3

In [28]:
f.write('123456')

6

In [29]:
f.close()

In [1]:
with open('test.csv') as f:
    print(f.readlines())

['abc123456']


In [2]:
with open('test.csv', 'w') as f:
    f.write('abc\n')
    f.write('123456\n')

In [3]:
with open('test.csv', 'r') as f:
    print(f.readlines())

['abc\n', '123456\n']


In [4]:
data = ['line 1', 'line 2', 'line 3']

In [5]:
with open('test.csv', 'w') as f:
    f.writelines(data)

In [6]:
with open('test.csv') as f:
    print(f.readlines())

['line 1line 2line 3']


In [7]:
with open('test.csv', 'w') as f:
    f.write('\n'.join(data))

In [8]:
with open('test.csv') as f:
    print(f.readlines())

['line 1\n', 'line 2\n', 'line 3']


In [9]:
with open('test.csv', 'r') as f:
    raise ValueError('Error')

ValueError: Error

In [10]:
f

<_io.TextIOWrapper name='test.csv' mode='r' encoding='cp1252'>

In [11]:
f.closed

True

In [12]:
with open('test.csv') as f:
    for line in f:
        print(line, end='')

line 1
line 2
line 3

In [13]:
with open('does_not_exist.txt', 'a') as f:
    f.write('line 1')

In [14]:
with open('does_not_exist.txt') as f:
    print(f.readlines())

['line 1']


In [15]:
source_file = 'DEXUSEU.csv'

In [17]:
with open(source_file) as f:
    for _ in range(5):
        print(next(f).strip())

John,Doe,120 jefferson st.,Riverside, NJ, 08075
Jack,McGinnis,220 hobo Av.,Phila, PA,09119
"John ""Da Man""",Repici,120 Jefferson St.,Riverside, NJ,08075
Stephen,Tyler,"7452 Terrace ""At the Plaza"" road",SomeTown,SD, 91234
,Blankman,,SomeTown, SD, 00298


In [18]:
target_file = 'output.csv'

In [20]:
with open(source_file) as f:
    data = f.readlines()

In [21]:
data[0:5]

['John,Doe,120 jefferson st.,Riverside, NJ, 08075\n',
 'Jack,McGinnis,220 hobo Av.,Phila, PA,09119\n',
 '"John ""Da Man""",Repici,120 Jefferson St.,Riverside, NJ,08075\n',
 'Stephen,Tyler,"7452 Terrace ""At the Plaza"" road",SomeTown,SD, 91234\n',
 ',Blankman,,SomeTown, SD, 00298\n']

In [22]:
del data[0]

In [23]:
data[0:5]

['Jack,McGinnis,220 hobo Av.,Phila, PA,09119\n',
 '"John ""Da Man""",Repici,120 Jefferson St.,Riverside, NJ,08075\n',
 'Stephen,Tyler,"7452 Terrace ""At the Plaza"" road",SomeTown,SD, 91234\n',
 ',Blankman,,SomeTown, SD, 00298\n',
 '"Joan ""the bone"", Anne",Jet,"9th, at Terrace plc",Desert City,CO,00123']

In [24]:
data = [line.strip() for line in data]

In [25]:
data

['Jack,McGinnis,220 hobo Av.,Phila, PA,09119',
 '"John ""Da Man""",Repici,120 Jefferson St.,Riverside, NJ,08075',
 'Stephen,Tyler,"7452 Terrace ""At the Plaza"" road",SomeTown,SD, 91234',
 ',Blankman,,SomeTown, SD, 00298',
 '"Joan ""the bone"", Anne",Jet,"9th, at Terrace plc",Desert City,CO,00123']

In [26]:
data = [line.split(',') for line in data]

In [27]:
data[0:5]

[['Jack', 'McGinnis', '220 hobo Av.', 'Phila', ' PA', '09119'],
 ['"John ""Da Man"""',
  'Repici',
  '120 Jefferson St.',
  'Riverside',
  ' NJ',
  '08075'],
 ['Stephen',
  'Tyler',
  '"7452 Terrace ""At the Plaza"" road"',
  'SomeTown',
  'SD',
  ' 91234'],
 ['', 'Blankman', '', 'SomeTown', ' SD', ' 00298'],
 ['"Joan ""the bone""',
  ' Anne"',
  'Jet',
  '"9th',
  ' at Terrace plc"',
  'Desert City',
  'CO',
  '00123']]

In [28]:
def split_date(dt_str):
    """Cut a date string into (year, month, day) pieces by fixed position.

    The pieces are characters 0-3, characters 5-6 and everything from
    character 8 onward; characters 4 and 7 (the separators in a string such
    as '2015-04-03') are dropped. No validation is performed: a shorter or
    differently formatted string simply yields shorter or empty pieces.

    Returns:
        A 3-tuple of strings ``(year, month, day)``.
    """
    year = dt_str[0:4]
    month = dt_str[5:7]
    day = dt_str[8:]
    return year, month, day

In [33]:
def transform_row_for_output(row):
    """Convert one 'date,rate' source line into a 'year,month,day,rate' line.

    Example: ``'2015-04-03,1.0990\\n'`` becomes ``'2015,4,3,1.0990\\n'``.

    Args:
        row: One line of the source file, with or without its line ending.

    Returns:
        The transformed line terminated by a newline, or the empty string
        when the row should be skipped. A row is skipped when it is blank
        or when its rate cannot be parsed by ``float`` (for example the
        placeholder '.').

    Raises:
        ValueError: If a non-blank row does not contain exactly two
            comma-separated fields, or if the month/day pieces of the date
            are not integers.
    """
    row = row.strip()
    if not row:
        # Blank line (e.g. a trailing newline at end of file): nothing to
        # emit. Without this guard the unpacking below would abort the whole
        # file transform.
        return ''

    fields = row.split(',')
    if len(fields) != 2:
        raise ValueError(
            'expected 2 comma-separated fields (date,rate), got {}: {!r}'.format(
                len(fields), row
            )
        )
    dt_str, rate = fields
    year, month, day = split_date(dt_str)

    try:
        # Validation only: the original rate text is what gets written out,
        # so formatting such as trailing zeros is preserved.
        float(rate)
    except ValueError:
        return ''

    # The int round-trip drops leading zeros ('04' -> '4').
    month = str(int(month))
    day = str(int(day))

    return ','.join([year, month, day, rate]) + '\n'

In [34]:
row = '2015-04-03,1.0990\n'

In [35]:
transform_row_for_output(row)

'2015,4,3,1.0990\n'

In [36]:
with open(source_file) as f:
    data = f.readlines()

In [37]:
with open(target_file, 'w') as f:
    for row in data:
        f.write(row)

In [38]:
with open(target_file) as f:
    for row in f:
        print(row.strip())

John,Doe,120 jefferson st.,Riverside, NJ, 08075
Jack,McGinnis,220 hobo Av.,Phila, PA,09119
"John ""Da Man""",Repici,120 Jefferson St.,Riverside, NJ,08075
Stephen,Tyler,"7452 Terrace ""At the Plaza"" road",SomeTown,SD, 91234
,Blankman,,SomeTown, SD, 00298
"Joan ""the bone"", Anne",Jet,"9th, at Terrace plc",Desert City,CO,00123


In [39]:
def transform_file_batch(source_file, target_file):
    """Transform a source file into target_file, reading it all into memory.

    The first line of the source is treated as a header and discarded. The
    target starts with the header 'YEAR,MONTH,DAY,EXCH', followed by every
    remaining source line passed through ``transform_row_for_output`` (which
    returns '' for lines that should be skipped).

    An empty source file produces a target containing only the header line.

    Args:
        source_file: Path of the file to read.
        target_file: Path of the file to create or overwrite.
    """
    with open(source_file) as source:
        rows = source.readlines()

    with open(target_file, 'w') as target:
        target.write('YEAR,MONTH,DAY,EXCH\n')
        # rows[1:] drops the source header and, unlike `del rows[0]`, is
        # simply empty (no IndexError) when the source file has no lines.
        for row in rows[1:]:
            target.write(transform_row_for_output(row))

In [40]:
def transform_file(source_file, target_file):
    """Transform a source file into target_file one line at a time.

    The first line of the source is treated as a header and discarded. The
    target starts with the header 'YEAR,MONTH,DAY,EXCH', followed by every
    remaining source line passed through ``transform_row_for_output`` (which
    returns '' for lines that should be skipped). Lines are streamed, so the
    source is never held in memory as a whole.

    An empty source file produces a target containing only the header line.

    Args:
        source_file: Path of the file to read.
        target_file: Path of the file to create or overwrite.
    """
    with open(source_file) as source, open(target_file, 'w') as target:
        # Skip the header row of the source file. The None default keeps an
        # empty source from raising StopIteration.
        next(source, None)

        # Write the header row of the target file.
        target.write('YEAR,MONTH,DAY,EXCH\n')

        for row in source:
            target.write(transform_row_for_output(row))