# Files

In [6]:
# Documentation: https://docs.python.org/3/tutorial/inputoutput.html#reading-and-writing-files

## Paradigm

In [7]:
# 1) open 
# 2) read/write/append 
# 3) close

## Creating a text file

In [8]:
# The commands below just create a sample file; it could equally be created with a text editor
! echo "sample content line number 1" > abc.txt
! echo "sample content line number 2" >> abc.txt
! echo "sample content line number 3" >> abc.txt

In [9]:
! cat abc.txt

sample content line number 1
sample content line number 2
sample content line number 3


## Opening a file

In [10]:
#help(open)

In [11]:
#file = open('abc.txt', 'r')
file = open('abc.txt', "r") # text is by default
type(file)

_io.TextIOWrapper

In [12]:
#byte to char
# unicode char : 1 to 4 bytes

In [13]:
#import io
#help(io)

In [14]:
dir(file)

['_CHUNK_SIZE',
 '__class__',
 '__del__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__enter__',
 '__eq__',
 '__exit__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__lt__',
 '__ne__',
 '__new__',
 '__next__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '_checkClosed',
 '_checkReadable',
 '_checkSeekable',
 '_checkWritable',
 '_finalizing',
 'buffer',
 'close',
 'closed',
 'detach',
 'encoding',
 'errors',
 'fileno',
 'flush',
 'isatty',
 'line_buffering',
 'mode',
 'name',
 'newlines',
 'read',
 'readable',
 'readline',
 'readlines',
 'reconfigure',
 'seek',
 'seekable',
 'tell',
 'truncate',
 'writable',
 'write',
 'write_through',
 'writelines']

In [15]:
#help(file)

In [16]:
print(file.name)

abc.txt


In [17]:
print(file.mode)

r


In [18]:
print(file.closed)

False


## Reading file contents

In [19]:
help(file.read)

Help on built-in function read:

read(size=-1, /) method of _io.TextIOWrapper instance
    Read at most n characters from stream.
    
    Read from underlying buffer until we have n characters or we hit EOF.
    If n is negative or omitted, read until EOF.



In [20]:
content = file.read()

In [21]:
print(content)

sample content line number 1
sample content line number 2
sample content line number 3



## Closing the file

In [22]:
help(file.close)

Help on built-in function close:

close() method of _io.TextIOWrapper instance
    Flush and close the IO object.
    
    This method has no effect if the file is already closed.



In [23]:
file.closed # has value False, as file is not yet closed

False

In [24]:
file.close()

In [25]:
file.closed # has value True, as file is closed

True

## Reading file chunk by chunk

In [26]:
# concept of file position (returned by file.tell() )

In [27]:
! cat abc.txt

sample content line number 1
sample content line number 2
sample content line number 3


In [28]:
# Read three consecutive 10-character chunks, reporting the file position
# (file.tell()) before the first read and again after every read.
file = open("abc.txt", "r")
print("position=", file.tell())

for attempt in range(3):
    content = file.read(10)  # returns at most 10 characters
    print(f"content=[{content}]")
    print("position=", file.tell())

file.close()


position= 0
content=[sample con]
position= 10
content=[tent line ]
position= 20
content=[number 1
s]
position= 30


## Seeking (changing) the file position

In [29]:
help(file.seek)

Help on built-in function seek:

seek(cookie, whence=0, /) method of _io.TextIOWrapper instance
    Change stream position.
    
    Change the stream position to the given byte offset. The offset is
    interpreted relative to the position indicated by whence.  Values
    for whence are:
    
    * 0 -- start of stream (the default); offset should be zero or positive
    * 1 -- current stream position; offset may be negative
    * 2 -- end of stream; offset is usually negative
    
    Return the new absolute position.



In [30]:
# Move the file position to offset 10 first, then read three
# 10-character chunks from there, showing the position after each read.
file = open("abc.txt", "r")
file.seek(10)

for attempt in range(3):
    content = file.read(10)  # returns at most 10 characters
    print(f"content=[{content}]")
    print("position=", file.tell())

file.close()


content=[tent line ]
position= 20
content=[number 1
s]
position= 30
content=[ample cont]
position= 40


In [31]:
# keep shifting back to offset 0
file = open("abc.txt", "r")


content = file.read(10) # Maximum 10 characters
print(f"content=[{content}]")
file.seek(0)
print("position=",file.tell())

content = file.read(10) # Maximum 10 characters
print(f"content=[{content}]")
file.seek(0)
print("position=",file.tell())
      
content = file.read(10) # Maximum 10 characters
print(f"content=[{content}]")
print("position=",file.tell())

file.close()


content=[sample con]
position= 0
content=[sample con]
position= 0
content=[sample con]
position= 10


## complete code for reading chunk by chunk

In [32]:
#Version1
file = open("abc.txt", "r")

while True: 
    content = file.read(10)
    if len(content) == 0:
        print("Reached end of file")
        break
    print(f"content=[{content}]")
   
file.close()

content=[sample con]
content=[tent line ]
content=[number 1
s]
content=[ample cont]
content=[ent line n]
content=[umber 2
sa]
content=[mple conte]
content=[nt line nu]
content=[mber 3
]
Reached end of file


In [33]:
# Version2
file = open("abc.txt", "r")

content = file.read(10)
while len(content) > 0:
    print(f"content=[{content}]")
    content = file.read(10)
   
file.close()

content=[sample con]
content=[tent line ]
content=[number 1
s]
content=[ample cont]
content=[ent line n]
content=[umber 2
sa]
content=[mple conte]
content=[nt line nu]
content=[mber 3
]


In [34]:
# Version 2 (walrus form; identical to Version 4 below)
file = open("abc.txt", "r")


while content := file.read(10):
    print(f"content=[{content}]")
    
   
file.close()

content=[sample con]
content=[tent line ]
content=[number 1
s]
content=[ample cont]
content=[ent line n]
content=[umber 2
sa]
content=[mple conte]
content=[nt line nu]
content=[mber 3
]


In [35]:
# Version 3
file = open("abc.txt", "r")

while len(content := file.read(10)) > 0:
    print(f"content=[{content}]")
   
file.close()

content=[sample con]
content=[tent line ]
content=[number 1
s]
content=[ample cont]
content=[ent line n]
content=[umber 2
sa]
content=[mple conte]
content=[nt line nu]
content=[mber 3
]


In [36]:
# Version 4
file = open("abc.txt", "r")

while content := file.read(10):
    print(f"content=[{content}]")
   
file.close()

content=[sample con]
content=[tent line ]
content=[number 1
s]
content=[ample cont]
content=[ent line n]
content=[umber 2
sa]
content=[mple conte]
content=[nt line nu]
content=[mber 3
]


## Resource leaks

In [37]:
# What if an exception occurs between open() and close()
file = open("abc.txt", "r")

while content := file.read(10):
    print(f"content=[{content}]")
    print(1/0) # just to raise exception

print("closing file")
file.close()

content=[sample con]


ZeroDivisionError: division by zero

In [None]:
# Fixing the resource leak problem (the hard way)
# try-exceptblock1-exceptblock2-exceptblock3-else-finally
#   try 
#   except
#   else
#   finally
file = open("abc.txt", "r")

try:
    while content := file.read(10):
        print(f"content=[{content}]")
        print(1/0) # just to raise exception
    
finally:
    print("closing file")
    file.close()

## with statement (context manager)

In [None]:
# Fixing the resource leak problem (the elegant way)


with open("abc.txt", "r") as file:
    while content := file.read(10):
        print(f"content=[{content}]")
        print(1/0)
   

    
# need not call file.close()


# file.__enter__()

# file.__exit__()

In [None]:
file.closed

In [None]:
# Fixing the resource leak problem (the elegant way)
with open("abc.txt", "r") as file: 
    while content := file.read(10):
        print(f"content=[{content}]")
        #print(1/0) # just to raise exception
    
# need not call file.close()

In [None]:
file.closed

In [None]:
# Based on  __enter__() and  __exit__()

## Writing contents in a file

In [None]:
# Now let us overwrite the file and write our own content


In [None]:
#import io

In [None]:
#help(io)

In [None]:
! echo "line1\nline2\line3\nline4" > abc.txt

In [None]:
! cat abc.txt

In [None]:
file=open('abc.txt', 'w')

In [None]:
! cat abc.txt

In [None]:
type(file)

In [None]:
! cat abc.txt # Notice file contents have got truncated

In [None]:
help(file.write)

In [None]:
file.write('This is line 1\nThis is line 2\nThis is line 3')

In [None]:
len('This is line 1\nThis is line 2\nThis is line 3')

In [None]:
file.name

In [None]:
! ls -l abc.txt # why size is 0

In [None]:
! cat abc.txt # why content is empty?

In [None]:
file.closed # boolean flag

In [None]:
file.flush() # flush without close

In [None]:
dir(file)

In [None]:
file.close()

In [None]:
file.closed

In [None]:
! ls -l abc.txt

In [None]:
! cat abc.txt

In [None]:
# Complete program

with open('abc.txt', 'w') as file:
    file.write('This is line 1\nThis is line 2\nThis is line 3')

In [None]:
! cat abc.txt

## Appending contents to a file

In [None]:
# Now lets append content to the file

In [None]:
with open('abc.txt', 'a') as file:
    file.write("\nThis is line 4")

In [None]:
! cat 'abc.txt'

## Read-Write mode

In [None]:
# r+ : means read and write : contents are preserved
# w+ : means write and read : contents are truncated

In [None]:
help(file.tell)

In [None]:
# Lets open file in r+ (read-write mode) and use seek() and tell()

In [None]:
! echo "sample content line number 1" > abc.txt
! echo "sample content line number 2" >> abc.txt
! echo "sample content line number 3" >> abc.txt

In [None]:
file=open('abc.txt', 'r+')

file.write("ABCDEF")

file.seek(0)


content=file.read(10)
print(f"content=[{content}]")

content=file.read(10)
print(f"content=[{content}]")

file.close()

!cat abc.txt


## Reading line by line

In [None]:
! echo "sample content line number 1" > abc.txt
! echo "sample content line number 2" >> abc.txt
! echo "sample content line number 3" >> abc.txt

In [None]:
! cat abc.txt


In [None]:
import io  # every earlier "import io" in this notebook is commented out

help(io.TextIOWrapper.readline)

In [None]:
file = open('abc.txt', 'r')

In [None]:
# Read at most 5 characters of the current line.
# Named `chunk` rather than `str` so the built-in `str` type is not shadowed.
chunk = file.readline(5)
print(chunk)

In [None]:
# Next (at most) 5 characters of the same line.
# Named `chunk` rather than `str` so the built-in `str` type is not shadowed.
chunk = file.readline(5)
print(chunk)

In [None]:
# And the following (at most) 5 characters.
# Named `chunk` rather than `str` so the built-in `str` type is not shadowed.
chunk = file.readline(5)
print(chunk)

In [None]:
# Version1
with open('abc.txt', 'r') as file:
    while True:
        line = file.readline() # Try with 10 as argument
        if not line:  # readline() returns an empty string only at end of file
            break

        # `line` keeps its trailing newline, so print() adds a second one
        print("line:", line)

In [None]:
# Version2
with open('abc.txt', 'r') as file:
    while line := file.readline():
        print("line:", line, end="")

## Reading multiple lines

In [None]:
import io  # every earlier "import io" in this notebook is commented out

help(io.TextIOWrapper.readlines)

In [None]:
# Version1
with open('abc.txt', 'r') as file:
    lines = file.readlines(5)
    print(lines)


In [None]:
lines[0]

In [None]:
file = open('abc.txt', 'r')

In [None]:
print(file.readlines(5))

In [None]:
print(file.readlines(5))

In [None]:
print(file.readlines(5))

## [NEW] Writing line by line

In [None]:
help(file.writelines)

In [None]:

with open('abc.txt', 'w') as file:
    file.writelines(["content of line1\n", "content of line2\n"])



In [None]:
! cat abc.txt

In [None]:

with open('abc.txt', 'w') as file:
    file.writelines(["content of line1\n", "content of line2\n"])
    file.writelines(("content of line3\n", "content of line4\n"))



In [None]:
!cat abc.txt

In [None]:

with open('abc.txt', 'a') as file:
    file.writelines(["content of line5\n", "content of line6\n"])
    file.writelines(("content of line7\n", "content of line8\n"))



In [None]:
!cat abc.txt

## Using file as an iterator of lines

In [None]:
file = open('abc.txt', 'r')

In [None]:
for line in file:
    print(line)

In [None]:
file = open('abc.txt', 'r')

In [None]:
print(next(file))

In [None]:
print(next(file))

In [None]:
print(next(file))

In [None]:
print(next(file))

In [None]:
# Version1

with open('abc.txt', 'r') as file:
    # file as an iterable
    for line in file:
        print(line)

In [None]:
# Version2 : imperfect : not graceful

with open('abc.txt', 'r') as file:
    # file as an iterable
    
    while line := next(file):
        print(line)

In [None]:
# Version2 : perfect : graceful

with open('abc.txt', 'r') as file:
    # file as an iterable
    try:
        while line := next(file):
            print(line)
    except StopIteration:
        pass

In [None]:
# Version3 : typecast to list !

with open('abc.txt', 'r') as file:
    lines = list(file) # Iteration happens implicitly
    #lines=file.readlines()
    print(lines)


# Processing a CSV file (the hard way)

## How to parse one line

In [None]:
# Assume the CSV file has name, age and country (comma separated) and that one
# line has already been read with its newline removed.
# Splitting on "," yields the three fields as strings.
line = "Ram,12,India"
name, age, country = line.split(",")

# Fields come back as text; convert the numeric one explicitly
age = int(age)
print(f"{name} {age} {country}")


In [None]:
type(age)

In [None]:
#dir(age)

In [None]:
# Assume you wish to create the CSV file and want to construct a line from the variables
# str.join() accepts only strings and `age` is an int at this point, so every
# field is formatted as text before joining.
",".join(f"{field}" for field in (name, age, country))

In [None]:
# Show the documentation of str.replace, used in the next cells
help(str.replace)

In [None]:
# While parsing, how to ignore spaces anywhere in the line

line="  Ram   ,   12   ,   India   "
name,age,country= line.replace(" ", "").split(',')
age=int(age)
print(name,age,country)



In [None]:
# While parsing, how to ignore spaces anywhere in the line

# What if there are tabs too?
line="  Ram   \t,   12   \t,   India   "
name,age,country= line.replace(" ", "").split(',')
age=int(age)
print(name,age,country)


In [None]:
# While parsing, how to ignore spaces anywhere in the line

# What if there are tabs too?
line="  Ram   \t,   12   \t,   India   "
name,age,country= line.replace(" ", "").replace("\t","").split(',')
age=int(age)
print(name,age,country)


In [None]:
# split() with no argument cuts on any run of whitespace (spaces, tabs, newlines);
# joining the pieces with "" therefore removes all whitespace from the text.
line = "  Ram   \t,   12   \t,   India   \n line number 2"
print(line, "".join(line.split()), sep="\n")

In [None]:
#Note: sometimes, the fields can contain spaces... e.g. Name and Address
# Then we wish to remove whitespace at both ends of the line and at both ends of each field (but not in the middle of a field)
# line.strip() and field.strip() can be used


In [None]:
#help(str)

# Strip the whole line, split it, then strip every field: whitespace around the
# fields is removed while the space inside "Ram Kumar" is kept.
line = "  Ram Kumar  \t,   12   \t,   India "
name, age, country = (field.strip() for field in line.strip().split(','))
print(f"name={name}, age={age}, country={country}")
# age=int(age)
# print(name,age,country)

## Complete code where row is represented as dictionary

In [None]:
# Creating dictionary {header_column_name : fieldvalue, ....}
# Version1: has newlines in header and data
with open("person.csv", "r") as file:
    header = next(file).split(",")
    print("header=", header)

    for row in file:
        if not row.strip():
            # Blank line (person.csv ends with empty lines): it has no fields,
            # so indexing fields[i] below would raise IndexError.
            continue
        fields = row.split(",")
        print("fields=", fields)
        rowdict ={ }
        for i in range(len(header)):
            rowdict[ header[i] ] = fields[i]
        # Question : how can you simplify above dictionary creation code?
        print(f"Row dictionary:{rowdict}")

In [None]:
# Creating dictionary {header_column_name : fieldvalue, ....}
# Version2: removed newlines in header and data
#     still has space in "Age" header and data?
with open("person.csv", "r") as file:
    header = next(file).strip().split(",") # Added .strip()
    print("header=", header)

    for row in file:
        if not row.strip():
            # Blank line (person.csv ends with empty lines): it has no fields,
            # so indexing fields[i] below would raise IndexError.
            continue
        fields = row.strip().split(",") # Added .strip()
        print("fields=", fields)
        rowdict ={ }
        for i in range(len(header)):
            rowdict[ header[i] ] = fields[i]
        # Question : how can you simplify above dictionary creation code?
        print(f"Row dictionary:{rowdict}")

In [None]:
# Creating dictionary {header_column_name : fieldvalue, ....}
# Version3: removed newlines in header and data
#     Removed leading/trailing spaces in headers and fields
with open("person.csv", "r") as file:
    header = next(file).strip().split(",")
    print("header=", header)

    for row in file:
        if not row.strip():
            # Blank line (person.csv ends with empty lines): it has no fields,
            # so indexing fields[i] below would raise IndexError.
            continue
        fields = row.strip().split(",")
        print("fields=", fields)
        rowdict ={ }
        for i in range(len(header)):
            rowdict[ header[i].strip() ] = fields[i].strip() # Notice: strip spaces around header and field
        # Question : how can you simplify above dictionary creation code?
        print(f"Row dictionary:{rowdict}")

In [None]:
# Creating dictionary {header_column_name : fieldvalue, ....}
# Version4: Simplified using dictionary comprehension and zip()
with open("person.csv", "r") as file:
    header = next(file).strip().split(",")
    print("header=", header)

    for row in file:
        if not row.strip():
            # Blank line: zip() would silently pair it with the first column
            # only and produce a bogus {'Name': ''} row, so skip it.
            continue
        fields = row.strip().split(",")
        print("fields=", fields)
        # zip() pairs each column name with the field at the same position
        rowdict={ column.strip() : field.strip() for column,field in zip(header, fields)}
        # rowdict ={ }
        # for i in range(len(header)):
        #     rowdict[ header[i].strip() ] = fields[i].strip() # Notice: Remove trailing spaces in header and field
        # Question : how can you simplify above dictionary creation code?
        print(f"Row dictionary:{rowdict}")

## Complete code where row is represented as list

In [None]:
# Version1: every row (header included) is stored as a list of fields
# fields still carry their surrounding spaces, e.g. the ' Age' header
with open("person.csv", "r") as file:
    list_rows = [ ]
    for row in file:
        if not row.strip():
            continue  # skip blank lines instead of storing a [''] row
        fields = row.strip().split(",")  # split() already returns a list
        list_rows.append(fields)
    print(list_rows)

In [None]:
# Version2: every row (header included) is stored as a list of fields
# each field is stripped, so spaces around header names and values are removed
with open("person.csv", "r") as file:
    list_rows = [ ]
    for row in file:
        if not row.strip():
            continue  # skip blank lines instead of storing a [''] row
        fields = row.strip().split(",")
        list_rows.append([field.strip() for field in fields])
    print(list_rows)

# In-Memory text streams

In [None]:
import io
file = io.StringIO("""
this is line1
this is line2
this is line3
this is line4
""")



In [None]:
type(file)

In [None]:
dir(file)

In [None]:
file.closed

In [None]:
content=file.read(5)
print(content)

In [None]:
file.tell()

In [None]:
content=file.read(5)
print(content)

In [None]:
file.tell()

In [None]:
content=file.read(5)
print(content)

In [None]:
file.close()

In [None]:
file.closed

In [None]:
file.read()

# Parsing CSV Files using csv module

In [None]:
! cat person.csv

In [44]:
import csv

In [45]:
dir(csv)

['Dialect',
 'DictReader',
 'DictWriter',
 'Error',
 'QUOTE_ALL',
 'QUOTE_MINIMAL',
 'QUOTE_NONE',
 'QUOTE_NONNUMERIC',
 'Sniffer',
 'StringIO',
 '_Dialect',
 '__all__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__',
 '__version__',
 'excel',
 'excel_tab',
 'field_size_limit',
 'get_dialect',
 'list_dialects',
 're',
 'reader',
 'register_dialect',
 'unix_dialect',
 'unregister_dialect',
 'writer']

In [46]:
#Documentation : https://docs.python.org/3/library/csv.html

## Reading using csvreader

In [47]:
dir(csv.reader)

['__call__',
 '__class__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__name__',
 '__ne__',
 '__new__',
 '__qualname__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__self__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__text_signature__']

In [48]:
#help(csv.reader) #Note first argument can be any iterator of lines

In [49]:
# Version1 : read all lines
# newline="" leaves newline handling to the csv module, as the csv docs
# require for files passed to csv.reader.
with open("person.csv", "r", newline="") as file:
    csvreader = csv.reader(file)
    #csvreader is an iterator. Each row is list of fields
    for row in csvreader:
        print(row) #parsed row

['Name', ' Age', 'Country']
['   Rohit Kumar    ', ' 12', 'India']
['Rahul', ' 14', 'India']
['Tom', ' 16', 'US']
['Vikram', ' 20', 'India']
[]
[]


In [50]:
# Version2: read header row separately
# newline="" leaves newline handling to the csv module, as the csv docs
# require for files passed to csv.reader.
with open("person.csv", "r", newline="") as file:
    csvreader = csv.reader(file)
    header_row = next(csvreader)  # the first parsed row is the header
    print("header_row=", header_row)

    for data_row in csvreader:
        print(data_row)

header_row= ['Name', ' Age', 'Country']
['   Rohit Kumar    ', ' 12', 'India']
['Rahul', ' 14', 'India']
['Tom', ' 16', 'US']
['Vikram', ' 20', 'India']
[]
[]


In [51]:
#Version3 : convert directly to list
# newline="" leaves newline handling to the csv module, as the csv docs
# require for files passed to csv.reader.
with open("person.csv", "r", newline="") as file:
    csvreader = csv.reader(file)
    #convert to list of rows from iterator of rows
    list_of_rows=list(csvreader)
print(list_of_rows)

[['Name', ' Age', 'Country'], ['   Rohit Kumar    ', ' 12', 'India'], ['Rahul', ' 14', 'India'], ['Tom', ' 16', 'US'], ['Vikram', ' 20', 'India'], [], []]


In [53]:
#Version4: store header_row separately
# newline="" leaves newline handling to the csv module, as the csv docs
# require for files passed to csv.reader.
with open("person.csv", "r", newline="") as file:
    csvreader = csv.reader(file)
    header_row = next(csvreader)
    print("header_row=", header_row)
    list_of_rows=list(csvreader)  # remaining rows only: the header was already consumed
print("Data Rows=", list_of_rows)

header_row= ['Name', ' Age', 'Country']
Data Rows= [['   Rohit Kumar    ', ' 12', 'India'], ['Rahul', ' 14', 'India'], ['Tom', ' 16', 'US'], ['Vikram', ' 20', 'India'], [], []]


## Reading using csvreader from iterator of lines (instead of file)

In [54]:
# csv.reader accepts any iterable of lines, not only an open file;
# here the "lines" are plain strings in a list.
our_sequence = ["10,20", "a,b,c", "1,2,3,4,,6"]

csvreader = csv.reader(our_sequence)
print([fields for fields in csvreader])

[['10', '20'], ['a', 'b', 'c'], ['1', '2', '3', '4', '', '6']]


In [55]:
# With delimiter=' ' the reader splits on spaces, so the commas stay inside
# the fields as ordinary characters.
our_sequence = ["10, 20", "a, b,c", "1,2, 3,4,,6"]

csvreader = csv.reader(our_sequence, delimiter=" ")
print([fields for fields in csvreader])

[['10,', '20'], ['a,', 'b,c'], ['1,2,', '3,4,,6']]


## Processing data in list of lists

In [None]:
# Print all names
# list_of_rows comes from the csv.reader cells above: depending on which of them
# ran last it may or may not start with the header row, and it ends with empty
# lists for the blank lines at the end of person.csv. Skip both kinds of row.
for row in list_of_rows:
    if row and row != header_row:
        print(row[0])

In [None]:
# Average age
# Ignore the header row (if list_of_rows still contains it) and the empty rows
# produced by the blank lines at the end of person.csv; int() tolerates the
# leading space in values such as ' 12'.
ages = [int(row[1]) for row in list_of_rows if row and row != header_row]
sum(ages) / len(ages)

## Reading using DictReader

In [56]:
dir(csv.DictReader)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__next__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 'fieldnames']

In [58]:
#help(csv.DictReader) # Notice you can provide fieldnames

In [59]:
# DictReader takes the first row as the field names and yields one dict per data row.
# newline="" leaves newline handling to the csv module, as the csv docs require.
with open("person.csv", "r", newline="") as file:
    dictreader = csv.DictReader(file)

    for row in dictreader:
        print(row)

{'Name': '   Rohit Kumar    ', ' Age': ' 12', 'Country': 'India'}
{'Name': 'Rahul', ' Age': ' 14', 'Country': 'India'}
{'Name': 'Tom', ' Age': ' 16', 'Country': 'US'}
{'Name': 'Vikram', ' Age': ' 20', 'Country': 'India'}


In [60]:
dictreader.fieldnames

['Name', ' Age', 'Country']

In [61]:
# With explicit fieldnames the first line of the file is treated as data,
# so the original header shows up as the first row.
# newline="" leaves newline handling to the csv module, as the csv docs require.
with open("person.csv", "r", newline="") as file:
    dictreader = csv.DictReader(file, fieldnames=["NameCol", "AgeCol", "CountryCol"])

    for row in dictreader:
        print(row)

{'NameCol': 'Name', 'AgeCol': ' Age', 'CountryCol': 'Country'}
{'NameCol': '   Rohit Kumar    ', 'AgeCol': ' 12', 'CountryCol': 'India'}
{'NameCol': 'Rahul', 'AgeCol': ' 14', 'CountryCol': 'India'}
{'NameCol': 'Tom', 'AgeCol': ' 16', 'CountryCol': 'US'}
{'NameCol': 'Vikram', 'AgeCol': ' 20', 'CountryCol': 'India'}


In [62]:
# DictReader also works on a plain list of lines: the first line supplies
# the keys, every following line becomes one dict.
our_sequence = ["name,age,country", "Ram, 12, India", "Shyam, 14, India"]

csvreader = csv.DictReader(our_sequence)
print([record for record in csvreader])

[{'name': 'Ram', 'age': ' 12', 'country': ' India'}, {'name': 'Shyam', 'age': ' 14', 'country': ' India'}]


In [63]:
# Same data as before, but the fields are separated by ':' instead of ','
our_sequence = ["name:age:country", "Ram: 12: India", "Shyam: 14: India"]

csvreader = csv.DictReader(our_sequence, delimiter=":")
print([record for record in csvreader])

[{'name': 'Ram', 'age': ' 12', 'country': ' India'}, {'name': 'Shyam', 'age': ' 14', 'country': ' India'}]


In [64]:
# More about dialects here: https://docs.python.org/3/library/csv.html#csv-fmt-params

In [65]:
# Materialise every row dict into a list while the file is still open.
# newline="" leaves newline handling to the csv module, as the csv docs require.
with open("person.csv", "r", newline="") as file:
    dictreader = csv.DictReader(file)

    list_of_rows = list(dictreader)
    print(list_of_rows)

[{'Name': '   Rohit Kumar    ', ' Age': ' 12', 'Country': 'India'}, {'Name': 'Rahul', ' Age': ' 14', 'Country': 'India'}, {'Name': 'Tom', ' Age': ' 16', 'Country': 'US'}, {'Name': 'Vikram', ' Age': ' 20', 'Country': 'India'}]


In [66]:
# Fixing spaces
# newline="" leaves newline handling to the csv module, as the csv docs require.
with open("person.csv", "r", newline="") as file:
    dictreader = csv.DictReader(file)

    #list_of_rows = [ processed_rows for row in dictreader]
    # Rebuild every row dict with whitespace stripped from both keys and values
    list_of_rows = [ {k.strip():v.strip() for k,v in row.items()} for row in dictreader]
    print(list_of_rows)

[{'Name': 'Rohit Kumar', 'Age': '12', 'Country': 'India'}, {'Name': 'Rahul', 'Age': '14', 'Country': 'India'}, {'Name': 'Tom', 'Age': '16', 'Country': 'US'}, {'Name': 'Vikram', 'Age': '20', 'Country': 'India'}]


In [67]:
# Average age
sum([ int(row['Age']) for row in list_of_rows]) / len(list_of_rows)

15.5

In [68]:
# Demonstration: iterating the DictReader after its `with` block has exited
# raises ValueError, because the reader fetches lines from the underlying
# file only when iterated and that file is closed by now.
for row in dictreader:
    print(row)

ValueError: I/O operation on closed file.

## Writing CSV file using csv.writer

In [None]:
import csv

In [None]:
! cat target.txt

In [None]:
file = open("target.txt", "w")

In [None]:
! cat target.txt

In [None]:
csvwriter = csv.writer(file)

In [None]:
help(csvwriter)

In [None]:
help(csv.writer)

In [None]:
help(csvwriter.writerow)

In [None]:
# newline="" is what the csv docs require for files handed to csv.writer; without it,
# platforms that translate "\n" on write turn the writer's "\r\n" into "\r\r\n".
with open("target.txt", "w", newline="") as file:
    csvwriter = csv.writer(file)
    csvwriter.writerow(('Ram', '12', 'India'))    # tuple of strings
    csvwriter.writerow(['Shyam', 14.6, 'India'])  # list containing a non-string value
    csvwriter.writerow(range(5))                  # any iterable is accepted
    csvwriter.writerow("hello")                   # a string is iterated character by character
    csvwriter.writerow({"a":1, "b":2})            # iterating a dict yields its keys
    



In [None]:
! cat target.txt

In [None]:
file.close()

In [None]:
help(csvwriter.writerows)

In [None]:
! cat target.txt

In [None]:
# Writing a CSV file
import csv

header = ['Name', 'Age', 'Country']
# Rows may be any iterables: tuples, lists, a dict (its keys are written)
# and even a string (written one character per field).
rows = [
    ('Shyam', 12, 'India'),
    ['Mohan', 14, 'India'],
    ('Shyam', 12, 'India'),
    {"a": "b", "c": "d"},
    ('Shyam', 12, 'India'),
    ['Mohan', '14', 'India'],
    ('Shyam', 16, 'India'),
    ['Mohan', 14, 'India'],
    "hello",
]

# newline="" is what the csv docs require for files handed to csv.writer; without it,
# platforms that translate "\n" on write turn the writer's "\r\n" into "\r\r\n".
with open("xyz2.csv", 'w', newline="") as file:
    csvwriter = csv.writer(file)
    csvwriter.writerow(header)
    csvwriter.writerows(rows)

In [None]:
! cat xyz2.csv

In [None]:
# Appending to a CSV file: mode 'a' keeps the existing content of xyz2.csv
# and adds the header and rows below after it.
import csv

header = ['1Name', 'Age', 'Country']
rows = [
    ('1Shyam', 12, 'India'),
    ['1Mohan', 14, 'India'],
    ('1Shyam', 12, 'India'),
    ['1Mohan', 14, 'India'],
    ('1Shyam', 12, 'India'),
    ['Mohan', '14', 'India'],
    ('Shyam', 16, 'India'),
    ['Mohan', 14, 'India'],
    "hello",
]

# newline="" is what the csv docs require for files handed to csv.writer; without it,
# platforms that translate "\n" on write turn the writer's "\r\n" into "\r\r\n".
with open("xyz2.csv", 'a', newline="") as file:
    csvwriter = csv.writer(file)
    csvwriter.writerow(header)
    csvwriter.writerows(rows)

In [None]:
! cat xyz2.csv

## Writing CSV file using csv.DictWriter

In [None]:
import csv

In [None]:
#help(csv)

In [None]:
# Opened with newline="" because this file is handed to a csv writer below;
# the csv docs require it so that the writer's own "\r\n" line endings are not
# translated a second time by the text layer.
file = open("target.txt", "w", newline="")

In [None]:
#dir(csv)

In [None]:
#help(csv.DictWriter)

In [None]:
csvwriter = csv.DictWriter(file, fieldnames=('Name', 'Age', 'Country'))

In [None]:
csvwriter.fieldnames

In [None]:
csvwriter.writeheader()


In [None]:
csvwriter.writerow({"Name":"Ram", "Age":"18", "Country":"India"})

In [None]:
csvwriter.writerow({"Age":"100"})

In [None]:
file.close()

In [None]:
help(csvwriter)

In [None]:
! cat target.txt

In [None]:
help(csvwriter)

In [None]:

# Append rows given as dicts; DictWriter orders the values by `fieldnames`.
# No header is written here (writeheader() is not called).
# newline="" is what the csv docs require for files handed to a csv writer.
with open("target2.txt" , "a", newline="") as file:
    csvwriter = csv.DictWriter(file, fieldnames=('Name', 'Age', 'Country'))
    # writerows() accepts any iterable of dicts: a list ...
    csvwriter.writerows([{"Name":"Tom3", "Age":18, "Country":"India"},
                     {"Name":"Harry3", "Age":13, "Country":"India"}])

    # ... or a tuple
    csvwriter.writerows(({"Name":"Tom31", "Age":18, "Country":"India"},
                     {"Name":"Harry31", "Age":13, "Country":"India"}))

In [None]:
!cat target2.txt

In [None]:
#help(open)

# JSON files

In [None]:
# concept of serialize, deserialize


Python object -> JSON Document : Serialize : dumps


JSON Document -> Python object : Deserialize : loads

In [None]:
# JSON vs CSV

In [None]:
#Documentation: 
# https://docs.python.org/3/library/json.html#module-json

# https://docs.python.org/3/library/json.html#module-json

In [70]:
import json

In [71]:
dir(json)

['JSONDecodeError',
 'JSONDecoder',
 'JSONEncoder',
 '__all__',
 '__author__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '__version__',
 '_default_decoder',
 '_default_encoder',
 'codecs',
 'decoder',
 'detect_encoding',
 'dump',
 'dumps',
 'encoder',
 'load',
 'loads',
 'scanner']

## json.loads

In [72]:
help(json.loads)

Help on function loads in module json:

loads(s, *, cls=None, object_hook=None, parse_float=None, parse_int=None, parse_constant=None, object_pairs_hook=None, **kw)
    Deserialize ``s`` (a ``str``, ``bytes`` or ``bytearray`` instance
    containing a JSON document) to a Python object.
    
    ``object_hook`` is an optional function that will be called with the
    result of any object literal decode (a ``dict``). The return value of
    ``object_hook`` will be used instead of the ``dict``. This feature
    can be used to implement custom decoders (e.g. JSON-RPC class hinting).
    
    ``object_pairs_hook`` is an optional function that will be called with the
    result of any object literal decoded with an ordered list of pairs.  The
    return value of ``object_pairs_hook`` will be used instead of the ``dict``.
    This feature can be used to implement custom decoders.  If ``object_hook``
    is also defined, the ``object_pairs_hook`` takes priority.
    
    ``parse_float``, if sp

In [86]:
# JSON array of objects
json_str = '''
[
{
"Name":["Ram",10,20],
"Age":12,
"Country":"India"
},
{
"Name":["Ram",10,20],
"Age":12,
"Country":"India"
}

]
'''

# json.loads() parses a JSON document held in a string;
# json.load() expects a file object and fails when given a str.
xyz = json.loads(json_str) # the top-level JSON array becomes a Python list

print(xyz)
print(type(xyz))

[{'Name': ['Ram', 10, 20], 'Age': 12, 'Country': 'India'}, {'Name': ['Ram', 10, 20], 'Age': 12, 'Country': 'India'}]
<class 'list'>


In [87]:
# A single JSON object: json.loads() turns it into a Python dict,
# and the nested JSON array into a list.
json_str = '''{"Name":"Ram",
"MarksList":[12,14,16],
"Country":"India"}'''

xyz = json.loads(json_str)

# Show the parsed value and, on the next line, its type
print(xyz, type(xyz), sep="\n")

{'Name': 'Ram', 'MarksList': [12, 14, 16], 'Country': 'India'}
<class 'dict'>


In [75]:
# A JSON array of objects: json.loads() returns a list of dicts.
json_str = '''
[
{"Name":"Ram","Age":12,"Country":"India"},
{"Name":"Mohan","Age":14,"Country":"India"
}
]'''

xyz = json.loads(json_str)

# Show the whole list, then the "Age" of its second element
print(xyz, xyz[1]["Age"], sep="\n")

[{'Name': 'Ram', 'Age': 12, 'Country': 'India'}, {'Name': 'Mohan', 'Age': 14, 'Country': 'India'}]
14


In [76]:
# List of constants
json_str = '''["10",20.5,true, null]'''

xyz = json.loads(json_str) # Parse the JSON array into a Python list (true -> True, null -> None)

print(xyz)

['10', 20.5, True, None]


In [88]:
# Nested JSON document: an object containing numbers, a boolean, an array of
# numbers and an array of objects.
# NOTE(review): the key "maxRAM|" in the last configThreads entry looks like a
# typo for "maxRAM" (the other three entries use "maxRAM") — confirm before changing.
json_str='''
{
"config" : {

    "numThreads": 4,
    "standaloneMode" : true,
    "threadIDs" : [5,6,7,8],
    "configThreads" : [
        {
          "maxRAM": 10,
          "maxCPUTime": 20
        },
        {
          "maxRAM": 11,
          "maxCPUTime": 21
        },
        {
          "maxRAM": 12,
          "maxCPUTime": 22
        },
        {
          "maxRAM|": 13,
          "maxCPUTime": 23
        }
    ]
}
}
'''
xyz = json.loads(json_str) # Parse the JSON object into nested Python dicts and lists

print(xyz)
# Drill into the parsed structure with ordinary dict/list indexing:
#print(xyz['config'])
#print(xyz['config']["configThreads"][3])

{'config': {'numThreads': 4, 'standaloneMode': True, 'threadIDs': [5, 6, 7, 8], 'configThreads': [{'maxRAM': 10, 'maxCPUTime': 20}, {'maxRAM': 11, 'maxCPUTime': 21}, {'maxRAM': 12, 'maxCPUTime': 22}, {'maxRAM|': 13, 'maxCPUTime': 23}]}}


In [78]:
# Drill into the parsed structure: "config" -> "configThreads" -> entry at index 2
print(xyz["config"]['configThreads'][2])

{'maxRAM': 12, 'maxCPUTime': 22}


In [79]:
# One JSON object; the uneven indentation inside the literal is just JSON whitespace.
json_str = '''{"Name":"Ram",
         "Age":12,
     "Country":"India", 
      "Marks" : [10,20,30]}'''
# Decode the object into a dict.
d1 = json.loads(json_str)
# Type, full dict and first mark, each on its own line.
print(type(d1), d1, d1["Marks"][0], sep="\n")

<class 'dict'>
{'Name': 'Ram', 'Age': 12, 'Country': 'India', 'Marks': [10, 20, 30]}
10


In [80]:
# The decoded "Marks" value is an ordinary list, so it can be mutated in place.
# Note: re-running this cell appends another 40 each time.
d1["Marks"].append(40)
print(d1)

{'Name': 'Ram', 'Age': 12, 'Country': 'India', 'Marks': [10, 20, 30, 40]}


In [89]:
# JSON object that nests an array of address objects under "Addresses"
json_str = '''{"Name":"Ram","Age":12,"Country":"India", "Marks" : [10,20,30],
        "Addresses" :
             [{"street_addr" : "Happy Home",
              "city" : "Happy city",
              "state" : "Delhi",
              "country" : "India"}, 
              {"street_addr" : "Happy Home2",
              "city" : "Happy city2",
              "state" : "Delhi",
              "country" : "India"}
              
              ]
        
        }'''
# Decode into a dict; the nested array becomes a list of dicts.
d1 = json.loads(json_str)

# Whole dict, first mark, and the list of addresses, one per line.
print(d1, d1["Marks"][0], d1["Addresses"], sep="\n")

{'Name': 'Ram', 'Age': 12, 'Country': 'India', 'Marks': [10, 20, 30], 'Addresses': [{'street_addr': 'Happy Home', 'city': 'Happy city', 'state': 'Delhi', 'country': 'India'}, {'street_addr': 'Happy Home2', 'city': 'Happy city2', 'state': 'Delhi', 'country': 'India'}]}
10
[{'street_addr': 'Happy Home', 'city': 'Happy city', 'state': 'Delhi', 'country': 'India'}, {'street_addr': 'Happy Home2', 'city': 'Happy city2', 'state': 'Delhi', 'country': 'India'}]


In [82]:
# The value under "Addresses" is a list of address dicts
d1["Addresses"]

[{'street_addr': 'Happy Home',
  'city': 'Happy city',
  'state': 'Delhi',
  'country': 'India'},
 {'street_addr': 'Happy Home2',
  'city': 'Happy city2',
  'state': 'Delhi',
  'country': 'India'}]

In [83]:
# Index into the list to get the second address dict
d1["Addresses"][1]

{'street_addr': 'Happy Home2',
 'city': 'Happy city2',
 'state': 'Delhi',
 'country': 'India'}

In [84]:
# Chain list index and dict key to reach a single nested field
d1["Addresses"][1]["city"]

'Happy city2'

In [85]:
# "Address" is not a key of d1 (the parsed document uses "Addresses"), so the
# lookup raises KeyError. Catch and report it so this demonstration does not
# abort a Restart & Run All.
try:
    d1["Address"]
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: 'Address'

## json.dumps

In [90]:
# Python dict to serialize; it includes a bool and a None value.
d1 = {
    'Name': 'Ram',
    "flag": True,
    'Attr1': None,
    'Country': 'India',
    'Marks': [10, 20, 30],
    'Address': {
        'street_addr': 'Happy Home',
        'city': 'Happy city',
        'state': 'Delhi',
        'country': 'India',
    },
}
# json.dumps returns the JSON text as a str (True -> true, None -> null).
json_str = json.dumps(d1)
print(type(json_str))
json_str

<class 'str'>


'{"Name": "Ram", "flag": true, "Attr1": null, "Country": "India", "Marks": [10, 20, 30], "Address": {"street_addr": "Happy Home", "city": "Happy city", "state": "Delhi", "country": "India"}}'

In [None]:
# Dict to pretty-print as JSON
d1 = {
    'Name': 'Ram',
    'Age': 12,
    'Country': 'India',
    'Marks': [10, 20, 30],
    'Address': {
        'street_addr': 'Happy Home',
        'city': 'Happy city',
        'state': 'Delhi',
        'country': 'India',
    },
}
# `indent` is the number of spaces per nesting level (or an indent string), not
# an on/off flag; `indent=True` is silently treated as 1. Use an explicit width.
json_str = json.dumps(d1, indent=4)
print(json_str)

In [None]:
# Define the record first so this cell does not depend on a `d1` left over
# from an earlier cell.
d1 = {
    'Name': 'Ram',
    'Age': 12,
    'Country': 'India',
    'Marks': [10, 20, 30],
    'Address': {
        'street_addr': 'Happy Home',
        'city': 'Happy city',
        'state': 'Delhi',
        'country': 'India',
    },
}
# A Python list of dicts serializes to a JSON array of objects.
list_of_persons = [d1, d1, d1]
# `indent` is a width in spaces (or an indent string), not a boolean flag.
json_str = json.dumps(list_of_persons, indent=4)
print(json_str)

## json.dump

In [None]:
# json.dump serializes a Python object to JSON and writes it to an open file object

In [91]:
# Serialize a dict straight into d1.txt; the with-block closes the file afterwards.
with open("d1.txt", "w") as file:
    d1 = {
        'Name': 'Ram',
        'Age': 12,
        'Country': 'India',
        'Marks': [10, 20, 30],
        'Address': {
            'street_addr': 'Happy Home',
            'city': 'Happy city',
            'state': 'Delhi',
            'country': 'India',
        },
    }
    # json.dump writes the JSON text to the file object instead of returning it.
    json.dump(d1, file)


In [92]:
# Show the raw contents of d1.txt using the shell
! cat d1.txt

{"Name": "Ram", "Age": 12, "Country": "India", "Marks": [10, 20, 30], "Address": {"street_addr": "Happy Home", "city": "Happy city", "state": "Delhi", "country": "India"}}

In [None]:
# Write the same record as pretty-printed JSON into d1_indented.txt.
with open("d1_indented.txt", "w") as file:
    d1 = {
        'Name': 'Ram',
        'Age': 12,
        'Country': 'India',
        'Marks': [10, 20, 30],
        'Address': {
            'street_addr': 'Happy Home',
            'city': 'Happy city',
            'state': 'Delhi',
            'country': 'India',
        },
    }
    # `indent` is a width in spaces (or an indent string), not a boolean flag;
    # `indent=True` would be treated as a one-space indent.
    json.dump(d1, file, indent=4)
    


In [None]:
# Show the raw contents of d1_indented.txt using the shell
! cat d1_indented.txt

## json.load

In [None]:
# json.load reads JSON from an open file object and returns the decoded Python object

In [None]:
# Decode the JSON document stored in d1.txt back into a Python object.
with open("d1.txt", "r") as file:
    d1 = json.load(file)
# Printing does not need the file handle, so it happens once the file is closed.
print(d1)


In [None]:
# Look up the nested "Address" value in the object loaded from the file
d1["Address"]

In [None]:
# Print the built-in documentation for json.load
help(json.load)

In [None]:
# Decode the JSON document stored in d1_indented.txt back into a Python object.
with open("d1_indented.txt", "r") as file:
    d1 = json.load(file)
# Printing does not need the file handle, so it happens once the file is closed.
print(d1)


In [None]:
#help(json.load)

# Excel Sheets

In [None]:
# Use the %pip magic rather than `! pip`: it installs into the environment of
# the running kernel, so the package is importable from this notebook.
%pip install openpyxl


In [93]:
import openpyxl

ModuleNotFoundError: No module named 'openpyxl'

In [94]:
# Relative path of the Excel workbook to open
filename="sample.xlsx"

# Load the workbook from disk; the returned object is inspected in the following cells
workbook = openpyxl.load_workbook(filename)

NameError: name 'openpyxl' is not defined

In [95]:
# Show the class of the object returned by load_workbook
type(workbook)

NameError: name 'workbook' is not defined

In [None]:
# List the attributes and methods available on the workbook object
dir(workbook)

In [None]:
# Print the documentation of the workbook's save method
help(workbook.save)

In [None]:
# Keep a reference to the workbook's `active` attribute (presumably the currently active worksheet — see openpyxl docs)
sheet = workbook.active

In [None]:
# Show the class of the object obtained from workbook.active
type(sheet)

In [None]:
# Fetch the cell object at row 2, column 1 (openpyxl indexing is presumably 1-based, i.e. cell A2 — verify against the docs)
name1 = sheet.cell(row = 2, column = 1)

In [None]:
# Show the class of the object returned by sheet.cell
type(name1)

In [None]:
# Print the cell object itself; its stored content is read via .value in the next cell
print(name1)

In [None]:
# Print the content stored in the cell via its `value` attribute
print(name1.value)