In [11]:
# file is not present
f = open('sample.txt','w')
f.write('hello')
f.close()

In [None]:
# write multiline strings
f = open('sample1.txt','w')
f.write('''hello from
multiline strings
this prints in multiline''')# or write f.write in many times
f.close()

In [12]:
# if the file is already present and we write in that file it replaces the content present in the file
# we can use append mode to append in the already present file instead of w mode

f = open('sample.txt', 'a')
f.write('\nkya ye line sach me naye line me print hui hai')
f.close()

In [14]:
# if writing lines in multilines like a list
l = ['hello','\nhow','\nare','\nyou']
f = open('sample.txt','w')
f.writelines(l)
f.close()

In [None]:
# Reading in files
# 1. 'read' reads all content all at one time
# 2. 'readlines' reads line by line
f = open('sample.txt', 'r')
read_file = f.read()
# print(read_file)

# to read only top 10 characters we can specify it in the parameter of read
f = open('sample.txt', 'r')
read_file = f.read(10)
# print(read_file)

# readline to read line by line

f = open('sample.txt', 'r')
print(f.readline()) # readline changes the line and print also changes the line
print(f.readline(),end='')
print(f.readline(),end='') # using readline(), end = '' will make sure it does not changes the line but print will still change line 


# use readline when the file is too big because when reading it takes all the content in ram
# use read when the content is small

hello

how
are


In [36]:
# using readline to print all using the while loop
f = open('sample.txt','r')
while True:
    data = f.readline()
    if data == '':break
    else: 
        print(f.readline(),end='')
f.close()

how
you

In [35]:
f = open('sample.txt','r')
print(f.read())
f.close()

hello
how
are
you


##### Using a context manager (with)

##### It's a good idea to close a file a after usage as it will free up the resources
##### If we don't close it, garbage collector will close it
##### with keyword closes the file as soon as the usage is over

1. with statement automatically handles file opening and closing
2. open('sample.txt', 'r') opens the file in read mode
3. as file assigns the file object to a variable
4. file.read() reads the entire content
5. The file is automatically closed when exiting the with block

In [37]:
with open('sample.txt', 'r') as file:
    content = file.read()
    print(content)
    
with open('sample.txt', 'r') as file:
    for line in file:
        print(line.strip())

hello
how
are
you
hello
how
are
you


In [None]:
with open('sample.txt', 'r') as file:
    lines = file.readlines()
    for line in lines:
        print(line.strip())

In [1]:
# making a big text file

big_file = ['hello how are you\n'] * 10000
with open('bigfile.txt','w') as file:
    file.writelines(big_file)

In [1]:
#Now we can read this file in chunk to not load everything in ram at once

with open('bigfile.txt','r') as file:
    while True:
        chunk = file.read(100)
        if not chunk:
            break
        print(chunk)

hello how are you
hello how are you
hello how are you
hello how are you
hello how are you
hello how 
are you
hello how are you
hello how are you
hello how are you
hello how are you
hello how are you
he
llo how are you
hello how are you
hello how are you
hello how are you
hello how are you
hello how ar
e you
hello how are you
hello how are you
hello how are you
hello how are you
hello how are you
hell
o how are you
hello how are you
hello how are you
hello how are you
hello how are you
hello how are 
you
hello how are you
hello how are you
hello how are you
hello how are you
hello how are you
hello 
how are you
hello how are you
hello how are you
hello how are you
hello how are you
hello how are yo
u
hello how are you
hello how are you
hello how are you
hello how are you
hello how are you
hello ho
w are you
hello how are you
hello how are you
hello how are you
hello how are you
hello how are you

hello how are you
hello how are you
hello how are you
hello how are you
hello how are you
h

In [3]:
# tell and seek
# tell tells the current position of the cursor
# seek moves the cursor to the specified position
with open('bigfile.txt','r') as file:
	print(file.read(10)) # reads the starting 10 characters
	print(file.tell()) # tells the position of cursor
	file.seek(100) # moves the cursor to specified position
	print(file.read(10))
	print(file.tell())

hello how 
10
 how are y
110


In [None]:
#seek can also take 2nd parameter which tells from where to move the cursor
# 0 - from starting of the file
with open('bigfile.txt','r') as file:
    print(file.read(10)) # reads the starting 10 characters
    print(file.tell()) # tells the position of cursor
    file.seek(50,0) # moves the cursor to specified position from starting
    print(file.read(10))
    print(file.tell())
    file.seek(-20,2) # moves the cursor to specified position from end
    print(file.read(10))
    print(file.tell())
    file.seek(10,1) # moves the cursor to specified position from current position
    print(file.read(10))
    print(file.tell())

In [4]:
#seek in write mode edits the file from that position
with open('bigfile.txt','w') as file:
    file.write('hello I am from different world do you know that.')
    file.seek(6)
    file.write('this is edited')

In [None]:
# Problems with working in text mode
# 1. Can't work with binary files like images, audio, video etc. For that we have to open the file in binary mode by adding 'b' in the mode parameter while opening the file. ex: 'rb', 'wb' etc.
# 2. Different OS have different ways of representing new line characters. Windows uses '\r\n' while Linux and macOS use '\n'. This can lead to inconsistencies when reading/writing files across different platforms. To handle this, Python provides universal newlines mode which is enabled by default in text mode.

# It is also not good with other data types like integers, floats, lists, dictionaries, tuples etc. For that we have to use serialization modules like pickle, json etc.

In [6]:
# Now opening a binary file like image
with open('pic1.jpg','rb') as file:
    with open('pic2.jpg','wb') as write_file_copy:
        write_file_copy.write(file.read())

In [None]:
#working with a big binary file like video
with open('bigvideo.mp4','rb') as file:
    with open('bigvideo_copy.mp4','wb') as write_file_copy:
        while True:
            chunk = file.read(1024*1024) # read in 1MB chunks
            if not chunk:
                break
            write_file_copy.write(chunk)

In [None]:
# making a dictionary and appending it to a file using write mode
dict1 = {'name':'Jay','age':24,'city':'Delhi'}
with open('data.txt','w') as file:
    # file.write(dict1) # this will give error because we cant write dictionary directly to a text file
    file.write(str(dict1)) # this makes the dictionary to string and writes to the file
    
with open('data.txt','r') as file:
    x = file.read()
    print(type(x)) # This will be string
    print(dict(x)) # this will give error because x is string not dictionary
    # A string cannot be directly converted to a dictionary using dict() function
    print(eval(x)) # this will convert string to dictionary but eval is not safe to use

<class 'str'>


In [None]:
# Using Serialization and Deserialization
# Serialization is the process of converting python data types to json format and deserialization is the reverse process of converting json data back to python data types.

# Serialization is the process of converting a data structure or object into a format that can be easily stored or transmitted, such as a byte stream or a string. Deserialization is the reverse process of converting the serialized data back into its original data structure or object.

In [None]:
# Serialization
import json
list1 = ['apple','banana','cherry']
dict1 = {'name':'Jay','age':24,'city':'Mumbai'}
with open('data.json','w') as f: # f is the file handler object
    json.dump(list1,f)
with open('data.json','w') as f:
    json.dump(dict1,f,indent=4)

In [None]:
# Deserialization
import json
with open('data.json','r') as r:
    data = json.load(r)
    print(data)
    print(type(data))

In [None]:
# Serialization and Deserialization with a tuple
import json
t = (1,2,3,4,5)
with open('data.json','w') as f:
    json.dump(t,f) # tuples get converted to list while serialization
with open('data.json','r') as r:
    data = json.load(r)
    print(data)
    print(type(data)) # this will be list not tuple

# This tuple behavior is because JSON does not have a native tuple data type.
# Tuples are converted to lists during serialization to ensure compatibility with the JSON format.
# When deserializing, the JSON data is read back into Python as lists, since JSON arrays correspond to Python lists.
# If you need to maintain the tuple type, you would have to manually convert the list back to a tuple after deserialization.

[1, 2, 3, 4, 5]
<class 'list'>


In [16]:
# Serialization and Deserialization with a nested data structure
import json
data = {
    'name': 'Alice',
    'age': 30,
    'marks': [85, 90, 95],
    'address': {
        'street': '123 Main St',
        'city': 'Wonderland',
        'zip': '12345'
    }
}
with open('data.json','w') as f:
    json.dump(data,f,indent = 4)
with open('data.json','r') as r:
    d = json.load(r)
    print(d)

{'name': 'Alice', 'age': 30, 'marks': [85, 90, 95], 'address': {'street': '123 Main St', 'city': 'Wonderland', 'zip': '12345'}}


In [18]:
# Serialization and Deserialization with a custom object
import json
class Person:
    def __init__(self,name,age):
        self.name = name
        self.age = age
        self.mobile = '1234567890'
person = Person('Bob',25)

# To serialize a custom object, we need to define a method to convert it to a serializable format, such as a dictionary. Similarly, during deserialization, we need to define how to convert the dictionary back to the custom object.
def show_person(obj):
    return {
        'name': obj.name,
        'age': obj.age,
        'mobile': obj.mobile
    }
with open('data.json','w') as f:
    json.dump(person,f,default = show_person,indent=4)
    
with open('data.json','r') as r:
    data = json.load(r)
    print(data)

{'name': 'Bob', 'age': 25, 'mobile': '1234567890'}


In [None]:
# Pickling and Unpickling
# Picking is the process where a python object is converted into a byte stream and unpickling is the reverse process where a byte stream is converted back to a python object.
# Pickling is used to save complex data types like lists, dictionaries, sets, tuples, and custom objects to a file or transmit them over a network.


In [None]:
# Making a custom object and pickling it

# isinstance is used to check if an object is an instance of a particular class or a tuple of classes.
class Person:
    def __init__(self,name,age):
        self.name = name
        self.age = age
        self.mobile = '1234567890'
    def display_info(self):
        print(f'Name: {self.name}, Age: {self.age}, Mobile: {self.mobile}')
person = Person('Bob',25)

import pickle
with open('person.pkl','wb') as pkl_file:
    pickle.dump(person,pkl_file) # pickle has dump method to pickle the object just like json has dump method to serialize

In [None]:
# Pickle lets us use the custom object later by unpickling it
# The person is custom object of Person class which also has display_info method
# Using pickle we can retrieve the object and use its methods and attributes as needed.
with open('person.pkl','rb') as pkl_file:
    unpickle = pickle.load(pkl_file) #pickle has load method to unpickle the object just like json has load method to deserialize
    unpickle.display_info()

Name: Bob, Age: 25, Mobile: 1234567890


In [None]:
# Pickle vs JSON

# Pickle is more powerful than JSON when it comes to serializing and deserializing complex Python objects, including custom classes and functions.

# But JSON is more human-readable and language-independent, making it a better choice for data interchange between different systems.

# So when we need to store or transmit complex Python objects within a Python environment, pickle is often the preferred choice. However, for data interchange between different programming languages or systems, JSON is usually more suitable.