# What Is a JSON File?
JSON (JavaScript Object Notation) is a lightweight, human-readable text format for storing and exchanging structured data. Its objects and arrays correspond to Python dictionaries and lists.

In [6]:
import json

# Sample records: one dict per employee.
data = [
    {"name": "Ali", "age": 25, "city": "Lahore"},
    {"name": "Sara", "age": 22, "city": "Karachi"},
]

# Write the records to employees.json, pretty-printed with a 4-space indent.
# The encoding is pinned to UTF-8 so the bytes on disk do not depend on the
# platform's default locale encoding.
with open("employees.json", "w", encoding="utf-8") as f:
    json.dump(data, f, indent=4)



In [7]:
# Load employees.json back into Python objects. The encoding is stated
# explicitly (UTF-8) so decoding does not depend on the platform default.
with open("employees.json", "r", encoding="utf-8") as f:
    content = json.load(f)

# The file is closed by now; show the parsed value.
print(content)



[{'name': 'Ali', 'age': 25, 'city': 'Lahore'}, {'name': 'Sara', 'age': 22, 'city': 'Karachi'}]


In [9]:
import pandas as pd

# Parse employees.json into a DataFrame. orient="records" declares the
# "list of row objects" layout ([{column: value, ...}, ...]); "columns"
# would describe a {column: {index: value}} mapping instead.
df = pd.read_json("employees.json", orient="records")
print(df)


   name  age     city
0   Ali   25   Lahore
1  Sara   22  Karachi


In [11]:
# typ="series" asks read_json for a Series rather than a DataFrame.
# Note that the result is still bound to the name `df`.
df = pd.read_json(
    "employees.json",
    typ="series",
)
# Bare last expression so the notebook displays the result.
df

0      {'name': 'Ali', 'age': 25, 'city': 'Lahore'}
1    {'name': 'Sara', 'age': 22, 'city': 'Karachi'}
dtype: object

In [12]:
import pandas as pd

# Name -> age mapping that gets round-tripped through a JSON file.
data = {"Ali": 25, "Sara": 30, "Usman": 27}

# Build a Series from the mapping (the keys become the index labels)
# and write it out to empl.json.
s = pd.Series(data)
s.to_json(
    "empl.json",
)

# Load the file again; typ="series" yields a Series instead of a DataFrame.
df = pd.read_json(
    "empl.json",
    typ="series",
)
print(df)


Ali      25
Sara     30
Usman    27
dtype: int64


In [13]:
import pandas as pd

# precise_float=True selects pandas' higher-precision string-to-double
# decoder instead of the faster default one.
# NOTE(review): no visible cell creates float_data.json - confirm the file
# exists before running this cell on a fresh kernel.
df = pd.read_json(
    "float_data.json",
    precise_float=True,
)
print(df)



      value
0  0.123457
1  0.246914
2  0.370370


In [14]:
import pandas as pd

# With chunksize set (and lines=True), read_json returns a lazy reader that
# yields DataFrames of up to 1000 rows instead of loading the whole file.
# Using the reader as a context manager guarantees the underlying file handle
# is closed even if the loop exits early or raises.
# NOTE(review): no visible cell creates bigfile.json - confirm the file
# exists before running this cell on a fresh kernel.
with pd.read_json("bigfile.json", lines=True, chunksize=1000) as reader:
    for chunk in reader:
        # Preview the first rows of this chunk, then print a separator rule.
        print(chunk.head())
        print("="*50)


   id        name    salary
0   0  Employee_0  30000.00
1   1  Employee_1  30010.12
2   2  Employee_2  30020.25
3   3  Employee_3  30030.37
4   4  Employee_4  30040.49
        id           name    salary
1000  1000  Employee_1000  40123.00
1001  1001  Employee_1001  40133.12
1002  1002  Employee_1002  40143.25
1003  1003  Employee_1003  40153.37
1004  1004  Employee_1004  40163.49
        id           name    salary
2000  2000  Employee_2000  50246.00
2001  2001  Employee_2001  50256.12
2002  2002  Employee_2002  50266.25
2003  2003  Employee_2003  50276.37
2004  2004  Employee_2004  50286.49
        id           name    salary
3000  3000  Employee_3000  60369.00
3001  3001  Employee_3001  60379.12
3002  3002  Employee_3002  60389.25
3003  3003  Employee_3003  60399.37
3004  3004  Employee_3004  60409.49
        id           name    salary
4000  4000  Employee_4000  70492.00
4001  4001  Employee_4001  70502.12
4002  4002  Employee_4002  70512.25
4003  4003  Employee_4003  70522.37
4004

In [15]:
import pandas as pd

# Public sample API endpoint that returns example "posts" data as JSON.
url = "https://jsonplaceholder.typicode.com/posts"

# read_json accepts a URL as well as a local path, so this cell needs
# network access to run.
df = pd.read_json(url)

# Preview the first five rows (5 is also the default for head()).
print(df.head(5))


   userId  id                                              title  \
0       1   1  sunt aut facere repellat provident occaecati e...   
1       1   2                                       qui est esse   
2       1   3  ea molestias quasi exercitationem repellat qui...   
3       1   4                               eum et est occaecati   
4       1   5                                 nesciunt quas odio   

                                                body  
0  quia et suscipit\nsuscipit recusandae consequu...  
1  est rerum tempore vitae\nsequi sint nihil repr...  
2  et iusto sed quo iure\nvoluptatem occaecati om...  
3  ullam et saepe reiciendis voluptatem adipisci\...  
4  repudiandae veniam quaerat sunt sed\nalias aut...  


In [20]:
import pandas as pd

# Three student records, one dict per row, built from (id, name, score) tuples.
data = [
    {"id": student_id, "name": student_name, "score": student_score}
    for student_id, student_name, student_score in (
        (1, "Alice", 95.5),
        (2, "Bob", 88.0),
        (3, "Charlie", 79.3),
    )
]

df = pd.DataFrame(data)

# Save uncompressed. orient="records" together with lines=True writes
# JSON Lines: one JSON object per row, one row per line.
df.to_json(
    "students.json",
    orient="records",
    lines=True,
)



In [21]:
# Write the same frame as JSON Lines once per compression codec.
# Each pair is (output file, value passed to `compression=`); the files are
# written in this order: gzip, bz2, zip, xz.
for target, codec in (
    ("students.json.gz", "gzip"),
    ("students.json.bz2", "bz2"),
    ("students.json.zip", "zip"),  # pandas only supports a single file in a zip
    ("students.json.xz", "xz"),
):
    df.to_json(target, orient="records", lines=True, compression=codec)


In [22]:
def _read_students(path, codec):
    """Read one compressed JSON Lines file into a DataFrame.

    path:  file to read.
    codec: value passed to read_json's `compression=` argument.
    """
    return pd.read_json(path, compression=codec, lines=True)


# One frame per archive, read in the same order they were written.
df_gz = _read_students("students.json.gz", "gzip")
df_bz2 = _read_students("students.json.bz2", "bz2")
df_zip = _read_students("students.json.zip", "zip")
df_xz = _read_students("students.json.xz", "xz")


In [23]:
# Display the frame that was read back from students.json.gz.
df_gz


Unnamed: 0,id,name,score
0,1,Alice,95.5
1,2,Bob,88.0
2,3,Charlie,79.3


In [24]:
# Display the frame that was read back from students.json.bz2.
df_bz2 

Unnamed: 0,id,name,score
0,1,Alice,95.5
1,2,Bob,88.0
2,3,Charlie,79.3
