# **Reading and Writing Files**

#### **1. Writing Data to a CSV File**

In [9]:
import csv
from faker import Faker

# Use a context manager to ensure the file is properly closed
with open('write_data_to_csv.csv', 'w', newline='') as output:
    mywriter = csv.writer(output)

    header = ['name', 'age', 'street', 'city', 'state', 'zip', 'lng', 'lat']
    mywriter.writerow(header)

    fake = Faker()

    for _ in range(1000):
        mywriter.writerow([
            fake.name(),
            fake.random_int(min=18, max=80, step=1),
            fake.street_address(),
            fake.city(),
            fake.state(),
            fake.zipcode(),
            fake.longitude(),
            fake.latitude()
        ])


#### **2. Read CSV File using Pandas**

In [10]:
import pandas as pd

df_csv = pd.read_csv('write_data_to_csv.csv')
df_csv.head()

Unnamed: 0,name,age,street,city,state,zip,lng,lat
0,Paul Blake,59,5997 Brooke Fall Suite 431,Martinezview,Alaska,56291,-124.890396,-72.662665
1,Michele Sparks,25,341 Robert Road Apt. 391,West Joshuaburgh,Ohio,22272,105.276738,-18.659475
2,Marcus Williams,79,7771 Mccann Route,West Raymond,Nevada,62209,-157.482064,39.809945
3,Brian Roth,61,77434 Paul Valley Apt. 544,Cuevasmouth,Vermont,90896,-132.806027,15.577752
4,Kristopher Vega,47,7706 Christy Crest Apt. 778,Mackville,Idaho,34027,49.75322,6.670694


In [11]:
df_csv.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 8 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   name    1000 non-null   object 
 1   age     1000 non-null   int64  
 2   street  1000 non-null   object 
 3   city    1000 non-null   object 
 4   state   1000 non-null   object 
 5   zip     1000 non-null   int64  
 6   lng     1000 non-null   float64
 7   lat     1000 non-null   float64
dtypes: float64(2), int64(2), object(4)
memory usage: 62.6+ KB


#### **3. Write Data to a JSON File**

In [15]:
from faker import Faker
import json

# Initialize Faker
fake = Faker()

# Create a dictionary to hold all the records
alldata = {'records': []}

# Generate 1000 entries of fake data
for _ in range(1000):
    data = {
        "name": fake.name(),
        "age": fake.random_int(min=18, max=80, step=1),
        "street": fake.street_address(),
        "city": fake.city(),
        "state": fake.state(),
        "zip": fake.zipcode(),
        "lng": float(fake.longitude()),
        "lat": float(fake.latitude())
    }
    alldata['records'].append(data)

# Write the data to a JSON file
with open('write_data_to_json.json', 'w') as output:
    json.dump(alldata, output, indent=4)


#### **Read the JSON File using Pandas**

In [21]:
df_json = pd.read_json('write_data_to_json.json')
# Display the first two rows in JSON format
json_output = df_json.head(2).to_json(orient='records')
print(json_output)

[{"records":{"name":"Desiree Ramos","age":68,"street":"347 Cross Turnpike Suite 705","city":"Singletonside","state":"Wyoming","zip":"89699","lng":-62.444957,"lat":-23.4954855}},{"records":{"name":"Isaac Austin","age":36,"street":"76744 Burke Loaf","city":"East Sheila","state":"Florida","zip":"73108","lng":-13.253698,"lat":-8.7869115}}]


In [22]:
df_json_normalized = pd.json_normalize(df_json['records'])
df_json_normalized.head()

Unnamed: 0,name,age,street,city,state,zip,lng,lat
0,Desiree Ramos,68,347 Cross Turnpike Suite 705,Singletonside,Wyoming,89699,-62.444957,-23.495486
1,Isaac Austin,36,76744 Burke Loaf,East Sheila,Florida,73108,-13.253698,-8.786912
2,Sheri Rich,63,813 Wilson Pass Suite 390,New April,Delaware,38140,168.371959,-34.74318
3,David Payne,23,63530 Jensen Parkway,East Chelseaburgh,Nebraska,99685,-52.382286,2.843711
4,William Martinez,80,92888 Angela Motorway Suite 432,Annetteton,Delaware,55216,-157.872727,-26.759869
