# Reading CSV Files 

In [4]:
# What is a CSV file?

# A CSV (Comma-Separated Values) file stores tabular data in plain text.
# Each line corresponds to a row, and values are separated by commas.

In [64]:
# Let's assume we have a folder named `data_sets` containing a file `sample_data.csv`.

df = pd.read_csv('data_sets/sample_data.csv')

In [66]:
# Display the entire DataFrame
print(df)

    ID       Name  Age             Subject        Address ContactNumber
0    1       John   25                Math    123 Main St      555-1234
1    2       Emma   30             Science     456 Elm St      555-5678
2    3    Michael   28             English     789 Oak St      555-9876
3    4     Sophia   32             History    321 Pine St      555-4321
4    5    William   27           Geography   987 Maple St      555-8765
5    6     Olivia   31             Physics   654 Cedar St      555-2468
6    7      James   26           Chemistry   321 Birch St      555-1357
7    8        Ava   29             Biology  876 Willow St      555-8642
8    9   Benjamin   33            Computer  543 Cherry St      555-9753
9   10   Isabella   27                 Art  210 Spruce St      555-6312
10  11      Jacob   29               Music     876 Elm St      555-8402
11  12        Mia   34  Physical Education     543 Oak St      555-3201
12  13      Henry   26          Literature    987 Pine St      5

In [68]:
# Display the first 5 rows (by default, head() returns the first 5 rows)

print(df.head())

   ID     Name  Age    Subject       Address ContactNumber
0   1     John   25       Math   123 Main St      555-1234
1   2     Emma   30    Science    456 Elm St      555-5678
2   3  Michael   28    English    789 Oak St      555-9876
3   4   Sophia   32    History   321 Pine St      555-4321
4   5  William   27  Geography  987 Maple St      555-8765


In [70]:
# Count the number of non-null (non-empty) values in each column

print(df.count())  # This helps check if there are missing values.

ID               20
Name             20
Age              20
Subject          20
Address          20
ContactNumber    20
dtype: int64


In [72]:
# Get the shape of the DataFrame (rows, columns)

print(df.shape)  # (number of rows, number of columns)

(20, 6)


In [74]:
# Select a single column

print(df['Name'])  # Extracts only the "Name" column

0          John
1          Emma
2       Michael
3        Sophia
4       William
5        Olivia
6         James
7           Ava
8      Benjamin
9      Isabella
10        Jacob
11          Mia
12        Henry
13    Charlotte
14       Daniel
15       Amelia
16        David
17        Emily
18    Alexander
19        Grace
Name: Name, dtype: object


In [76]:
# Select multiple columns

print(df[['Name', 'Address']])  # Extracts "Name" and "Address" columns

         Name        Address
0        John    123 Main St
1        Emma     456 Elm St
2     Michael     789 Oak St
3      Sophia    321 Pine St
4     William   987 Maple St
5      Olivia   654 Cedar St
6       James   321 Birch St
7         Ava  876 Willow St
8    Benjamin  543 Cherry St
9    Isabella  210 Spruce St
10      Jacob     876 Elm St
11        Mia     543 Oak St
12      Henry    987 Pine St
13  Charlotte   654 Cedar St
14     Daniel   321 Birch St
15     Amelia  210 Willow St
16      David   876 Maple St
17      Emily     543 Elm St
18  Alexander     987 Oak St
19      Grace    654 Pine St


# Writing CSV Files 

In [79]:
# 🔹 What is writing a CSV file?

# Writing a DataFrame to a CSV file allows us to store and share data in a structured format.

In [81]:
df = pd.DataFrame({
    'Name': ['Ram', 'Shyam', 'Hari'],
    'Age': [12, 13, 14],
    'City': ['Ktm', 'Bkt', 'Pkr']
})

In [83]:
# 📌 Save the DataFrame to a CSV file inside `data_sets`

df.to_csv('data_sets/csv.csv', index=False)  # index=False prevents writing row indices

In [85]:
# 📌 Read the newly created CSV file

df = pd.read_csv('data_sets/csv.csv')
print(df)

    Name  Age City
0    Ram   12  Ktm
1  Shyam   13  Bkt
2   Hari   14  Pkr


# Reading Excel Files

In [88]:
# What is an Excel file?

# Excel files (XLSX) store structured data in multiple sheets.
# Pandas allows us to read specific sheets using `sheet_name`.

In [90]:
df = pd.read_excel('data_sets/sample_data.xlsx', sheet_name="Sheet1")  

# `sheet_name="Sheet1"` specifies which sheet to read

In [92]:
print(df)

    ID       Name  Age             Subject        Address Contact Number
0    1       John   25                Math    123 Main St       555-1234
1    2       Emma   30             Science     456 Elm St       555-5678
2    3    Michael   28             English     789 Oak St       555-9876
3    4     Sophia   32             History    321 Pine St       555-4321
4    5    William   27           Geography   987 Maple St       555-8765
5    6     Olivia   31             Physics   654 Cedar St       555-2468
6    7      James   26           Chemistry   321 Birch St       555-1357
7    8        Ava   29             Biology  876 Willow St       555-8642
8    9   Benjamin   33            Computer  543 Cherry St       555-9753
9   10   Isabella   27                 Art  210 Spruce St       555-6312
10  11      Jacob   29               Music     876 Elm St       555-8402
11  12        Mia   34  Physical Education     543 Oak St       555-3201
12  13      Henry   26          Literature    987 P

# Writing Excel Files 

In [95]:
df = pd.DataFrame({
    'Name': ['Ram', 'Shyam', 'Hari'],
    'Age': [12, 13, 14],
    'City': ['Ktm', 'Bkt', 'Pkr']
})

# Using ExcelWriter for better control over writing Excel files
writer = pd.ExcelWriter('data_sets/excel.xlsx')

In [97]:
# Save DataFrame to an Excel file without writing index values

df.to_excel(writer, index=False)

In [99]:
# Save and close the file

writer._save()

In [101]:
#  Read the Excel file back into a DataFrame

df_read = pd.read_excel('data_sets/excel.xlsx', sheet_name="Sheet1")
print(df_read)

    Name  Age City
0    Ram   12  Ktm
1  Shyam   13  Bkt
2   Hari   14  Pkr


# Reading JSON File

In [104]:
# What is a JSON file?

# JSON (JavaScript Object Notation) is a lightweight format for storing structured data.
# It's widely used in APIs and web services.

In [106]:
# Reading a JSON file from `data_sets`

with open("data_sets/sample_data.json", "r") as file:
    data = file.read()  # Read the file content
    df = pd.read_json(data)  # Convert JSON data into a Pandas DataFrame

print(df.head(3))  
print(df.tail(4))  
print(df['Name']) 

   ID     Name  Age  Subject      Address Contact Number
0   1     John   25     Math  123 Main St       555-1234
1   2     Emma   30  Science   456 Elm St       555-5678
2   3  Michael   28  English   789 Oak St       555-9876
    ID       Name  Age            Subject       Address Contact Number
16  17      David   27          Sociology  876 Maple St       555-6402
17  18      Emily   32  Political Science    543 Elm St       555-1298
18  19  Alexander   30           Business    987 Oak St       555-7531
19  20      Grace   29   Foreign Language   654 Pine St       555-4129
0          John
1          Emma
2       Michael
3        Sophia
4       William
5        Olivia
6         James
7           Ava
8      Benjamin
9      Isabella
10        Jacob
11          Mia
12        Henry
13    Charlotte
14       Daniel
15       Amelia
16        David
17        Emily
18    Alexander
19        Grace
Name: Name, dtype: object


  df = pd.read_json(data)  # Convert JSON data into a Pandas DataFrame


# Writing JSON Files

In [109]:
df = pd.DataFrame({
    'Name': ['Ram', 'Shyam', 'Hari'],
    'Age': [12, 13, 14],
    'City': ['Ktm', 'Bkt', 'Pkr']
})

In [111]:
# 📌 Save DataFrame to a JSON file

df.to_json('data_sets/jsoneg.json', orient="records")  
# `orient="records"` formats data as a list of dictionaries

# Reading JSON Again

In [114]:
with open("data_sets/jsoneg.json", "r") as file:
    data = file.read()  # Read file content
    df = pd.read_json(data)  # Convert JSON to DataFrame

print(df.head(3))
print(df.tail(4))
print(df['Name'])

    Name  Age City
0    Ram   12  Ktm
1  Shyam   13  Bkt
2   Hari   14  Pkr
    Name  Age City
0    Ram   12  Ktm
1  Shyam   13  Bkt
2   Hari   14  Pkr
0      Ram
1    Shyam
2     Hari
Name: Name, dtype: object


  df = pd.read_json(data)  # Convert JSON to DataFrame
