# 5.1 DataFrames

## Key Concepts

In [None]:
import pandas as pd

# Fruit inventory, organised by column: every key is a column name and
# every list holds that column's ten values, in row order.
data = dict(
    Product=[
        'Apple', 'Banana', 'Orange', 'Mango', 'Grape',
        'Pear', 'Pineapple', 'Peach', 'Kiwi', 'Watermelon',
    ],
    Quantity=[50, 30, 20, 15, 40, 25, 10, 18, 12, 5],
    Price=[0.60, 0.40, 0.50, 1.20, 0.80, 0.55, 1.50, 0.90, 0.75, 3.00],
)

# Build the table: dictionary keys become the column headers and rows
# receive the default 0..9 integer index.
df = pd.DataFrame(data)

# Print the whole table as plain text
print(df)


      Product  Quantity  Price
0       Apple        50   0.60
1      Banana        30   0.40
2      Orange        20   0.50
3       Mango        15   1.20
4       Grape        40   0.80
5        Pear        25   0.55
6   Pineapple        10   1.50
7       Peach        18   0.90
8        Kiwi        12   0.75
9  Watermelon         5   3.00


## Follow-Along Activity

In [None]:
import pandas as pd

In [None]:
# Four sample invoices, stored column by column: each key is a column
# name and each list holds that column's values in row order.
data = dict(
    Date=['2025-01-10', '2025-01-11', '2025-01-12', '2025-01-13'],
    Invoice_No=[101, 102, 103, 104],
    Client=['ABC Ltd', 'XYZ Ltd', 'LMN Ltd', 'ABC Ltd'],
    Amount=[500, 1200, 700, 300],
    Paid=[True, False, True, False],
)

# Build the table: keys become column headers, rows are indexed 0..3
df = pd.DataFrame(data)

# Print the whole table as plain text
print(df)


         Date  Invoice_No   Client  Amount   Paid
0  2025-01-10         101  ABC Ltd     500   True
1  2025-01-11         102  XYZ Ltd    1200  False
2  2025-01-12         103  LMN Ltd     700   True
3  2025-01-13         104  ABC Ltd     300  False


In [None]:
# .iloc selects by position, so position 0 is the first transaction;
# the row comes back as a Series keyed by column name.
first_transaction = df.iloc[0]
print(first_transaction)

Date          2025-01-10
Invoice_No           101
Client           ABC Ltd
Amount               500
Paid                True
Name: 0, dtype: object


In [None]:
# Position 1 is the second transaction (positions count from 0)
second_transaction = df.iloc[1]
print(second_transaction)


Date          2025-01-11
Invoice_No           102
Client           XYZ Ltd
Amount              1200
Paid               False
Name: 1, dtype: object


In [None]:
# Indexing with a single column name returns that column as a Series
amounts = df['Amount']
print(amounts)


0     500
1    1200
2     700
3     300
Name: Amount, dtype: int64


In [None]:
# Indexing with a list of column names returns a smaller DataFrame
# containing just those columns, in the order listed.
selected_columns = ['Client', 'Amount']
client_amounts = df[selected_columns]
print(client_amounts)


    Client  Amount
0  ABC Ltd     500
1  XYZ Ltd    1200
2  LMN Ltd     700
3  ABC Ltd     300


In [None]:
# VAT is charged at 20% of each invoice amount
vat_rate = 0.2

# Assigning to a new column name appends that column to the table;
# the multiplication is applied to every row at once.
df['VAT'] = df['Amount'].mul(vat_rate)

# Show the table with the new VAT column
print(df)


         Date  Invoice_No   Client  Amount   Paid    VAT
0  2025-01-10         101  ABC Ltd     500   True  100.0
1  2025-01-11         102  XYZ Ltd    1200  False  240.0
2  2025-01-12         103  LMN Ltd     700   True  140.0
3  2025-01-13         104  ABC Ltd     300  False   60.0


In [None]:
# Filtering data (paid invoices)
# 'Paid' already holds True/False values, so the column itself is the row
# mask: rows where it is True are kept. Comparing with `== True` is redundant.
paid_invoices = df[df['Paid']]
print(paid_invoices)

         Date  Invoice_No   Client  Amount  Paid    VAT
0  2025-01-10         101  ABC Ltd     500  True  100.0
2  2025-01-12         103  LMN Ltd     700  True  140.0


In [None]:
# Filtering data (unpaid invoices)
# `~` flips every True/False in the boolean 'Paid' column, so the mask keeps
# exactly the rows where Paid is False (clearer than comparing `== False`).
unpaid_invoices = df[~df['Paid']]
print("Unpaid invoices:")
print(unpaid_invoices)

Unpaid invoices:
         Date  Invoice_No   Client  Amount   Paid    VAT
1  2025-01-11         102  XYZ Ltd    1200  False  240.0
3  2025-01-13         104  ABC Ltd     300  False   60.0


## Your Project

**Data**

In [None]:
# Project data: four expense records stored column by column. Each key is
# a column name and each list holds that column's values in row order.
data = dict(
    Date=['2025-01-01', '2025-01-03', '2025-01-05', '2025-01-07'],
    Expense_ID=[1, 2, 3, 4],
    Category=['Office Supplies', 'Utilities', 'Travel', 'Office Supplies'],
    Amount=[150.75, 300.50, 450.00, 120.25],
    Approved=[True, False, True, False],
)

**Example code:**

In [None]:
import pandas as pd

# The data represents expenses for a small business.
# Each key is a column name; each list holds that column's values in row order.
data = {
    'Date': ['2025-01-01', '2025-01-03', '2025-01-05', '2025-01-07'],
    'Expense_ID': [1, 2, 3, 4],
    'Category': ['Office Supplies', 'Utilities', 'Travel', 'Office Supplies'],
    'Amount': [150.75, 300.50, 450.00, 120.25],
    'Approved': [True, False, True, False]
}

# Step 1: Load the Expense Dataset
# Dictionary keys become column headers; rows get the default 0..3 index.
df = pd.DataFrame(data)
print("Expense Dataset:")
print(df)

# Step 2: Access a Specific Row
# .iloc selects by position; the row is returned as a Series keyed by column.
print("Accessing row at index 0:")
print(df.iloc[0])

# Step 3: Access a Specific Column
# A single column name returns that column as a Series.
print("Accessing 'Category' column:")
print(df['Category'])

# Step 4: Add a New Column (Tax @ 10%)
# Assigning to a new column name appends it; the multiplication is applied
# to every row at once.
df['Tax'] = df['Amount'] * 0.1
print("Data after adding Tax column:")
print(df)

# Step 5: Filter Data (Unapproved Expenses)
# 'Approved' already holds True/False values, so `~` inverts it into a mask
# that keeps only the rows where Approved is False (no `== False` needed).
unapproved_expenses = df[~df['Approved']]
print("Unapproved expenses:")
print(unapproved_expenses)

Expense Dataset:
         Date  Expense_ID         Category  Amount  Approved
0  2025-01-01           1  Office Supplies  150.75      True
1  2025-01-03           2        Utilities  300.50     False
2  2025-01-05           3           Travel  450.00      True
3  2025-01-07           4  Office Supplies  120.25     False
Accessing row at index 0:
Date               2025-01-01
Expense_ID                  1
Category      Office Supplies
Amount                 150.75
Approved                 True
Name: 0, dtype: object
Accessing 'Category' column:
0    Office Supplies
1          Utilities
2             Travel
3    Office Supplies
Name: Category, dtype: object
Data after adding Tax column:
         Date  Expense_ID         Category  Amount  Approved     Tax
0  2025-01-01           1  Office Supplies  150.75      True  15.075
1  2025-01-03           2        Utilities  300.50     False  30.050
2  2025-01-05           3           Travel  450.00      True  45.000
3  2025-01-07           4  Off