# DataFrame Selection


In [1]:
# Import pandas and build the small sample DataFrame used by every example below.
import pandas as pd

# Column label -> list of values; each list becomes one column of the frame.
data = dict(
    A=[1, 2, 3, 4],
    B=[5, 6, 7, 8],
    C=[9, 10, 11, 12],
)

# Rows receive the default integer index 0..3.
df = pd.DataFrame(data=data)
print(df)

   A  B   C
0  1  5   9
1  2  6  10
2  3  7  11
3  4  8  12


# 1. Column and Row Selection

In [3]:
# Bracket selection by column label:
#   - a single label ('A') returns that column as a Series
#   - a list of labels (['A', 'B']) returns a DataFrame with those columns
for key in ('A', ['A', 'B']):
    print(df[key])


0    1
1    2
2    3
3    4
Name: A, dtype: int64
   A  B
0  1  5
1  2  6
2  3  7
3  4  8


In [19]:

# Column selection through the indexers. The ':' in the row position keeps
# every row; the second position chooses the columns.

# loc: columns chosen by label
# One label -> Series
series_by_label = df.loc[:, 'A']
print(series_by_label)

# List of labels -> DataFrame
frame_by_label = df.loc[:, ['A', 'B']]
print(frame_by_label)

# iloc: columns chosen by integer position
# One position -> Series
series_by_position = df.iloc[:, 0]
print(series_by_position)

# List of positions -> DataFrame
frame_by_position = df.iloc[:, [0, 1]]
print(frame_by_position)

0    2
1    4
2    6
3    8
Name: A, dtype: int64
   A  B
0  2  5
1  4  6
2  6  7
3  8  8
0    2
1    4
2    6
3    8
Name: A, dtype: int64
   A  B
0  2  5
1  4  6
2  6  7
3  8  8


In [20]:
# Label-based row selection with loc (the trailing ':' keeps every column)
# One index label -> that row, returned as a Series
print(df.loc[0, :])

# A label slice -> DataFrame; with loc the end label (2) is included
print(df.loc[0:2, :])

A       2
B       5
C       9
E       7
F      -4
G      18
H     4.5
I    True
Name: 0, dtype: object
   A  B   C   E  F   G         H     I
0  2  5   9   7 -4  18  4.500000  True
1  4  6  10  10 -4  40  2.500000  True
2  6  7  11  13 -4  66  1.833333  True


In [31]:
# Position-based row selection with iloc (the trailing ':' keeps every column)
# One integer position -> that row, returned as a Series
print(df.iloc[0, :])

# A position slice -> DataFrame; the stop position (2) is excluded
print(df.iloc[:2, :])

A       4
B       5
C       9
E       9
F      -4
G      36
H    2.25
I    True
Name: 0, dtype: object
   A  B   C   E  F   G     H     I
0  4  5   9   9 -4  36  2.25  True
1  8  6  10  14 -4  80  1.25  True


# 2. Conditional Selection

In [32]:
# Boolean-mask filtering: a True/False Series inside [] keeps the True rows.

# Rows whose 'A' value is greater than 2
print(df[df['A'].gt(2)])

# Rows whose 'A' value is greater than 2 AND whose 'B' value is less than 8
# ('&' combines the two masks element-wise)
print(df[df['A'].gt(2) & df['B'].lt(8)])

    A  B   C   E  F    G         H     I
0   4  5   9   9 -4   36  2.250000  True
1   8  6  10  14 -4   80  1.250000  True
2  12  7  11  19 -4  132  0.916667  True
3  16  8  12  24 -4  192  0.750000  True
    A  B   C   E  F    G         H     I
0   4  5   9   9 -4   36  2.250000  True
1   8  6  10  14 -4   80  1.250000  True
2  12  7  11  19 -4  132  0.916667  True


# 3. Adding, Deleting, and Updating Columns

* Adding a Column

In [33]:
# Create column 'D' as the element-wise sum of columns 'A' and 'B'.
# Assigning to a label that does not exist yet appends a new column.
df['D'] = df['A'].add(df['B'])
print(df)

    A  B   C   E  F    G         H     I   D
0   4  5   9   9 -4   36  2.250000  True   9
1   8  6  10  14 -4   80  1.250000  True  14
2  12  7  11  19 -4  132  0.916667  True  19
3  16  8  12  24 -4  192  0.750000  True  24


* Deleting a Column

In [34]:
# Deleting a column
# drop() returns a new DataFrame, so df is rebound instead of using
# inplace=True. errors='ignore' makes the cell safe to re-run: once 'D' is
# gone, a second execution is a no-op rather than a KeyError.
df = df.drop(columns='D', errors='ignore')
print(df)

    A  B   C   E  F    G         H     I
0   4  5   9   9 -4   36  2.250000  True
1   8  6  10  14 -4   80  1.250000  True
2  12  7  11  19 -4  132  0.916667  True
3  16  8  12  24 -4  192  0.750000  True


* Updating a Column

In [35]:
# Overwrite column 'A' with twice its current values.
# Note: the new values are computed from the current ones, so every execution
# of this cell doubles 'A' again.
df['A'] = df['A'].mul(2)
print(df)

    A  B   C   E  F    G         H     I
0   8  5   9   9 -4   36  2.250000  True
1  16  6  10  14 -4   80  1.250000  True
2  24  7  11  19 -4  132  0.916667  True
3  32  8  12  24 -4  192  0.750000  True


# 4. Index Operations

* Setting an Index

In [36]:
# Setting a column as index
# set_index() returns a new DataFrame whose row labels are the values of 'A';
# df is rebound to it instead of mutating with inplace=True.
df = df.set_index('A')
# sep="" stops print() from inserting a space before the frame, which would
# shift only the header row and misalign it with the values below.
print("\nDataFrame after setting index to column A:\n", df, sep="")


DataFrame after setting index to column A:
     B   C   E  F    G         H     I
A                                    
8   5   9   9 -4   36  2.250000  True
16  6  10  14 -4   80  1.250000  True
24  7  11  19 -4  132  0.916667  True
32  8  12  24 -4  192  0.750000  True


* Resetting an Index

In [37]:
# Resetting index to default integer index
# reset_index() returns a new DataFrame in which the current index becomes a
# regular column again; df is rebound to it instead of using inplace=True.
df = df.reset_index()
# sep="" stops print() from inserting a space before the frame, which would
# shift only the header row and misalign it with the values below.
print("\nDataFrame after resetting index:\n", df, sep="")


DataFrame after resetting index:
     A  B   C   E  F    G         H     I
0   8  5   9   9 -4   36  2.250000  True
1  16  6  10  14 -4   80  1.250000  True
2  24  7  11  19 -4  132  0.916667  True
3  32  8  12  24 -4  192  0.750000  True


* Removing the Index Name

In [38]:
# Removing the index name
# Clears only the label shown above the index; the index values are unchanged.
df.index.name = None
# sep="" stops print() from inserting a space before the frame, which would
# shift only the header row and misalign it with the values below.
print("\nDataFrame after removing index name:\n", df, sep="")


DataFrame after removing index name:
     A  B   C   E  F    G         H     I
0   8  5   9   9 -4   36  2.250000  True
1  16  6  10  14 -4   80  1.250000  True
2  24  7  11  19 -4  132  0.916667  True
3  32  8  12  24 -4  192  0.750000  True



# 5. Operations Between Columns

* Addition, Subtraction, Multiplication, and Division

In [39]:
# Element-wise arithmetic between columns; each result is stored in its own
# column and the frame is printed after every step.

# Addition: E = A + B
df['E'] = df['A'].add(df['B'])
print(df)

# Subtraction: F = B - C
df['F'] = df['B'].sub(df['C'])
print(df)

# Multiplication: G = A * C
df['G'] = df['A'].mul(df['C'])
print(df)

# Division: H = C / A (true division)
df['H'] = df['C'].div(df['A'])
print(df)

    A  B   C   E  F    G         H     I
0   8  5   9  13 -4   36  2.250000  True
1  16  6  10  22 -4   80  1.250000  True
2  24  7  11  31 -4  132  0.916667  True
3  32  8  12  40 -4  192  0.750000  True
    A  B   C   E  F    G         H     I
0   8  5   9  13 -4   36  2.250000  True
1  16  6  10  22 -4   80  1.250000  True
2  24  7  11  31 -4  132  0.916667  True
3  32  8  12  40 -4  192  0.750000  True
    A  B   C   E  F    G         H     I
0   8  5   9  13 -4   72  2.250000  True
1  16  6  10  22 -4  160  1.250000  True
2  24  7  11  31 -4  264  0.916667  True
3  32  8  12  40 -4  384  0.750000  True
    A  B   C   E  F    G         H     I
0   8  5   9  13 -4   72  1.125000  True
1  16  6  10  22 -4  160  0.625000  True
2  24  7  11  31 -4  264  0.458333  True
3  32  8  12  40 -4  384  0.375000  True


* Logical OR Operation

In [40]:
# Element-wise logical OR: 'I' is True on every row where 'A' is greater
# than 2 or 'B' is less than 7 (or both).
df['I'] = df['A'].gt(2) | df['B'].lt(7)
print(df)

    A  B   C   E  F    G         H     I
0   8  5   9  13 -4   72  1.125000  True
1  16  6  10  22 -4  160  0.625000  True
2  24  7  11  31 -4  264  0.458333  True
3  32  8  12  40 -4  384  0.375000  True
