## Pandas

- Pandas is a Python package that lets us manipulate DataFrames (tables similar to spreadsheets in Excel or Google Sheets) programmatically

In [20]:
# Import Pandas

import pandas as pd

In [21]:
# Build a DataFrame from a dictionary: each key is a column header (string)
# and each value is the list of entries for that column

data = {
    'Names': ['A', 'B', 'C'],
    'Height': [167, 190, 132],
    'IsAdult': [True, False, True],
}

my_data_frame = pd.DataFrame(data=data)
my_data_frame

Unnamed: 0,Names,Height,IsAdult
0,A,167,True
1,B,190,False
2,C,132,True


In [22]:
# Selecting one column with [] gives back a Pandas Series object
height_column = my_data_frame['Height']
print(height_column)

# Convert that Series into a plain Python list
height_column.tolist()

0    167
1    190
2    132
Name: Height, dtype: int64


[167, 190, 132]

In [23]:
# Subsetting Rows
# .loc with a list of index labels returns a DataFrame holding just those rows.
# Only the last expression of a cell is displayed automatically, so this
# intermediate result is printed explicitly (otherwise it is silently discarded).
print(my_data_frame.loc[[1, 2]])  # Get rows at indices 1 and 2

# Getting Elements from a Specific row
# .loc with a single label returns that row as a Series; tolist() turns it into a normal list
my_data_frame.loc[1].tolist()

['B', 190, False]

In [24]:
# Index labels do not have to be 0, 1, 2, ... - any labels can be assigned

# Work on a deep copy so that my_data_frame itself keeps its original index
my_data_frame_2 = my_data_frame.copy(deep=True)
my_data_frame_2.index = pd.Index([10, 20, 30])

# .loc selects by label: fetch the rows labelled 10 and 20 (all columns)
my_data_frame_2.loc[[10, 20], :]

Unnamed: 0,Names,Height,IsAdult
10,A,167,True
20,B,190,False


In [25]:
# .iloc selects rows purely by integer position, regardless of the index labels
# (helpful when the index does not run from 0 to the length of the DataFrame)
my_data_frame_2.iloc[[0], :]

Unnamed: 0,Names,Height,IsAdult
10,A,167,True


In [26]:
# Iterating over a DataFrame one row at a time

# iterrows() yields (index label, row) pairs, where each row is a Series
for index, row in my_data_frame.iterrows():
    row_values = row.tolist()  # the row's entries as a plain list
    print("Index is", index)
    print("Row is", row_values)

Index is 0
Row is ['A', 167, True]
Index is 1
Row is ['B', 190, False]
Index is 2
Row is ['C', 132, True]


In [27]:
# Convert the Height column from centimetres to metres in one vectorized step.
# Replacing the whole column (instead of writing floats one element at a time
# with .at into an int64 column) avoids the incompatible-dtype upcast that
# newer pandas versions warn about, and needs no Python-level row loop.
# NOTE: this overwrites my_data_frame['Height'], so running this cell a second
# time divides by 100 again - re-run the DataFrame-creation cell first to reset.
my_data_frame['Height'] = my_data_frame['Height'] / 100  # Convert cm to m
print(my_data_frame)

  Names  Height  IsAdult
0     A    1.67     True
1     B    1.90    False
2     C    1.32     True
