# Creating and Accessing DataFrames

In [1]:
import pandas as pd

In [4]:
# Column-oriented sample data: each key is a column name and each list holds
# that column's values, one entry per row.
# NOTE(review): the name `dict` rebinds Python's built-in `dict` type for the rest
# of the kernel session (a call such as dict(a=1) fails after this cell runs).
# The name is kept because a later cell passes `dict` to pd.DataFrame; renaming
# it (e.g. to `people`) requires changing that cell too.
dict = {
    "email": ["amanda@gmail.com", "mike@gmail.com", "john@gmail.com"],
    "first": ["Amanda", "Mike", "John"],
    "last": ["Taylor", "Tyson", "Doe"]
}
dict

{'email': ['amanda@gmail.com', 'mike@gmail.com', 'john@gmail.com'],
 'first': ['Amanda', 'Mike', 'John'],
 'last': ['Taylor', 'Tyson', 'Doe']}

In [5]:
# Build a DataFrame from the column-oriented dictionary:
# keys become column names, lists become the column values.
df = pd.DataFrame(data=dict)
df

Unnamed: 0,email,first,last
0,amanda@gmail.com,Amanda,Taylor
1,mike@gmail.com,Mike,Tyson
2,john@gmail.com,John,Doe


In [63]:
# Accessing a column. A single column is returned as a pandas Series,
# i.e. a labelled one-dimensional array.
a = df['email']
# sep='\n' puts each piece on its own line. With the default sep=' ', print()
# inserts a stray space before the first row of the Series (misaligning it)
# and another after the dtype line.
print('column email is:', a, '', sep='\n')
print('type of email column is:', type(a))

column email is:
 0    amanda@gmail.com
1      mike@gmail.com
2      john@gmail.com
Name: email, dtype: object 

type of email column is: <class 'pandas.core.series.Series'>


In [18]:
# Accessing multiple columns: index with a list of column names.
# The result is a DataFrame containing only those columns.
b = df[["email", "last"]]
b

Unnamed: 0,email,last
0,amanda@gmail.com,Taylor
1,mike@gmail.com,Tyson
2,john@gmail.com,Doe


In [19]:
# Listing columns: df.columns holds the column labels as a pandas Index
c = df.columns
c

Index(['email', 'first', 'last'], dtype='object')

- **By default, pandas assigns an integer label to each row, starting at 0. It is shown as the first, unlabelled column and is called the index.**
- **However, pandas does not enforce uniqueness on the index: it won't stop you if several rows somehow end up sharing the same index label.**

In [42]:
# Accessing a row by integer position with iloc (0 = first row). With the default
# index, a row's position and its integer label are the same number.
# The row is returned as a Series whose labels are the column names.
d = df.iloc[0]
d

email    amanda@gmail.com
first              Amanda
last               Taylor
Name: 0, dtype: object

In [46]:
# Accessing multiple rows by integer position: pass a list of positions to iloc.
# With the default index, positions and row labels are the same numbers.
e = df.iloc[[1, 2]]
print('multiple rows accessed by explicitly stating the ids:')
e

multiple rows accessed by explicitly stating the ids:


Unnamed: 0,email,first,last
1,mike@gmail.com,Mike,Tyson
2,john@gmail.com,John,Doe


In [50]:
# Accessing a contiguous range of rows by integer position with slice syntax
f = df.iloc[:3]  # the stop position 3 is excluded, so this returns rows 0, 1 and 2
print('multiple rows accessed by slicing syntax:')
f

multiple rows accessed by slicing syntax:


Unnamed: 0,email,first,last
0,amanda@gmail.com,Amanda,Taylor
1,mike@gmail.com,Mike,Tyson
2,john@gmail.com,John,Doe


In [75]:
# Accessing both rows and columns by integer position:
# iloc[list_of_row_positions, list_of_column_positions], all 0-based.
g = df.iloc[[1, 2], [0, 1]]
# Positions 1 and 2 are the second and third rows; the message previously
# claimed only the second row was selected.
print("second and third rows' first and second columns:")
g

second row's first and second columns:


Unnamed: 0,email,first
1,mike@gmail.com,Mike
2,john@gmail.com,John


In [89]:
# Accessing selected columns of a single row by position; the result is a Series
h = df.iloc[1, 1:3]  # row position 1, column positions 1 and 2 (stop 3 is excluded)
h

first     Mike
last     Tyson
Name: 1, dtype: object

In [87]:
# Accessing a single element from the DataFrame by integer position:
# iloc[row_position, column_position], both 0-based.
i = df.iloc[2, 2]
# Position 2 is the third row / third column; the message previously said "second".
print('element in third column and third row:')
i

element in second column and second row:


'Doe'