### DataFrame

In [32]:
import pandas as pd

# One record (a plain dict) per student; the keys become the column names.
students = [
    dict(name='Alice', age=21, course='Physics'),
    dict(name='Codeine', age=22, course='Chemistry'),
    dict(name='Kate', age=25, course='Biology'),
]

# The row labels are intentionally non-unique ('school1' appears twice) so the
# following cells can show how label-based selection behaves with duplicates.
d = pd.DataFrame(
    students,
    index=['school1', 'school2', 'school1'],
)
d

Unnamed: 0,name,age,course
school1,Alice,21,Physics
school2,Codeine,22,Chemistry
school1,Kate,25,Biology


In [33]:
# As with a Series, we can extract data using the .loc and .iloc attributes.
# Because a DataFrame is two-dimensional, passing a single label to the .loc
# indexing operator returns a Series when exactly one row matches that label.
d.loc['school2']

name        Codeine
age              22
course    Chemistry
Name: school2, dtype: object

In [34]:
# If the label is not unique in the index, .loc returns a DataFrame
# containing every row that carries that label.
d.loc['school1']

Unnamed: 0,name,age,course
school1,Alice,21,Physics
school1,Kate,25,Biology


In [35]:
# A DataFrame can be indexed along both axes at once:
# the first argument is the row (index) label, the second is the column name.
d.loc['school1', 'name']
# Here we get the names of school1's students.

school1    Alice
school1     Kate
Name: name, dtype: object

In [36]:
# One (roundabout) way to select a single column starts by transposing the
# frame with .T, so that the column names become the row labels:
d.T

Unnamed: 0,school1,school2,school1.1
name,Alice,Codeine,Kate
age,21,22,25
course,Physics,Chemistry,Biology


In [37]:
# ...and then use .loc on the transposed frame to pick the former 'name'
# column by its label.
d.T.loc['name']

school1      Alice
school2    Codeine
school1       Kate
Name: name, dtype: object

In [38]:
# Display d again: transposing returned a new object and left d unchanged.
d

Unnamed: 0,name,age,course
school1,Alice,21,Physics
school2,Codeine,22,Chemistry
school1,Kate,25,Biology


In [39]:
# Important: using the indexing operator [] with a label directly on the
# DataFrame performs COLUMN SELECTION, not row selection.
d['name']

school1      Alice
school2    Codeine
school1       Kate
Name: name, dtype: object

In [40]:
# Row selection goes through .loc (label-based) or .iloc (position-based).
# They are not limited to rows, though: a second argument selects columns,
# as in d.loc['school1', 'name'].
d.loc['school2']

name        Codeine
age              22
course    Chemistry
Name: school2, dtype: object

In [41]:
# Now, putting the previous pieces together, you can do this:
d.loc[:, ['name', 'age']]
# The colon (:) selects all rows, and the list selects the 'name' and 'age'
# columns.

Unnamed: 0,name,age
school1,Alice,21
school2,Codeine,22
school1,Kate,25


---

In [42]:
# Now let's take a look at .drop()
d.drop('school1')
# It removes every row labelled 'school1', but be aware of how it does it:
# it does not modify the original DataFrame, it just returns a copy without
# those rows.

Unnamed: 0,name,age,course
school2,Codeine,22,Chemistry


In [43]:
# As you can see, d still contains all three rows.
d

Unnamed: 0,name,age,course
school1,Alice,21,Physics
school2,Codeine,22,Chemistry
school1,Kate,25,Biology


In [45]:
# To remove the row from the frame itself, pass the inplace=True flag.
# We work on a copy so that the original d stays intact.
d_copy = d.copy()
d_copy.drop('school2', inplace=True)
d_copy
# Here the row was removed from d_copy itself instead of being returned in a
# new frame.

# Note: the `del` statement also modifies the frame in place, but it removes
# a COLUMN, not a row, for example:
# del d_copy['course']
# ('school2' is a row label, so `del d_copy['school2']` would raise a KeyError.)

Unnamed: 0,name,age,course
school1,Alice,21,Physics
school1,Kate,25,Biology


In [46]:
# You can drop columns too, by setting axis=1.
# NOTE: because this mutates d_copy in place, running this cell a second time
# raises a KeyError ('course' has already been removed).
d_copy.drop('course', axis=1, inplace=True)
d_copy

Unnamed: 0,name,age
school1,Alice,21
school1,Kate,25


In [48]:
# Finally, add a new column using the bracket and = operators.
# The list supplies one value per row (d_copy has two rows here).
d_copy['Hello'] = [':)', ':D']
d_copy
# Later you will be able to create new columns based on existing columns of
# the DataFrame.

Unnamed: 0,name,age,Hello
school1,Alice,21,:)
school1,Kate,25,:D
