In [83]:
# Pandas is a tabular data manipulation library.
# A DataFrame has two axes, both indexed starting from 0.
# The horizontal axis (axis=1) is called columns.
# The vertical axis (axis=0) is called index.
import pandas as pd

# Sample movie records stored column-wise: one key per field,
# and one list entry per movie under each key.
movies = dict(
    title=['Star War', 'Avatar', 'End Game'],
    year=[2000, 2005, 2019],
    revenue=[100, 200, 150],
)
print(movies)


{'title': ['Star War', 'Avatar', 'End Game'], 'year': [2000, 2005, 2019], 'revenue': [100, 200, 150]}


In [84]:
# Construct a DataFrame from the dictionary: each key becomes a column,
# and the rows receive a default integer index starting at 0.
movies_df = pd.DataFrame(movies)
print(movies_df)


      title  year  revenue
0  Star War  2000      100
1    Avatar  2005      200
2  End Game  2019      150


In [85]:
# We can get the column names through the `columns` attribute.
# It returns a pandas Index object rather than a plain Python list.
columns_names = movies_df.columns
print(columns_names)


Index(['title', 'year', 'revenue'], dtype='object')


In [86]:
# We can get the row labels through the `index` attribute.
# No index was supplied when the DataFrame was built, so this is a RangeIndex starting at 0.
index = movies_df.index
print(index)


RangeIndex(start=0, stop=3, step=1)


In [87]:
# We can access a column's data by specifying the column name, using dictionary-like syntax.
# A single column name returns a Series.
print(movies_df['title'])

# To access multiple columns, we provide a list of column names.
# A list of column names returns a DataFrame.
print(movies_df[['title', 'year']])


0    Star War
1      Avatar
2    End Game
Name: title, dtype: object
      title  year
0  Star War  2000
1    Avatar  2005
2  End Game  2019


In [88]:
# We can access any cell/subset/slice of the DataFrame using either the positions or the labels of both axes.
# To access by integer position, we use the 'iloc' property: iloc[row_position, column_position].
# Here row position 0 and column position 0 give the title of the first movie.
cell_0_0 = movies_df.iloc[0, 0]
print(cell_0_0)


Star War


In [89]:
# Row position 1 and column position 2: the revenue of the second movie.
cell_1_2 = movies_df.iloc[1, 2]
print(cell_1_2)

200


In [90]:
# Passing a list of positions on each axis selects several rows and columns at once.
# The result is a smaller DataFrame (not a single cell): rows 0 and 2, columns 1 and 2.
cell_with_index_0_2_and_column_1_2 = movies_df.iloc[[0, 2], [1, 2]]
print(cell_with_index_0_2_and_column_1_2)

   year  revenue
0  2000      100
2  2019      150


In [91]:
# If we specify only one argument in the square brackets, it is taken as the row position
# and we get the whole row, returned as a Series labelled by the column names.
row_0 = movies_df.iloc[0]
print(row_0)


title      Star War
year           2000
revenue         100
Name: 0, dtype: object


In [92]:
# We can use a list of positions to access multiple rows; the result is a DataFrame.
row_0_and_2 = movies_df.iloc[[0, 2]]
print(row_0_and_2)

      title  year  revenue
0  Star War  2000      100
2  End Game  2019      150


In [93]:
# We can use slicing to access a slice of the DataFrame along either axis or both axes.
# As with Python lists, the stop position is excluded: 0:2 selects positions 0 and 1 only.
slice_index_0_2_and_column_0_2 = movies_df.iloc[0:2, 0:2]
print(slice_index_0_2_and_column_0_2)


      title  year
0  Star War  2000
1    Avatar  2005


In [94]:

# A bare ':' on an axis selects everything along it, so this returns all rows and all columns.
slice_all = movies_df.iloc[:, :]
print(slice_all)


      title  year  revenue
0  Star War  2000      100
1    Avatar  2005      200
2  End Game  2019      150


In [95]:
# Negative positions count from the end, so the slice -1: selects the last row.
# Because a slice is used instead of a single integer, the result stays a one-row DataFrame.
slice_last_row = movies_df.iloc[-1:, :]
print(slice_last_row)


      title  year  revenue
2  End Game  2019      150


In [96]:
# Negative slices work on both axes: the last row combined with the last two columns.
slice_last_row_and_last_two_columns = movies_df.iloc[-1:, -2:]
print(slice_last_row_and_last_two_columns)



   year  revenue
2  2019      150


In [97]:
# Instead of accessing the DataFrame by integer position, we can use column names and index labels.
# For that we use the 'loc' property instead of the 'iloc' property used above.
# A bare ':' on both axes selects all rows and all columns.
all_rows = movies_df.loc[:,:]
print(all_rows)


      title  year  revenue
0  Star War  2000      100
1    Avatar  2005      200
2  End Game  2019      150


In [98]:
# Select the row whose index label is 1 and keep only the 'title' and 'revenue' columns.
# With the default index (0, 1, 2), label 1 is the second row; the result is a Series.
second_row_with_title_and_revenue_columns = movies_df.loc[1, ['title', 'revenue']]
print(second_row_with_title_and_revenue_columns)


title      Avatar
revenue       200
Name: 1, dtype: object


In [99]:
# Boolean filtering: comparing a column with a value yields one True/False flag per row,
# and passing those flags to 'loc' keeps only the rows flagged True.
has_revenue_above_100 = movies_df['revenue'] > 100
movies_with_revenue_greater_than_100 = movies_df.loc[has_revenue_above_100]
print(movies_with_revenue_greater_than_100)

      title  year  revenue
1    Avatar  2005      200
2  End Game  2019      150
