In [None]:
# Intro to Pandas
# ===============
# Pandas is a Python package for data analysis and exposes two new
# data structures: Dataframes and Series.

# - Dataframes store tabular data consisting of rows and columns.
# - Series are one-dimensional labeled arrays, similar to Python's
#   built-in list or dict data types.

# In this notebook, we will explore the data structures that Pandas
# provides, and learn how to interact with them.

In [1]:
# 1. Importing Pandas
# ===================

# To import an external Python library such as Pandas, use Python's
# `import` statement. To save yourself some typing later on, you can
# give the library you import an alias. Here, we are importing Pandas
# and giving it an alias of `pd`.
import pandas as pd

In [2]:
# 2. Creating A Dataframe
# =======================

# We will load a CSV file as a dataframe using Pandas' `read_csv`
# function. This will allow us to use Pandas' dataframe methods to
# explore the data in the CSV.
# The path is relative, so `data.csv` is looked up in the current
# working directory.
# Note: the loaded dataframe starts with a column labelled
# `Unnamed: 0` (see the output of `df.head()` below), and the
# positional `iloc` examples later on count it as column 0.
# NOTE(review): this looks like a saved index column - confirm before
# dropping it or loading with `index_col=0`, which would shift those
# positions.
df = pd.read_csv("data.csv")

In [6]:
# With the CSV loaded into a dataframe, we can begin exploring its
# contents.
# The .head() method displays the first rows of the dataframe; it
# shows up to 5 rows by default, which we spell out here with `n=5`:
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,score_phrase,title,url,platform,score,genre,editors_choice,release_year,release_month,release_day
0,0,Amazing,LittleBigPlanet PS Vita,/games/littlebigplanet-vita/vita-98907,PlayStation Vita,9.0,Platformer,Y,2012,9,12
1,1,Amazing,LittleBigPlanet PS Vita -- Marvel Super Hero E...,/games/littlebigplanet-ps-vita-marvel-super-he...,PlayStation Vita,9.0,Platformer,Y,2012,9,12
2,2,Great,Splice: Tree of Life,/games/splice/ipad-141070,iPad,8.5,Puzzle,N,2012,9,12
3,3,Great,NHL 13,/games/nhl-13/xbox-360-128182,Xbox 360,8.5,Sports,N,2012,9,11


In [None]:
# 3. Selecting Data - Part 1
# ==========================

# To examine specific rows and columns of a Dataframe, Pandas provides
# the `iloc` and `loc` indexers. We will start with `iloc`, which lets
# you access both rows and columns by their integer position. Note
# that, as with Python lists, positions are counted from 0 and the end
# of a slice is excluded (`0:2` selects positions 0 and 1).

In [21]:
# The iloc[] method gives you control over which rows and columns to
# display.

0    PlayStation Vita
1    PlayStation Vita
2                iPad
3            Xbox 360
Name: platform, dtype: object

In [23]:
# A bare `:` means "everything" along that axis, so using it for both
# the rows and the columns displays the whole dataframe:
df.iloc[:, :]

Unnamed: 0.1,Unnamed: 0,score_phrase,title,url,platform,score,genre,editors_choice,release_year,release_month,release_day
0,0,Amazing,LittleBigPlanet PS Vita,/games/littlebigplanet-vita/vita-98907,PlayStation Vita,9.0,Platformer,Y,2012,9,12
1,1,Amazing,LittleBigPlanet PS Vita -- Marvel Super Hero E...,/games/littlebigplanet-ps-vita-marvel-super-he...,PlayStation Vita,9.0,Platformer,Y,2012,9,12
2,2,Great,Splice: Tree of Life,/games/splice/ipad-141070,iPad,8.5,Puzzle,N,2012,9,12
3,3,Great,NHL 13,/games/nhl-13/xbox-360-128182,Xbox 360,8.5,Sports,N,2012,9,11


In [24]:
# Get the first five columns (positions 0 through 4) for the rows at
# positions 1 and 2. The end of a slice is excluded, so `1:3` stops
# before row 3 and `:5` stops before column 5.
df.iloc[1:3,:5]

Unnamed: 0.1,Unnamed: 0,score_phrase,title,url,platform
1,1,Amazing,LittleBigPlanet PS Vita -- Marvel Super Hero E...,/games/littlebigplanet-ps-vita-marvel-super-he...,PlayStation Vita
2,2,Great,Splice: Tree of Life,/games/splice/ipad-141070,iPad


In [22]:
# The `platform` column sits at position 4, so select that position
# for every row (`:`):
df.iloc[:, 4]

0    PlayStation Vita
1    PlayStation Vita
2                iPad
3            Xbox 360
Name: platform, dtype: object

In [25]:
# You can also store a slice of the dataframe in a variable!
# Note: selecting a single column by integer position returns a
# Series (see the `Name: title, dtype: object` footer in the output),
# not a dataframe, despite the `_df` suffix of the variable name.
# Use a list of positions (`df.iloc[:, [2]]`) to get a one-column
# dataframe instead.
titles_df = df.iloc[:,2]
titles_df.head()

0                              LittleBigPlanet PS Vita
1    LittleBigPlanet PS Vita -- Marvel Super Hero E...
2                                 Splice: Tree of Life
3                                               NHL 13
Name: title, dtype: object

In [18]:
# It can be hard to keep track of which index numbers your columns
# correspond to - you can use `loc` to select columns by name instead.
# `loc` is label-based: the `:` selects every row, and the list names
# the columns to return, in the order given.

# Get the title and score columns for all rows.
df.loc[:, ["title", "score"]]

Unnamed: 0,title,score
0,LittleBigPlanet PS Vita,9.0
1,LittleBigPlanet PS Vita -- Marvel Super Hero E...,9.0
2,Splice: Tree of Life,8.5
3,NHL 13,8.5


In [27]:
# 4. Selecting Data - Part 2
# ==========================
# TODO: add description and more examples
# Boolean filtering: comparing a column to a value yields one
# True/False per row, and indexing with that result keeps only the
# rows marked True - here, the rows whose `editors_choice` is "Y".
df.loc[df["editors_choice"] == "Y"]

Unnamed: 0.1,Unnamed: 0,score_phrase,title,url,platform,score,genre,editors_choice,release_year,release_month,release_day
0,0,Amazing,LittleBigPlanet PS Vita,/games/littlebigplanet-vita/vita-98907,PlayStation Vita,9.0,Platformer,Y,2012,9,12
1,1,Amazing,LittleBigPlanet PS Vita -- Marvel Super Hero E...,/games/littlebigplanet-ps-vita-marvel-super-he...,PlayStation Vita,9.0,Platformer,Y,2012,9,12


In [None]:
# 5. Dataframe Methods
# ====================

In [None]:
# 6. Series
# =========