### Polars dataframes

In [1]:
import polars as pl

In [2]:
# Load a CSV file into a Polars DataFrame
# infer_schema_length=10000 -> use up to 10,000 rows to infer each column's data type
# try_parse_dates=True -> try to parse date-like string columns as dates
video_games_reviews = pl.read_csv('../datasets/reviews_videogames_350_simplified.csv', infer_schema_length=10000, try_parse_dates=True)

In [3]:
# Configure the number of characters to show for each string column
pl.Config.set_fmt_str_lengths(60)

polars.config.Config

#### Inspect the data

In [4]:
# Show the first 2 rows of the DataFrame
# We will use head() extensively to show a few rows of a DataFrame
video_games_reviews.head(2)

title,rating,review_date,review_text
str,f64,date,str
"""Killzone: Shadow Fall (PlayStation 4)""",5.0,2016-12-02,"""First time having this game! It pretty good and the graphic…"
"""Resident Evil 5 - Xbox 360""",5.0,2014-07-29,"""good"""


In [5]:
# Show the last 2 rows of the DataFrame
video_games_reviews.tail(2)

title,rating,review_date,review_text
str,f64,date,str
"""Grand Theft Auto V - PlayStation 4""",5.0,2015-12-03,"""Fantastic game. The gameplay is completely revamped and is …"
"""Resident Evil 5 - Playstation 3""",5.0,2013-03-06,"""muy buen producto todos debirian de comprar esto se los rec…"


In [6]:
# Show a random sample of 2 rows of the DataFrame
# No seed is passed, so the sampled rows change from run to run
video_games_reviews.sample(2)

title,rating,review_date,review_text
str,f64,date,str
"""Xbox 360 LIVE 1600 Points""",5.0,2012-06-17,"""I love the ability to buy microsoft points on amazon. You …"
"""Yoshi amiibo (Super Smash Bros Series)""",4.0,2018-05-18,"""Overpriced, but came in like-new condition."""


#### Printing the information to screen

In [7]:
# Last instruction of a cell is automatically printed
a = 5
video_games_reviews.head(2)

title,rating,review_date,review_text
str,f64,date,str
"""Killzone: Shadow Fall (PlayStation 4)""",5.0,2016-12-02,"""First time having this game! It pretty good and the graphic…"
"""Resident Evil 5 - Xbox 360""",5.0,2014-07-29,"""good"""


In [8]:
# Only the last expression of a cell is displayed automatically:
# the head(2) call below is evaluated but not shown, because `a` is the last line
video_games_reviews.head(2)
a = 5
a

5

In [9]:
# Display an object explicitly with display(): unlike a bare expression,
# it is shown even when it is not the last line of the cell
display(video_games_reviews.head(2))
a = 5
a

title,rating,review_date,review_text
str,f64,date,str
"""Killzone: Shadow Fall (PlayStation 4)""",5.0,2016-12-02,"""First time having this game! It pretty good and the graphic…"
"""Resident Evil 5 - Xbox 360""",5.0,2014-07-29,"""good"""


5

In [10]:
# Print to screen using print()
print(video_games_reviews.head(2))

shape: (2, 4)
┌─────────────────────────────────────────┬────────┬─────────────┬─────────────────────────────────┐
│ title                                   ┆ rating ┆ review_date ┆ review_text                     │
│ ---                                     ┆ ---    ┆ ---         ┆ ---                             │
│ str                                     ┆ f64    ┆ date        ┆ str                             │
╞═════════════════════════════════════════╪════════╪═════════════╪═════════════════════════════════╡
│ Killzone: Shadow Fall (PlayStation 4)   ┆ 5.0    ┆ 2016-12-02  ┆ First time having this game! It │
│                                         ┆        ┆             ┆ pretty good and the graphics…   │
│ Resident Evil 5 - Xbox 360              ┆ 5.0    ┆ 2014-07-29  ┆ good                            │
└─────────────────────────────────────────┴────────┴─────────────┴─────────────────────────────────┘


#### Information about the table

In [11]:
# Schema of the table: column names and types
video_games_reviews.schema

{'title': Utf8, 'rating': Float64, 'review_date': Date, 'review_text': Utf8}

In [12]:
# Column names
video_games_reviews.columns

['title', 'rating', 'review_date', 'review_text']

In [13]:
# Column data types
video_games_reviews.dtypes

[Utf8, Float64, Date, Utf8]

In [14]:
# Shape of the DataFrame: number of rows and columns
video_games_reviews.shape

(49577, 4)

In [15]:
# Number of rows
video_games_reviews.height

49577

In [16]:
# Number of columns
video_games_reviews.width

4

In [17]:
# Visualize statistics about the columns: count, mean, std, min, max, etc.
video_games_reviews.describe()

describe,title,rating,review_date,review_text
str,str,f64,str,str
"""count""","""49577""",49577.0,"""49577""","""49577"""
"""null_count""","""418""",0.0,"""0""","""17"""
"""mean""",,4.317345,,
"""std""",,1.16256,,
"""min""","""Alan Wake - Xbox 360""",1.0,"""2001-10-01""",""" batman"""
"""max""","""inFAMOUS: Second Son Standard Edition (PlayStation 4)""",5.0,"""2018-08-27""","""~PROS~ +Fantastic ""OMG no wayyyy that's crayyyzyyy"" kind of…"
"""median""",,5.0,,


In [18]:
# Estimate the memory usage of the DataFrame
video_games_reviews.estimated_size('mb')

33.72313117980957

#### Combine multiple methods, and assign a modified dataframe to a new variable

In [19]:
# Visualize statistics, and show only the first 3 rows
video_games_reviews.describe().head(3)

describe,title,rating,review_date,review_text
str,str,f64,str,str
"""count""","""49577""",49577.0,"""49577""","""49577"""
"""null_count""","""418""",0.0,"""0""","""17"""
"""mean""",,4.317345,,


In [20]:
# Assign the description of the DataFrame to a variable called video_games_description
video_games_description = video_games_reviews.describe()

In [21]:
# Show the new variable
video_games_description

describe,title,rating,review_date,review_text
str,str,f64,str,str
"""count""","""49577""",49577.0,"""49577""","""49577"""
"""null_count""","""418""",0.0,"""0""","""17"""
"""mean""",,4.317345,,
"""std""",,1.16256,,
"""min""","""Alan Wake - Xbox 360""",1.0,"""2001-10-01""",""" batman"""
"""max""","""inFAMOUS: Second Son Standard Edition (PlayStation 4)""",5.0,"""2018-08-27""","""~PROS~ +Fantastic ""OMG no wayyyy that's crayyyzyyy"" kind of…"
"""median""",,5.0,,


#### Sort and add a row number

In [22]:
# Sort the DataFrame by the time of review in descending order and show the first 2 rows
video_games_reviews.sort('review_date', descending=True).head(2)

title,rating,review_date,review_text
str,f64,date,str
"""Batman: Arkham Knight - PlayStation 4 [Digital Code]""",5.0,2018-08-27,"""Was good but I didn't play it as much as my best friend did…"
"""Call of Duty: Black Ops III - Standard Edition - PlayStatio…",5.0,2018-08-14,"""Great game"""


In [23]:
# Sort the DataFrame by the time of review in ascending order and show the first 2 rows
video_games_reviews.sort('review_date', descending=False).head(2)

title,rating,review_date,review_text
str,f64,date,str
"""Halo - Xbox""",5.0,2001-10-01,"""Forget all you computer NERDS out there who are so OBSESSED…"
"""Halo - Xbox""",5.0,2001-10-01,"""Forget all you computer NERDS out there who are so OBSESSED…"


In [24]:
# Sort the DataFrame by title in descending order, then by review date in descending order
# (descending takes one flag per sort column; use [False, True] for ascending titles)
video_games_reviews.sort(['title','review_date'], descending=[True, True]).head(2)

title,rating,review_date,review_text
str,f64,date,str
"""inFAMOUS: Second Son Standard Edition (PlayStation 4)""",4.0,2018-06-23,"""game case got squeezed and the cd wasn't secured it was pop…"
"""inFAMOUS: Second Son Standard Edition (PlayStation 4)""",4.0,2018-06-11,"""The boy played it multiple times so..."""


In [25]:
# Add a row number column (named row_nr by default, starting at 0) to the DataFrame
# NOTE(review): newer Polars releases deprecate with_row_count() in favour of with_row_index() — confirm against the installed version
video_games_reviews.with_row_count().head(2)

row_nr,title,rating,review_date,review_text
u32,str,f64,date,str
0,"""Killzone: Shadow Fall (PlayStation 4)""",5.0,2016-12-02,"""First time having this game! It pretty good and the graphic…"
1,"""Resident Evil 5 - Xbox 360""",5.0,2014-07-29,"""good"""


#### Select and rename columns

##### Select

In [26]:
# Select columns using their names
video_games_reviews.select('title', 'review_text').head(2)

title,review_text
str,str
"""Killzone: Shadow Fall (PlayStation 4)""","""First time having this game! It pretty good and the graphic…"
"""Resident Evil 5 - Xbox 360""","""good"""


In [27]:
# Select columns using a list of their names
video_games_reviews.select(['title', 'review_text']).head(2)

title,review_text
str,str
"""Killzone: Shadow Fall (PlayStation 4)""","""First time having this game! It pretty good and the graphic…"
"""Resident Evil 5 - Xbox 360""","""good"""


In [28]:
# Select columns using their names with pl.col()
video_games_reviews.select(pl.col('title'), pl.col('review_text')).head(2)

title,review_text
str,str
"""Killzone: Shadow Fall (PlayStation 4)""","""First time having this game! It pretty good and the graphic…"
"""Resident Evil 5 - Xbox 360""","""good"""


In [29]:
# Rename selected columns by adding a prefix or a suffix to their names
# NOTE(review): newer Polars releases move these to .name.prefix() / .name.suffix() — confirm against the installed version
video_games_reviews.select(
    pl.col('title').prefix('videogame_'), 
    pl.col('rating').suffix('_out_of_5'), 
    pl.col('review_text')
    ).head(2)

videogame_title,rating_out_of_5,review_text
str,f64,str
"""Killzone: Shadow Fall (PlayStation 4)""",5.0,"""First time having this game! It pretty good and the graphic…"
"""Resident Evil 5 - Xbox 360""",5.0,"""good"""


In [30]:
# Select columns with pl.col() and rename them using keywords
# Important: columns selected with keywords (example: videogame_title = ) should come after columns without keywords
video_games_reviews.select(
    videogame_title = pl.col('title'), 
    rating_out_of_5 = pl.col('rating'), 
    review = pl.col('review_text')
    ).head(2)

videogame_title,rating_out_of_5,review
str,f64,str
"""Killzone: Shadow Fall (PlayStation 4)""",5.0,"""First time having this game! It pretty good and the graphic…"
"""Resident Evil 5 - Xbox 360""",5.0,"""good"""


In [31]:
# Select columns with pl.col() and rename them using alias
video_games_reviews.select(
    pl.col('title').alias('videogame_title'), 
    pl.col('rating').alias('rating_out_of_5'), 
    pl.col('review_text').alias('review')
    ).head(2)

videogame_title,rating_out_of_5,review
str,f64,str
"""Killzone: Shadow Fall (PlayStation 4)""",5.0,"""First time having this game! It pretty good and the graphic…"
"""Resident Evil 5 - Xbox 360""",5.0,"""good"""


In [32]:
# Select columns based on their data type: select all string columns
# pl.col(pl.Utf8) refers to all columns with a string data type
# and is expanded by Polars to the matching columns: pl.col('title'), pl.col('review_text')
video_games_reviews.select(pl.col(pl.Utf8)).head(2)

title,review_text
str,str
"""Killzone: Shadow Fall (PlayStation 4)""","""First time having this game! It pretty good and the graphic…"
"""Resident Evil 5 - Xbox 360""","""good"""


In [33]:
# Select columns based on their data type: select all numerical columns
# NOTE(review): pl.NUMERIC_DTYPES is deprecated in newer Polars releases (polars.selectors.numeric() is the suggested replacement) — confirm against the installed version
video_games_reviews.select(pl.col(pl.NUMERIC_DTYPES)).head(2)

rating
f64
5.0
5.0


In [34]:
# Select columns based on regex: all column names that contain 'review'
video_games_reviews.select(pl.col('^.*review.*$')).head(2)

review_date,review_text
date,str
2016-12-02,"""First time having this game! It pretty good and the graphic…"
2014-07-29,"""good"""


In [35]:
# Exclude certain columns: all columns except those whose name contains 'review'
# exclude() can be used with a regex, with column names, or with data types
video_games_reviews.select(pl.all().exclude('^.*review.*$')).head(2)

title,rating
str,f64
"""Killzone: Shadow Fall (PlayStation 4)""",5.0
"""Resident Evil 5 - Xbox 360""",5.0


In [36]:
# Add a new column with the string 'videogame'
video_games_reviews.select(
    pl.col('title'),
    pl.col('rating'),
    category = pl.lit('videogame')
).head(2)

title,rating,category
str,f64,str
"""Killzone: Shadow Fall (PlayStation 4)""",5.0,"""videogame"""
"""Resident Evil 5 - Xbox 360""",5.0,"""videogame"""


In [37]:
# Modify existing columns
# if the column has the same name => modifies a column
# if the column has a different name => adds a new column 
# All columns are calculated in parallel, so rating_out_of_5 depends on the initial rating column
# and not on the modified rating column
video_games_reviews.select(
    pl.col('rating') / 5 * 100, # modified column 
    rating_out_of_5 = pl.col('rating'), # new column
    ).head(2)

rating,rating_out_of_5
f64,f64
100.0,5.0
100.0,5.0


##### With Columns

In [38]:
# Another way to select columns is using with_columns()
# with_columns() includes all columns by default, and 
# adds or modifies the specified columns
video_games_reviews.with_columns(
    pl.col('rating') / 5 * 100, # modified column 
    rating_out_of_5 = pl.col('rating'), # new column
    ).head(2)

title,rating,review_date,review_text,rating_out_of_5
str,f64,date,str,f64
"""Killzone: Shadow Fall (PlayStation 4)""",100.0,2016-12-02,"""First time having this game! It pretty good and the graphic…",5.0
"""Resident Evil 5 - Xbox 360""",100.0,2014-07-29,"""good""",5.0


#### Filter rows

In [39]:
# Filter dataframe based on one value
video_games_reviews.filter(
    pl.col('title') == 'The Last of Us Remastered - PlayStation 4'
    ).head(2)

title,rating,review_date,review_text
str,f64,date,str
"""The Last of Us Remastered - PlayStation 4""",5.0,2017-01-12,"""brings the DLCs has well."""
"""The Last of Us Remastered - PlayStation 4""",5.0,2015-02-12,"""wow umm yea this game is a must such a good time I feel sor…"


In [40]:
# Filter dataframe based on multiple values
video_games_reviews.filter(
    pl.col('title').is_in(['The Last of Us Remastered - PlayStation 4', 'Resident Evil 5 - Xbox 360'])
    ).head(2)


title,rating,review_date,review_text
str,f64,date,str
"""Resident Evil 5 - Xbox 360""",5.0,2014-07-29,"""good"""
"""The Last of Us Remastered - PlayStation 4""",5.0,2017-01-12,"""brings the DLCs has well."""


In [41]:
# Filter rows on multiple conditions: combine them with AND (&)
# Each condition needs its own parentheses because & binds more tightly than ==
video_games_reviews.filter(
    (pl.col('rating') == 1) &
    (pl.col('title') == 'inFAMOUS: Second Son Standard Edition (PlayStation 4)')
    ).head(2)

title,rating,review_date,review_text
str,f64,date,str
"""inFAMOUS: Second Son Standard Edition (PlayStation 4)""",1.0,2014-07-12,"""Going through more of Sucker Punch's latest inFamous sequal…"
"""inFAMOUS: Second Son Standard Edition (PlayStation 4)""",1.0,2014-06-16,"""Horrible game, the beginning should have foretold the rest …"


In [42]:
# Filter rows on multiple conditions: combine them with OR (|)
# Each condition needs its own parentheses because | binds more tightly than ==
video_games_reviews.filter(
    (pl.col('rating') == 1) |
    (pl.col('title') == 'inFAMOUS: Second Son Standard Edition (PlayStation 4)')
    ).head(2)


title,rating,review_date,review_text
str,f64,date,str
"""Bloodborne""",1.0,2015-07-13,"""Game blowd"""
"""inFAMOUS: Second Son Standard Edition (PlayStation 4)""",5.0,2017-01-23,"""Just awesome. Officially one of my son's favorite games. Tr…"


In [43]:
# Filter rows using numerical values: ratings greater than or equal to 1 and less than 3
video_games_reviews.filter(
    (pl.col('rating') >= 1) &
    (pl.col('rating') < 3)
    ).head(2)

title,rating,review_date,review_text
str,f64,date,str
"""Final Fantasy X""",2.0,2014-08-24,"""This game didn't work. It kept glitching up. But this is a …"
"""Bloodborne""",1.0,2015-07-13,"""Game blowd"""


In [46]:
# Filter dataframe using regex: filter all videogame titles that contain 'PlayStation 4' or 'Xbox 360'
video_games_reviews.filter(
    pl.col('title').str.contains('PlayStation 4|Xbox 360')
).head(2)

title,rating,review_date,review_text
str,f64,date,str
"""Killzone: Shadow Fall (PlayStation 4)""",5.0,2016-12-02,"""First time having this game! It pretty good and the graphic…"
"""Resident Evil 5 - Xbox 360""",5.0,2014-07-29,"""good"""


In [48]:
# Filter and show only unique reviews
# Remove all reviews that appear more than once
video_games_reviews.filter(
    video_games_reviews.is_unique()
).head(2)

title,rating,review_date,review_text
str,f64,date,str
"""Killzone: Shadow Fall (PlayStation 4)""",5.0,2016-12-02,"""First time having this game! It pretty good and the graphic…"
"""Grip-iT Analog Stick Covers, Set of 4""",5.0,2012-01-03,"""Very good product, does exactly what it says. Good quality,…"


In [49]:
# Filter and show only duplicated reviews: 
# Show only reviews appearing more than once in the table
video_games_reviews.filter(
    video_games_reviews.is_duplicated()
).head(2)

title,rating,review_date,review_text
str,f64,date,str
"""Resident Evil 5 - Xbox 360""",5.0,2014-07-29,"""good"""
"""Bioshock 2 - Xbox 360""",5.0,2010-03-14,"""It's surprising to see Bioshock 2 since the first didn't ex…"


In [50]:
# Keep only the reviews whose review_date appears exactly once in the table
# (i.e. days on which a single review was written)
video_games_reviews.filter(
    pl.col('review_date').is_unique()
).head(2)

title,rating,review_date,review_text
str,f64,date,str
"""Uncharted 2: Among Thieves - Playstation 3""",3.0,2011-02-04,"""Perhaps the most disappointing game I've played on PS3. It …"
"""Wii""",5.0,2007-04-21,"""When I first saw information on the new systems I wanted a …"


In [51]:
# Keep only the reviews whose review_date appears more than once in the table
# (i.e. days on which several reviews were written)
video_games_reviews.filter(
    pl.col('review_date').is_duplicated()
).head(2)

title,rating,review_date,review_text
str,f64,date,str
"""Killzone: Shadow Fall (PlayStation 4)""",5.0,2016-12-02,"""First time having this game! It pretty good and the graphic…"
"""Resident Evil 5 - Xbox 360""",5.0,2014-07-29,"""good"""


In [53]:
# For reviews that appear more than once, keep only one review
video_games_reviews.unique().head(2)

title,rating,review_date,review_text
str,f64,date,str
"""Killzone: Shadow Fall (PlayStation 4)""",5.0,2016-12-02,"""First time having this game! It pretty good and the graphic…"
"""Resident Evil 5 - Xbox 360""",5.0,2014-07-29,"""good"""


In [55]:
# For reviews that appear more than once, keep only the first review
# Not specifying the keep parameter will result in keeping any review, and will be faster
video_games_reviews.unique(keep='first').head(2)

title,rating,review_date,review_text
str,f64,date,str
"""Killzone: Shadow Fall (PlayStation 4)""",5.0,2016-12-02,"""First time having this game! It pretty good and the graphic…"
"""Resident Evil 5 - Xbox 360""",5.0,2014-07-29,"""good"""


In [56]:
# Keep only one review for each title and rating
video_games_reviews.unique(subset=['title','rating']).head(2)

title,rating,review_date,review_text
str,f64,date,str
"""Killzone: Shadow Fall (PlayStation 4)""",5.0,2016-12-02,"""First time having this game! It pretty good and the graphic…"
"""Resident Evil 5 - Xbox 360""",5.0,2014-07-29,"""good"""


#### Slicing and indexing

In [61]:
# Keep 3 rows starting at row 5
video_games_reviews.slice(5, 3)

title,rating,review_date,review_text
str,f64,date,str
"""Titanfall - Xbox One""",5.0,2015-03-04,"""As expected and prompt delivery!!!"""
"""Assassin's Creed - Playstation 3""",5.0,2012-10-31,"""It's a typical AC game with a few tweaks and additions that…"
"""Mario Kart 8 - Nintendo Wii U""",5.0,2017-03-24,"""This is without a doubt the best Mario Kart game to date. T…"


In [67]:
# Square brackets indexing should be limited to:
# 1. Extract a scalar value (another option is using .item() )
# 2. Convert a DataFrame to a Series (another option is using .get_column() )
# 3. Inspecting some rows or columns
# In general, select, with_columns, and filter should be used instead of square brackets indexing
# ! Disadvantages: Square brackets selecting only works in eager mode, and is not parallelized
# Select 3 rows starting at row 5
video_games_reviews[5:8]

title,rating,review_date,review_text
str,f64,date,str
"""Titanfall - Xbox One""",5.0,2015-03-04,"""As expected and prompt delivery!!!"""
"""Assassin's Creed - Playstation 3""",5.0,2012-10-31,"""It's a typical AC game with a few tweaks and additions that…"
"""Mario Kart 8 - Nintendo Wii U""",5.0,2017-03-24,"""This is without a doubt the best Mario Kart game to date. T…"


In [68]:
# Select 3 rows starting at row 5 for columns title and rating
video_games_reviews[5:8, ['title', 'rating']]

title,rating
str,f64
"""Titanfall - Xbox One""",5.0
"""Assassin's Creed - Playstation 3""",5.0
"""Mario Kart 8 - Nintendo Wii U""",5.0


In [71]:
# Select 3 rows starting at row 5 for columns 0 and 1
video_games_reviews[5:8, [0, 1]]

title,rating
str,f64
"""Titanfall - Xbox One""",5.0
"""Assassin's Creed - Playstation 3""",5.0
"""Mario Kart 8 - Nintendo Wii U""",5.0


In [74]:
# Select columns title and rating by passing a list of column names in square brackets
video_games_reviews[['title', 'rating']].head(2)

title,rating
str,f64
"""Killzone: Shadow Fall (PlayStation 4)""",5.0
"""Resident Evil 5 - Xbox 360""",5.0


#### Extracting columns, rows, dataframes and items

In [77]:
# Extract a column from a DataFrame and convert it to a Series
video_games_reviews.get_column('title').head(2)

title
str
"""Killzone: Shadow Fall (PlayStation 4)"""
"""Resident Evil 5 - Xbox 360"""


In [84]:
# Extract all columns and convert them to a list of Series
video_games_reviews.head(1).get_columns()

[shape: (1,)
 Series: 'title' [str]
 [
 	"Killzone: Shadow Fall (PlayStation 4)"
 ],
 shape: (1,)
 Series: 'rating' [f64]
 [
 	5.0
 ],
 shape: (1,)
 Series: 'review_date' [date]
 [
 	2016-12-02
 ],
 shape: (1,)
 Series: 'review_text' [str]
 [
 	"First time having this game! It pretty good and the graphic…
 ]]

In [85]:
# Extract rows from a dataframe as a list of tuples
# ! This materializes all the rows in memory. It's expensive and should be avoided when possible
video_games_reviews.head(2).rows()

[('Killzone: Shadow Fall (PlayStation 4)',
  5.0,
  datetime.date(2016, 12, 2),
  'First time having this game! It pretty good and the graphics are very nice'),
 ('Resident Evil 5 - Xbox 360', 5.0, datetime.date(2014, 7, 29), 'good')]

In [86]:
# Extract rows from a dataframe as a list of dicts (more expensive)
# ! This materializes all the rows in memory. It's expensive and should be avoided when possible
video_games_reviews.head(2).rows(named=True)

[{'title': 'Killzone: Shadow Fall (PlayStation 4)',
  'rating': 5.0,
  'review_date': datetime.date(2016, 12, 2),
  'review_text': 'First time having this game! It pretty good and the graphics are very nice'},
 {'title': 'Resident Evil 5 - Xbox 360',
  'rating': 5.0,
  'review_date': datetime.date(2014, 7, 29),
  'review_text': 'good'}]

In [88]:
# Extract rows from a dataframe as an iterator
# ! Export methods of Polars should be preferred instead of iterating over rows
video_games_reviews.iter_rows()

<generator object DataFrame.iter_rows at 0x7f4ef9fc2980>

In [89]:
# Extract slices of 1000 rows from a dataframe as an iterator
video_games_reviews.iter_slices(n_rows=1000)

<generator object DataFrame.iter_slices at 0x7f4eb4308d60>

In [91]:
# Extract a list of Dataframes partitioned based on the specified column
video_games_reviews.head(2).partition_by('title')

[shape: (1, 4)
 ┌─────────────────────────────────────────┬────────┬─────────────┬─────────────────────────────────┐
 │ title                                   ┆ rating ┆ review_date ┆ review_text                     │
 │ ---                                     ┆ ---    ┆ ---         ┆ ---                             │
 │ str                                     ┆ f64    ┆ date        ┆ str                             │
 ╞═════════════════════════════════════════╪════════╪═════════════╪═════════════════════════════════╡
 │ Killzone: Shadow Fall (PlayStation 4)   ┆ 5.0    ┆ 2016-12-02  ┆ First time having this game! It │
 │                                         ┆        ┆             ┆ pretty good and the graphics…   │
 └─────────────────────────────────────────┴────────┴─────────────┴─────────────────────────────────┘,
 shape: (1, 4)
 ┌────────────────────────────┬────────┬─────────────┬─────────────┐
 │ title                      ┆ rating ┆ review_date ┆ review_text │
 │ ---         

In [94]:
# Extract a scalar using .item()
# .item() returns the single value of a one-element Series (here: the first title) as a Python scalar
video_games_reviews.head(1).get_column('title').item()

'Killzone: Shadow Fall (PlayStation 4)'

In [97]:
# Convert a column to a list using .to_list()
video_games_reviews.get_column('title').head(5).to_list()

['Killzone: Shadow Fall (PlayStation 4)',
 'Resident Evil 5 - Xbox 360',
 'Bioshock 2 - Xbox 360',
 'Grip-iT Analog Stick Covers, Set of 4',
 'Dead Space (X-BOX 360) Platinum hits']

In [99]:
# Convert a dataframe to a Python dictionary using .to_dict()
video_games_reviews.head(2).to_dict(as_series=False)

{'title': ['Killzone: Shadow Fall (PlayStation 4)',
  'Resident Evil 5 - Xbox 360'],
 'rating': [5.0, 5.0],
 'review_date': [datetime.date(2016, 12, 2), datetime.date(2014, 7, 29)],
 'review_text': ['First time having this game! It pretty good and the graphics are very nice',
  'good']}