In [1]:
import numpy as np
import pandas as pd
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

### Series From Lists and Arrays

- A Pandas Series is a 1D array of indexed data essentially a column
- NumPy array have an implicitly defined integer index (to get and set values), the Pandas Series has an explicitly defined integer index, which gives the Series object additional capabilities.

In [3]:
import pandas as pd 

fruits_dict = { 'apples': 10,
                'oranges': 8,
                'bananas': 3,
                'strawberries': 20}

fruits = pd.Series(fruits_dict)
print("Value for apples: ", fruits['apples'], "\n")
# Series also supports array-style operations such as slicing:
print(fruits['oranges':'strawberries'])

Value for apples:  10 

oranges          8
bananas          3
strawberries    20
dtype: int64


### The DataFrame Object

- While a Series is essentially a column, a DataFrame is a multi-dimensional table made up of a collection of Series

In [6]:
#Constructing a DataFrame From a Series Object
data_s1 = pd.Series([12, 24, 33, 15], 
           index=['apples', 'bananas', 'strawberries', 'oranges'])

# 'quantity' is the name for our column
dataframe1 = pd.DataFrame(data_s1, columns=['quantity'])
print(dataframe1)

              quantity
apples              12
bananas             24
strawberries        33
oranges             15


In [7]:
#Constructing a DataFrame From a Dictionary
dict = {"country": ["Norway", "Sweden", "Spain", "France"],
       "capital": ["Oslo", "Stockholm", "Madrid", "Paris"],
       "SomeColumn": ["100", "200", "300", "400"]}

data = pd.DataFrame(dict)
print(data)

  country    capital SomeColumn
0  Norway       Oslo        100
1  Sweden  Stockholm        200
2   Spain     Madrid        300
3  France      Paris        400


In [8]:
#construct a DataFrame from a dictionary of Series objects
quantity = pd.Series([12, 24, 33, 15], 
           index=['apples', 'bananas', 'strawberries', 'oranges'])

price = pd.Series([4, 4.5, 8, 7.5], 
           index=['apples', 'bananas', 'strawberries', 'oranges'])
           
df = pd.DataFrame({'quantity': quantity,
                    'price': price})
print(df)

              quantity  price
apples              12    4.0
bananas             24    4.5
strawberries        33    8.0
oranges             15    7.5


In [10]:
# Reading data from the downloaded CSV:
movies_df = pd.read_csv("IMDB-Movie-Data.csv")

# 1. We can set the index at load time
movies_df_title_indexed = pd.read_csv("IMDB-Movie-Data.csv", index_col='Title')

# 2. We can set the index after the DataFrame has been created
movies_df_title_indexed = movies_df.set_index('Title')

In [11]:
# This should be one of the very first commands you run after loading your data:
movies_df_title_indexed.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1000 entries, Guardians of the Galaxy to Nine Lives
Data columns (total 11 columns):
Rank                  1000 non-null int64
Genre                 1000 non-null object
Description           1000 non-null object
Director              1000 non-null object
Actors                1000 non-null object
Year                  1000 non-null int64
Runtime (Minutes)     1000 non-null int64
Rating                1000 non-null float64
Votes                 1000 non-null int64
Revenue (Millions)    872 non-null float64
Metascore             936 non-null float64
dtypes: float64(3), int64(4), object(4)
memory usage: 93.8+ KB


### Data Selection and Slicing#

In [12]:
# To obtain a Series as output
col_as_series = movies_df['Genre']

# Print the object type and the first 5 rows of the series
print(type(col_as_series))
col_as_series.head()


# To obtain a dataFrame as output
col_as_df = movies_df[['Genre']]
print(type(col_as_df))
col_as_df.head()

<class 'pandas.core.series.Series'>


0     Action,Adventure,Sci-Fi
1    Adventure,Mystery,Sci-Fi
2             Horror,Thriller
3     Animation,Comedy,Family
4    Action,Adventure,Fantasy
Name: Genre, dtype: object

<class 'pandas.core.frame.DataFrame'>


Unnamed: 0,Genre
0,"Action,Adventure,Sci-Fi"
1,"Adventure,Mystery,Sci-Fi"
2,"Horror,Thriller"
3,"Animation,Comedy,Family"
4,"Action,Adventure,Fantasy"


### Working With Rows

- loc: the loc attribute allows indexing and slicing that always references the explicit index, i.e., locates by name. For example, in our DataFrame indexed by title, we will use the title of the movie to select the required row.

- iloc: the iloc attribute allows indexing and slicing that always references the implicit Python-style index, i.e., locates by numerical index. In the case of our DataFrame, we will pass the numerical index of the movie for which we are interested in fetching data.

- ix: this is a hybrid of the other two approaches. We will understand this better by looking at some examples

In [15]:
movies_df_title_indexed.head(2)
#With loc we give the explicit index. In our case the title, "Guardians of the Galaxy":
gog = movies_df_title_indexed.loc["Guardians of the Galaxy"]
gog
# With iloc we give it the numerical index of "Guardians of the Galaxy":
gog = movies_df_title_indexed.iloc[0]
gog

Unnamed: 0_level_0,Rank,Genre,Description,Director,Actors,Year,Runtime (Minutes),Rating,Votes,Revenue (Millions),Metascore
Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Guardians of the Galaxy,1,"Action,Adventure,Sci-Fi",A group of intergalactic criminals are forced ...,James Gunn,"Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S...",2014,121,8.1,757074,333.13,76.0
Prometheus,2,"Adventure,Mystery,Sci-Fi","Following clues to the origin of mankind, a te...",Ridley Scott,"Noomi Rapace, Logan Marshall-Green, Michael Fa...",2012,124,7.0,485820,126.46,65.0


Rank                                                                  1
Genre                                           Action,Adventure,Sci-Fi
Description           A group of intergalactic criminals are forced ...
Director                                                     James Gunn
Actors                Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S...
Year                                                               2014
Runtime (Minutes)                                                   121
Rating                                                              8.1
Votes                                                            757074
Revenue (Millions)                                               333.13
Metascore                                                            76
Name: Guardians of the Galaxy, dtype: object

Rank                                                                  1
Genre                                           Action,Adventure,Sci-Fi
Description           A group of intergalactic criminals are forced ...
Director                                                     James Gunn
Actors                Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S...
Year                                                               2014
Runtime (Minutes)                                                   121
Rating                                                              8.1
Votes                                                            757074
Revenue (Millions)                                               333.13
Metascore                                                            76
Name: Guardians of the Galaxy, dtype: object

In [16]:
#multiple rows with loc
multiple_rows = movies_df_title_indexed.loc['Guardians of the Galaxy':'Sing']
multiple_rows

#multiple rows with iloc
multiple_rows = movies_df_title_indexed.iloc[0:4]
multiple_rows

Unnamed: 0_level_0,Rank,Genre,Description,Director,Actors,Year,Runtime (Minutes),Rating,Votes,Revenue (Millions),Metascore
Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Guardians of the Galaxy,1,"Action,Adventure,Sci-Fi",A group of intergalactic criminals are forced ...,James Gunn,"Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S...",2014,121,8.1,757074,333.13,76.0
Prometheus,2,"Adventure,Mystery,Sci-Fi","Following clues to the origin of mankind, a te...",Ridley Scott,"Noomi Rapace, Logan Marshall-Green, Michael Fa...",2012,124,7.0,485820,126.46,65.0
Split,3,"Horror,Thriller",Three girls are kidnapped by a man with a diag...,M. Night Shyamalan,"James McAvoy, Anya Taylor-Joy, Haley Lu Richar...",2016,117,7.3,157606,138.12,62.0
Sing,4,"Animation,Comedy,Family","In a city of humanoid animals, a hustling thea...",Christophe Lourdelet,"Matthew McConaughey,Reese Witherspoon, Seth Ma...",2016,108,7.2,60545,270.32,59.0


Unnamed: 0_level_0,Rank,Genre,Description,Director,Actors,Year,Runtime (Minutes),Rating,Votes,Revenue (Millions),Metascore
Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Guardians of the Galaxy,1,"Action,Adventure,Sci-Fi",A group of intergalactic criminals are forced ...,James Gunn,"Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S...",2014,121,8.1,757074,333.13,76.0
Prometheus,2,"Adventure,Mystery,Sci-Fi","Following clues to the origin of mankind, a te...",Ridley Scott,"Noomi Rapace, Logan Marshall-Green, Michael Fa...",2012,124,7.0,485820,126.46,65.0
Split,3,"Horror,Thriller",Three girls are kidnapped by a man with a diag...,M. Night Shyamalan,"James McAvoy, Anya Taylor-Joy, Haley Lu Richar...",2016,117,7.3,157606,138.12,62.0
Sing,4,"Animation,Comedy,Family","In a city of humanoid animals, a hustling thea...",Christophe Lourdelet,"Matthew McConaughey,Reese Witherspoon, Seth Ma...",2016,108,7.2,60545,270.32,59.0


In [17]:
# Select all rows uptil Sing and all columns uptil Director
movies_df_title_indexed.ix[:'Sing', :4]
movies_df_title_indexed.ix[:4, :'Director']

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  


Unnamed: 0_level_0,Rank,Genre,Description,Director
Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Guardians of the Galaxy,1,"Action,Adventure,Sci-Fi",A group of intergalactic criminals are forced ...,James Gunn
Prometheus,2,"Adventure,Mystery,Sci-Fi","Following clues to the origin of mankind, a te...",Ridley Scott
Split,3,"Horror,Thriller",Three girls are kidnapped by a man with a diag...,M. Night Shyamalan
Sing,4,"Animation,Comedy,Family","In a city of humanoid animals, a hustling thea...",Christophe Lourdelet


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0_level_0,Rank,Genre,Description,Director
Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Guardians of the Galaxy,1,"Action,Adventure,Sci-Fi",A group of intergalactic criminals are forced ...,James Gunn
Prometheus,2,"Adventure,Mystery,Sci-Fi","Following clues to the origin of mankind, a te...",Ridley Scott
Split,3,"Horror,Thriller",Three girls are kidnapped by a man with a diag...,M. Night Shyamalan
Sing,4,"Animation,Comedy,Family","In a city of humanoid animals, a hustling thea...",Christophe Lourdelet


### Conditional Data Selection and Filtering#

In [18]:
# get all 2016 movies:
movies_df_title_indexed[movies_df_title_indexed['Year'] == 2016]

# All movies with a rating higher than 8.0 
movies_df_title_indexed[movies_df_title_indexed['Rating'] > 8.0 ]

Unnamed: 0_level_0,Rank,Genre,Description,Director,Actors,Year,Runtime (Minutes),Rating,Votes,Revenue (Millions),Metascore
Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Split,3,"Horror,Thriller",Three girls are kidnapped by a man with a diag...,M. Night Shyamalan,"James McAvoy, Anya Taylor-Joy, Haley Lu Richar...",2016,117,7.3,157606,138.12,62.0
Sing,4,"Animation,Comedy,Family","In a city of humanoid animals, a hustling thea...",Christophe Lourdelet,"Matthew McConaughey,Reese Witherspoon, Seth Ma...",2016,108,7.2,60545,270.32,59.0
Suicide Squad,5,"Action,Adventure,Fantasy",A secret government agency recruits some of th...,David Ayer,"Will Smith, Jared Leto, Margot Robbie, Viola D...",2016,123,6.2,393727,325.02,40.0
The Great Wall,6,"Action,Adventure,Fantasy",European mercenaries searching for black powde...,Yimou Zhang,"Matt Damon, Tian Jing, Willem Dafoe, Andy Lau",2016,103,6.1,56036,45.13,42.0
La La Land,7,"Comedy,Drama,Music",A jazz pianist falls for an aspiring actress i...,Damien Chazelle,"Ryan Gosling, Emma Stone, Rosemarie DeWitt, J....",2016,128,8.3,258682,151.06,93.0
Mindhorn,8,Comedy,A has-been actor best known for playing the ti...,Sean Foley,"Essie Davis, Andrea Riseborough, Julian Barrat...",2016,89,6.4,2490,,71.0
The Lost City of Z,9,"Action,Adventure,Biography","A true-life drama, centering on British explor...",James Gray,"Charlie Hunnam, Robert Pattinson, Sienna Mille...",2016,141,7.1,7188,8.01,78.0
Passengers,10,"Adventure,Drama,Romance",A spacecraft traveling to a distant colony pla...,Morten Tyldum,"Jennifer Lawrence, Chris Pratt, Michael Sheen,...",2016,116,7.0,192177,100.01,41.0
Fantastic Beasts and Where to Find Them,11,"Adventure,Family,Fantasy",The adventures of writer Newt Scamander in New...,David Yates,"Eddie Redmayne, Katherine Waterston, Alison Su...",2016,133,7.5,232072,234.02,66.0
Hidden Figures,12,"Biography,Drama,History",The story of a team of female African-American...,Theodore Melfi,"Taraji P. Henson, Octavia Spencer, Janelle Mon...",2016,127,7.8,93103,169.27,74.0


Unnamed: 0_level_0,Rank,Genre,Description,Director,Actors,Year,Runtime (Minutes),Rating,Votes,Revenue (Millions),Metascore
Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Guardians of the Galaxy,1,"Action,Adventure,Sci-Fi",A group of intergalactic criminals are forced ...,James Gunn,"Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S...",2014,121,8.1,757074,333.13,76.0
La La Land,7,"Comedy,Drama,Music",A jazz pianist falls for an aspiring actress i...,Damien Chazelle,"Ryan Gosling, Emma Stone, Rosemarie DeWitt, J....",2016,128,8.3,258682,151.06,93.0
Hacksaw Ridge,17,"Biography,Drama,History","WWII American Army Medic Desmond T. Doss, who ...",Mel Gibson,"Andrew Garfield, Sam Worthington, Luke Bracey,...",2016,139,8.2,211760,67.12,71.0
Lion,19,"Biography,Drama",A five-year-old Indian boy gets lost on the st...,Garth Davis,"Dev Patel, Nicole Kidman, Rooney Mara, Sunny P...",2016,118,8.1,102061,51.69,69.0
Bahubali: The Beginning,27,"Action,Adventure,Drama","In ancient India, an adventurous and daring ma...",S.S. Rajamouli,"Prabhas, Rana Daggubati, Anushka Shetty,Tamann...",2015,159,8.3,76193,6.5,
Interstellar,37,"Adventure,Drama,Sci-Fi",A team of explorers travel through a wormhole ...,Christopher Nolan,"Matthew McConaughey, Anne Hathaway, Jessica Ch...",2014,169,8.6,1047747,187.99,74.0
Star Wars: Episode VII - The Force Awakens,51,"Action,Adventure,Fantasy",Three decades after the defeat of the Galactic...,J.J. Abrams,"Daisy Ridley, John Boyega, Oscar Isaac, Domhna...",2015,136,8.1,661608,936.63,81.0
The Dark Knight,55,"Action,Crime,Drama",When the menace known as the Joker wreaks havo...,Christopher Nolan,"Christian Bale, Heath Ledger, Aaron Eckhart,Mi...",2008,152,9.0,1791916,533.32,82.0
The Prestige,65,"Drama,Mystery,Sci-Fi",Two stage magicians engage in competitive one-...,Christopher Nolan,"Christian Bale, Hugh Jackman, Scarlett Johanss...",2006,130,8.5,913152,53.08,66.0
Mad Max: Fury Road,68,"Action,Adventure,Sci-Fi",A woman rebels against a tyrannical ruler in p...,George Miller,"Tom Hardy, Charlize Theron, Nicholas Hoult, Zo...",2015,120,8.1,632842,153.63,90.0


- Retrieve the latest movies (movies released between 2010 and 2016) that had a very poor rating (score less than 6.0) but were among the highest earners at the box office (revenue above the 75th percentile). 

In [19]:
movies_df_title_indexed[((movies_df_title_indexed['Year'] >= 2010) & (movies_df_title_indexed['Year'] <= 2016))
    & (movies_df_title_indexed['Rating'] < 6.0)
    & (movies_df_title_indexed['Revenue (Millions)'] > movies_df_title_indexed['Revenue (Millions)'].quantile(0.75))]

Unnamed: 0_level_0,Rank,Genre,Description,Director,Actors,Year,Runtime (Minutes),Rating,Votes,Revenue (Millions),Metascore
Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Fifty Shades of Grey,64,"Drama,Romance,Thriller",Literature student Anastasia Steele's life cha...,Sam Taylor-Johnson,"Dakota Johnson, Jamie Dornan, Jennifer Ehle,El...",2015,125,4.1,244474,166.15,46.0
Ghostbusters,80,"Action,Comedy,Fantasy","Following a ghost invasion of Manhattan, paran...",Paul Feig,"Melissa McCarthy, Kristen Wiig, Kate McKinnon,...",2016,116,5.3,147717,128.34,60.0
Transformers: Age of Extinction,127,"Action,Adventure,Sci-Fi",Autobots must escape sight from a bounty hunte...,Michael Bay,"Mark Wahlberg, Nicola Peltz, Jack Reynor, Stan...",2014,165,5.7,255483,245.43,32.0
The Twilight Saga: Breaking Dawn - Part 2,367,"Adventure,Drama,Fantasy","After the birth of Renesmee, the Cullens gathe...",Bill Condon,"Kristen Stewart, Robert Pattinson, Taylor Laut...",2012,115,5.5,194329,292.3,52.0
Grown Ups 2,395,Comedy,After moving his family back to his hometown t...,Dennis Dugan,"Adam Sandler, Kevin James, Chris Rock, David S...",2013,101,5.4,114482,133.67,19.0
Clash of the Titans,576,"Action,Adventure,Fantasy","Perseus demigod, son of Zeus, battles the mini...",Louis Leterrier,"Sam Worthington, Liam Neeson, Ralph Fiennes,Ja...",2010,106,5.8,238206,163.19,39.0
Kickboxer: Vengeance,581,Action,A kick boxer is out to avenge his brother.,John Stockwell,"Dave Bautista, Alain Moussi, Gina Carano, Jean...",2016,90,4.9,6809,131.56,37.0
Teenage Mutant Ninja Turtles,658,"Action,Adventure,Comedy","When a kingpin threatens New York City, a grou...",Jonathan Liebesman,"Megan Fox, Will Arnett, William Fichtner, Noel...",2014,101,5.9,178527,190.87,31.0
Green Lantern,674,"Action,Adventure,Sci-Fi",Reckless test pilot Hal Jordan is granted an a...,Martin Campbell,"Ryan Reynolds, Blake Lively, Peter Sarsgaard,M...",2011,114,5.6,231907,116.59,39.0
G.I. Joe: Retaliation,879,"Action,Adventure,Sci-Fi",The G.I. Joes are not only fighting their mort...,Jon M. Chu,"Dwayne Johnson, Channing Tatum, Adrianne Palic...",2013,110,5.8,152145,122.51,41.0


In [20]:
# Let’s group our dataset by director and see how much revenue each director has
movies_df.groupby('Director').sum()

# Let’s group our dataset by director and see the average rating of each director
movies_df.groupby('Director')[['Rating']].mean()

Unnamed: 0_level_0,Rank,Year,Runtime (Minutes),Rating,Votes,Revenue (Millions),Metascore
Director,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Aamir Khan,992,2007,165,8.5,102697,1.20,42.0
Abdellatif Kechiche,312,2013,180,7.8,103150,2.20,88.0
Adam Leon,784,2016,82,6.5,1031,0.00,77.0
Adam McKay,1910,8039,443,28.0,806827,438.14,262.0
Adam Shankman,1460,4019,240,12.6,167467,157.33,128.0
Adam Wingard,1454,4030,189,11.8,97157,21.07,123.0
Afonso Poyart,652,2015,101,6.4,36300,0.00,36.0
Aisling Walsh,840,2016,115,7.8,346,0.00,60.0
Akan Satayev,197,2016,95,6.3,3799,0.00,0.0
Akiva Schaffer,654,2016,87,6.7,30875,9.39,68.0


Unnamed: 0_level_0,Rating
Director,Unnamed: 1_level_1
Aamir Khan,8.500000
Abdellatif Kechiche,7.800000
Adam Leon,6.500000
Adam McKay,7.000000
Adam Shankman,6.300000
Adam Wingard,5.900000
Afonso Poyart,6.400000
Aisling Walsh,7.800000
Akan Satayev,6.300000
Akiva Schaffer,6.700000


In [21]:
#Let’s group our dataset by director and see who earned the most
movies_df.groupby('Director')[['Revenue (Millions)']].sum().sort_values(['Revenue (Millions)'], ascending=False)

Unnamed: 0_level_0,Revenue (Millions)
Director,Unnamed: 1_level_1
J.J. Abrams,1683.45
David Yates,1630.51
Christopher Nolan,1515.09
Michael Bay,1421.32
Francis Lawrence,1299.81
Joss Whedon,1082.27
Jon Favreau,1025.60
Zack Snyder,975.74
Peter Jackson,860.45
Gore Verbinski,829.82


### Dealing With Missing and Duplicates

- Pandas is built to handle both NaN and None, and it treats the two as essentially interchangeable for indicating missing or null values
 -  isnull(), notnull(), dropna(), and fillna()

In [22]:
#count the number of null values in each column
movies_df_title_indexed.isnull().sum()

Rank                    0
Genre                   0
Description             0
Director                0
Actors                  0
Year                    0
Runtime (Minutes)       0
Rating                  0
Votes                   0
Revenue (Millions)    128
Metascore              64
dtype: int64

In [23]:
#count the number of not null values in each column
movies_df_title_indexed.notnull().sum()

Rank                  1000
Genre                 1000
Description           1000
Director              1000
Actors                1000
Year                  1000
Runtime (Minutes)     1000
Rating                1000
Votes                 1000
Revenue (Millions)     872
Metascore              936
dtype: int64

-  **Dropping Null Values**

In [24]:
# Drop all rows with any missing data
movies_df_title_indexed.dropna()

# Drop all the columns containing any missing data
movies_df_title_indexed.dropna(axis=1)

Unnamed: 0_level_0,Rank,Genre,Description,Director,Actors,Year,Runtime (Minutes),Rating,Votes,Revenue (Millions),Metascore
Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Guardians of the Galaxy,1,"Action,Adventure,Sci-Fi",A group of intergalactic criminals are forced ...,James Gunn,"Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S...",2014,121,8.1,757074,333.13,76.0
Prometheus,2,"Adventure,Mystery,Sci-Fi","Following clues to the origin of mankind, a te...",Ridley Scott,"Noomi Rapace, Logan Marshall-Green, Michael Fa...",2012,124,7.0,485820,126.46,65.0
Split,3,"Horror,Thriller",Three girls are kidnapped by a man with a diag...,M. Night Shyamalan,"James McAvoy, Anya Taylor-Joy, Haley Lu Richar...",2016,117,7.3,157606,138.12,62.0
Sing,4,"Animation,Comedy,Family","In a city of humanoid animals, a hustling thea...",Christophe Lourdelet,"Matthew McConaughey,Reese Witherspoon, Seth Ma...",2016,108,7.2,60545,270.32,59.0
Suicide Squad,5,"Action,Adventure,Fantasy",A secret government agency recruits some of th...,David Ayer,"Will Smith, Jared Leto, Margot Robbie, Viola D...",2016,123,6.2,393727,325.02,40.0
The Great Wall,6,"Action,Adventure,Fantasy",European mercenaries searching for black powde...,Yimou Zhang,"Matt Damon, Tian Jing, Willem Dafoe, Andy Lau",2016,103,6.1,56036,45.13,42.0
La La Land,7,"Comedy,Drama,Music",A jazz pianist falls for an aspiring actress i...,Damien Chazelle,"Ryan Gosling, Emma Stone, Rosemarie DeWitt, J....",2016,128,8.3,258682,151.06,93.0
The Lost City of Z,9,"Action,Adventure,Biography","A true-life drama, centering on British explor...",James Gray,"Charlie Hunnam, Robert Pattinson, Sienna Mille...",2016,141,7.1,7188,8.01,78.0
Passengers,10,"Adventure,Drama,Romance",A spacecraft traveling to a distant colony pla...,Morten Tyldum,"Jennifer Lawrence, Chris Pratt, Michael Sheen,...",2016,116,7.0,192177,100.01,41.0
Fantastic Beasts and Where to Find Them,11,"Adventure,Family,Fantasy",The adventures of writer Newt Scamander in New...,David Yates,"Eddie Redmayne, Katherine Waterston, Alison Su...",2016,133,7.5,232072,234.02,66.0


Unnamed: 0_level_0,Rank,Genre,Description,Director,Actors,Year,Runtime (Minutes),Rating,Votes
Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Guardians of the Galaxy,1,"Action,Adventure,Sci-Fi",A group of intergalactic criminals are forced ...,James Gunn,"Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S...",2014,121,8.1,757074
Prometheus,2,"Adventure,Mystery,Sci-Fi","Following clues to the origin of mankind, a te...",Ridley Scott,"Noomi Rapace, Logan Marshall-Green, Michael Fa...",2012,124,7.0,485820
Split,3,"Horror,Thriller",Three girls are kidnapped by a man with a diag...,M. Night Shyamalan,"James McAvoy, Anya Taylor-Joy, Haley Lu Richar...",2016,117,7.3,157606
Sing,4,"Animation,Comedy,Family","In a city of humanoid animals, a hustling thea...",Christophe Lourdelet,"Matthew McConaughey,Reese Witherspoon, Seth Ma...",2016,108,7.2,60545
Suicide Squad,5,"Action,Adventure,Fantasy",A secret government agency recruits some of th...,David Ayer,"Will Smith, Jared Leto, Margot Robbie, Viola D...",2016,123,6.2,393727
The Great Wall,6,"Action,Adventure,Fantasy",European mercenaries searching for black powde...,Yimou Zhang,"Matt Damon, Tian Jing, Willem Dafoe, Andy Lau",2016,103,6.1,56036
La La Land,7,"Comedy,Drama,Music",A jazz pianist falls for an aspiring actress i...,Damien Chazelle,"Ryan Gosling, Emma Stone, Rosemarie DeWitt, J....",2016,128,8.3,258682
Mindhorn,8,Comedy,A has-been actor best known for playing the ti...,Sean Foley,"Essie Davis, Andrea Riseborough, Julian Barrat...",2016,89,6.4,2490
The Lost City of Z,9,"Action,Adventure,Biography","A true-life drama, centering on British explor...",James Gray,"Charlie Hunnam, Robert Pattinson, Sienna Mille...",2016,141,7.1,7188
Passengers,10,"Adventure,Drama,Romance",A spacecraft traveling to a distant colony pla...,Morten Tyldum,"Jennifer Lawrence, Chris Pratt, Michael Sheen,...",2016,116,7.0,192177


In [30]:
df

# Drop columns where all the values are missing
df.dropna(axis='columns', how='all')

# Thresh to specify a minimum number of non-null values 
# for the row/column to be kept
df.dropna(axis='rows', thresh=10)

Unnamed: 0,quantity,price
apples,12,4.0
bananas,24,4.5
strawberries,33,8.0
oranges,15,7.5


Unnamed: 0,quantity,price
apples,12,4.0
bananas,24,4.5
strawberries,33,8.0
oranges,15,7.5


Unnamed: 0,quantity,price


In [33]:
# Getting the mean value for the column:
revenue = movies_df_title_indexed['Revenue (Millions)']
revenue_mean = revenue.mean()

print("Mean Revenue:", revenue_mean)


# Let's fill the nulls with the mean value:
revenue.fillna(revenue_mean, inplace=True)

# Let's get the updated status of our DataFrame:
movies_df_title_indexed.isnull().sum()

Mean Revenue: 82.9563761467888


Rank                   0
Genre                  0
Description            0
Director               0
Actors                 0
Year                   0
Runtime (Minutes)      0
Rating                 0
Votes                  0
Revenue (Millions)     0
Metascore             64
dtype: int64

Rank                   0
Genre                  0
Description            0
Director               0
Actors                 0
Year                   0
Runtime (Minutes)      0
Rating                 0
Votes                  0
Revenue (Millions)     0
Metascore             64
dtype: int64