In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

### Show the use of Boolean masking on series objects

### Example 1

In [32]:
# Powers of two from 2**0 up to 2**10, stored in a pandas Series.
twopowers_sr = pd.Series([2 ** exponent for exponent in range(11)])
# Hand-written Boolean mask, one flag per element; True marks the positions to keep.
BM = [
    False, False, True, True, False, False,
    True, True, True, True, True,
]

In [33]:
# Keep only the elements whose flag in BM is True; the original index labels are retained.
twopowers_sr.loc[BM]

2        4
3        8
6       64
7      128
8      256
9      512
10    1024
dtype: int64

### Example 2

In [4]:
# Build the mask from a comparison instead of typing it by hand:
# True wherever the value is at least 500.
BM = twopowers_sr.ge(500)
twopowers_sr.loc[BM]

9      512
10    1024
dtype: int64

#### Use on DataFrame

In [37]:
# Load the Netflix titles CSV (path is relative to the working directory)
# and preview the first rows of the resulting DataFrame.
data = pd.read_csv('netflix_titles_2021.csv')
data.head(n=20)

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...
5,s6,TV Show,Midnight Mass,Mike Flanagan,"Kate Siegel, Zach Gilford, Hamish Linklater, H...",,"September 24, 2021",2021,TV-MA,1 Season,"TV Dramas, TV Horror, TV Mysteries",The arrival of a charismatic young priest brin...
6,s7,Movie,My Little Pony: A New Generation,"Robert Cullen, José Luis Ucha","Vanessa Hudgens, Kimiko Glenn, James Marsden, ...",,"September 24, 2021",2021,PG,91 min,Children & Family Movies,Equestria's divided. But a bright-eyed hero be...
7,s8,Movie,Sankofa,Haile Gerima,"Kofi Ghanaba, Oyafunmike Ogunlano, Alexandra D...","United States, Ghana, Burkina Faso, United Kin...","September 24, 2021",1993,TV-MA,125 min,"Dramas, Independent Movies, International Movies","On a photo shoot in Ghana, an American model s..."
8,s9,TV Show,The Great British Baking Show,Andy Devonshire,"Mel Giedroyc, Sue Perkins, Mary Berry, Paul Ho...",United Kingdom,"September 24, 2021",2021,TV-14,9 Seasons,"British TV Shows, Reality TV",A talented batch of amateur bakers face off in...
9,s10,Movie,The Starling,Theodore Melfi,"Melissa McCarthy, Chris O'Dowd, Kevin Kline, T...",United States,"September 24, 2021",2021,PG-13,104 min,"Comedies, Dramas",A woman adjusting to life after a loss contend...


### `unique()` lists the distinct values in a column and `nunique()` counts them

#### Example

In [6]:
# Number of distinct values in the "type" column.
data["type"].nunique()

2

In [7]:
# The distinct values themselves, returned as an array.
data["type"].unique()

array(['Movie', 'TV Show'], dtype=object)

In [34]:
# Distinct values of the "rating" column (missing entries appear as nan).
data["rating"].unique()

array(['PG-13', 'TV-MA', 'PG', 'TV-14', 'TV-PG', 'TV-Y', 'TV-Y7', 'R',
       'TV-G', 'G', 'NC-17', '74 min', '84 min', '66 min', 'NR', nan,
       'TV-Y7-FV', 'UR'], dtype=object)

In [9]:
# Print every distinct "country" string; a title produced in several
# countries shows up as one comma-separated value.
print(data["country"].unique())

['United States' 'South Africa' nan 'India'
 'United States, Ghana, Burkina Faso, United Kingdom, Germany, Ethiopia'
 'United Kingdom' 'Germany, Czech Republic' 'Mexico' 'Turkey' 'Australia'
 'United States, India, France' 'Finland' 'China, Canada, United States'
 'South Africa, United States, Japan' 'Nigeria' 'Japan'
 'Spain, United States' 'France' 'Belgium' 'United Kingdom, United States'
 'United States, United Kingdom' 'France, United States' 'South Korea'
 'Spain' 'United States, Singapore' 'United Kingdom, Australia, France'
 'United Kingdom, Australia, France, United States'
 'United States, Canada' 'Germany, United States'
 'South Africa, United States' 'United States, Mexico'
 'United States, Italy, France, Japan'
 'United States, Italy, Romania, United Kingdom'
 'Australia, United States' 'Argentina, Venezuela'
 'United States, United Kingdom, Canada' 'China, Hong Kong' 'Russia'
 'Canada' 'Hong Kong' 'United States, China, Hong Kong'
 'Italy, United States' 'United States, G

### Display the count of titles released in 2020 for each country (the mask filters on `release_year` only, so movies and TV shows are both counted)

In [35]:
# Boolean mask: True for rows whose release_year equals 2020.
BM = data["release_year"].eq(2020)

In [11]:
# Print the country of every row selected by the mask BM.
print(data.loc[BM, "country"])

0                           United States
16                                    NaN
17                                 Mexico
32                         United Kingdom
34                                    NaN
                      ...                
5972                               Canada
7594                        United States
8099                        United States
8125    United States, South Korea, China
8132                        United States
Name: country, Length: 953, dtype: object


In [19]:
# Same row filter as the Boolean mask, expressed through DataFrame.query.
x = data.query('release_year==2020')
x["country"]

0                           United States
16                                    NaN
17                                 Mexico
32                         United Kingdom
34                                    NaN
                      ...                
5972                               Canada
7594                        United States
8099                        United States
8125    United States, South Korea, China
8132                        United States
Name: country, Length: 953, dtype: object

In [20]:
# Count how often each country string occurs among the queried rows
# (missing countries are not counted by value_counts by default).
x["country"].value_counts()

United States                             336
India                                      75
United Kingdom                             44
South Korea                                28
Canada                                     26
                                         ... 
Germany, Czech Republic                     1
United States, Bulgaria                     1
United Kingdom, France, Germany, Spain      1
Thailand, United States                     1
United States, South Korea, China           1
Name: country, Length: 107, dtype: int64

In [21]:
# Same tally obtained through the Boolean mask BM instead of query().
data.loc[BM, "country"].value_counts()

United States                             336
India                                      75
United Kingdom                             44
South Korea                                28
Canada                                     26
                                         ... 
Germany, Czech Republic                     1
United States, Bulgaria                     1
United Kingdom, France, Germany, Spain      1
Thailand, United States                     1
United States, South Korea, China           1
Name: country, Length: 107, dtype: int64

### Display the count of TV shows in each country

In [25]:
# Mask selecting the rows whose type is exactly 'TV Show'.
BM = data["type"].eq('TV Show')
# Tally the country strings of those rows.
data.loc[BM, "country"].value_counts()

United States                                    760
United Kingdom                                   213
Japan                                            169
South Korea                                      158
India                                             79
                                                ... 
Belarus                                            1
United Kingdom, Australia                          1
France, Australia, Germany                         1
Australia, New Zealand, United States              1
United States, France, South Korea, Indonesia      1
Name: country, Length: 196, dtype: int64

### Display the number of titles per release year for individual ratings (the cells below filter by rating only; no TV Show filter is applied)

In [26]:
# List the rating labels available for filtering.
# NOTE(review): the values '74 min', '84 min' and '66 min' look like durations
# that ended up in the rating column — confirm against the raw CSV.
data["rating"].unique()

array(['PG-13', 'TV-MA', 'PG', 'TV-14', 'TV-PG', 'TV-Y', 'TV-Y7', 'R',
       'TV-G', 'G', 'NC-17', '74 min', '84 min', '66 min', 'NR', nan,
       'TV-Y7-FV', 'UR'], dtype=object)

### PG

In [30]:
# Mask selecting the rows rated exactly 'PG'.
BM = data["rating"].eq('PG')
# Number of such rows per release year, most frequent year first.
data.loc[BM, "release_year"].value_counts()

2018    31
2011    18
2014    15
2017    15
2020    15
2013    14
2016    14
2019    12
2021    11
2015    11
2007    10
2008    10
2009    10
2010     8
2006     8
2004     8
2012     7
2005     5
2001     5
2003     5
2002     4
1976     4
1984     4
2000     4
1993     4
1989     3
1994     3
1992     3
1981     2
1975     2
1982     2
1995     2
1978     2
1977     2
1986     2
1999     2
1983     1
1974     1
1973     1
1997     1
1980     1
1979     1
1985     1
1991     1
1990     1
1996     1
Name: release_year, dtype: int64

### TV-MA

In [31]:
# Mask selecting the rows rated exactly 'TV-MA'.
BM = data["rating"].eq('TV-MA')
# Number of such rows per release year, most frequent year first.
data.loc[BM, "release_year"].value_counts()

2018    549
2019    500
2020    469
2017    451
2016    341
2021    270
2015    184
2014     79
2013     64
2012     54
2010     42
2011     27
2008     25
2009     18
2006     17
2007     17
2004     10
2003      8
2005      8
1998      7
2002      7
1999      5
1991      5
1983      4
1993      4
1990      3
1992      3
1997      3
1985      3
1989      2
1978      2
1975      2
1945      2
1979      2
2001      2
1972      2
1982      2
1984      1
1973      1
1986      1
1981      1
1988      1
1970      1
1994      1
1977      1
2000      1
1987      1
1995      1
1963      1
1976      1
1967      1
Name: release_year, dtype: int64

### Titles released in 1998 in India (no filter on `type` is applied, so TV shows would be included too)

In [36]:
# Print a summary of the frame: column names, non-null counts and dtypes.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8807 entries, 0 to 8806
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   show_id       8807 non-null   object
 1   type          8807 non-null   object
 2   title         8807 non-null   object
 3   director      6173 non-null   object
 4   cast          7982 non-null   object
 5   country       7976 non-null   object
 6   date_added    8797 non-null   object
 7   release_year  8807 non-null   int64 
 8   rating        8803 non-null   object
 9   duration      8804 non-null   object
 10  listed_in     8807 non-null   object
 11  description   8807 non-null   object
dtypes: int64(1), object(11)
memory usage: 825.8+ KB


In [45]:
# First condition: released in 1998.
BM1 = data["release_year"].eq(1998)
# Second condition: country is exactly 'India'. This is an exact string match,
# so rows whose country lists several countries are not selected.
BM2 = data["country"].eq('India')
# Combine both masks element-wise with & and show the country of the matching rows.
data.loc[BM1 & BM2, "country"]

24      India
2074    India
2722    India
2734    India
5007    India
5462    India
Name: country, dtype: object