### import the libraries

In [45]:
import numpy as np
import pandas as pd

### reading the dataset

In [46]:
# Load the Netflix titles CSV into a DataFrame; the relative path is resolved
# against the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='netflix_titles.csv')

### top n and bottom n rows

In [47]:
# Peek at the first three rows of the frame.
df.head(n=3)


Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...


In [48]:
# Peek at the last three rows of the frame.
df.tail(n=3)

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
8804,s8805,Movie,Zombieland,Ruben Fleischer,"Jesse Eisenberg, Woody Harrelson, Emma Stone, ...",United States,"November 1, 2019",2009,R,88 min,"Comedies, Horror Movies",Looking to survive in a world taken over by zo...
8805,s8806,Movie,Zoom,Peter Hewitt,"Tim Allen, Courteney Cox, Chevy Chase, Kate Ma...",United States,"January 11, 2020",2006,PG,88 min,"Children & Family Movies, Comedies","Dragged from civilian life, a former superhero..."
8806,s8807,Movie,Zubaan,Mozez Singh,"Vicky Kaushal, Sarah-Jane Dias, Raaghav Chanan...",India,"March 2, 2019",2015,TV-14,111 min,"Dramas, International Movies, Music & Musicals",A scrappy but poor boy worms his way into a ty...


### number of rows and columns present

In [None]:
# Dimensions of the frame as a (n_rows, n_columns) tuple
df.shape

### columns data types

In [49]:
# Summary of the frame: index range, per-column non-null counts and dtypes,
# and memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8807 entries, 0 to 8806
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   show_id       8807 non-null   object
 1   type          8807 non-null   object
 2   title         8807 non-null   object
 3   director      6173 non-null   object
 4   cast          7982 non-null   object
 5   country       7976 non-null   object
 6   date_added    8797 non-null   object
 7   release_year  8807 non-null   int64 
 8   rating        8803 non-null   object
 9   duration      8804 non-null   object
 10  listed_in     8807 non-null   object
 11  description   8807 non-null   object
dtypes: int64(1), object(11)
memory usage: 825.8+ KB


### data type conversion

In [50]:
# 'date_added' is loaded as text such as "September 25, 2021". Trim the
# leading/trailing whitespace from each value, then parse it with an explicit
# format. Values that do not match the format become NaT (errors='coerce')
# instead of raising.
df['date_added'] = pd.to_datetime(
    df['date_added'].str.strip(),
    format='%B %d, %Y',
    errors='coerce',
)

In [51]:
# Verify the type conversion: 'date_added' should now report datetime64[ns]
df.dtypes

show_id                 object
type                    object
title                   object
director                object
cast                    object
country                 object
date_added      datetime64[ns]
release_year             int64
rating                  object
duration                object
listed_in               object
description             object
dtype: object

### get column names

In [52]:
# Column labels of the frame, returned as a pandas Index
df.columns


Index(['show_id', 'type', 'title', 'director', 'cast', 'country', 'date_added',
       'release_year', 'rating', 'duration', 'listed_in', 'description'],
      dtype='object')

### rename column

In [53]:
# Rename a single column: 'date_added' -> 'date added'.
# rename() returns a new DataFrame, so the result is assigned back to df
# rather than mutating the frame with inplace=True.
df = df.rename(columns={'date_added': 'date added'})
df.columns

Index(['show_id', 'type', 'title', 'director', 'cast', 'country', 'date added',
       'release_year', 'rating', 'duration', 'listed_in', 'description'],
      dtype='object')

In [54]:
# Rename every column by passing a function: each label has its underscores
# replaced with spaces. The renamed frame is assigned back to df instead of
# using inplace=True.
df = df.rename(columns=lambda label: label.replace("_", " "))
df.columns

Index(['show id', 'type', 'title', 'director', 'cast', 'country', 'date added',
       'release year', 'rating', 'duration', 'listed in', 'description'],
      dtype='object')

In [55]:
# Rename every column to upper case by passing str.upper as the mapper.
# The renamed frame is assigned back to df instead of using inplace=True.
df = df.rename(columns=str.upper)
df.columns

Index(['SHOW ID', 'TYPE', 'TITLE', 'DIRECTOR', 'CAST', 'COUNTRY', 'DATE ADDED',
       'RELEASE YEAR', 'RATING', 'DURATION', 'LISTED IN', 'DESCRIPTION'],
      dtype='object')

In [56]:
# Switch the column labels back to lower case (the cells below index columns
# by their lower-case names). The renamed frame is assigned back to df instead
# of using inplace=True.
df = df.rename(columns=str.lower)
df.columns

Index(['show id', 'type', 'title', 'director', 'cast', 'country', 'date added',
       'release year', 'rating', 'duration', 'listed in', 'description'],
      dtype='object')

### Number of occurrences of column values

In [88]:
# Count how often each distinct 'country' value occurs, most frequent first.
# Multi-country entries (e.g. "Uruguay, Guatemala") are counted as their own
# value; they are not split per country.
df['country'].value_counts()

country
United States                             2818
India                                      972
United Kingdom                             419
Japan                                      245
South Korea                                199
                                          ... 
Romania, Bulgaria, Hungary                   1
Uruguay, Guatemala                           1
France, Senegal, Belgium                     1
Mexico, United States, Spain, Colombia       1
United Arab Emirates, Jordan                 1
Name: count, Length: 748, dtype: int64

In [89]:
# Same counts expressed as proportions (normalize=True). The shares are
# relative to rows with a non-missing country: 2818 / 7976 is about 0.3533 for
# the United States, matching the 7976 non-null count reported by df.info().
df['country'].value_counts(normalize=True)

country
United States                             0.353310
India                                     0.121866
United Kingdom                            0.052533
Japan                                     0.030717
South Korea                               0.024950
                                            ...   
Romania, Bulgaria, Hungary                0.000125
Uruguay, Guatemala                        0.000125
France, Senegal, Belgium                  0.000125
Mexico, United States, Spain, Colombia    0.000125
United Arab Emirates, Jordan              0.000125
Name: proportion, Length: 748, dtype: float64

### Drop a column


In [90]:
# drop() returns a new DataFrame without 'title'; df itself is unchanged here.
# To make the removal stick, assign the result back (df = df.drop(...)) or
# pass inplace=True.
df.drop(columns='title').head()

Unnamed: 0,show id,type,director,cast,country,date added,release year,rating,duration,listed in,description
0,s1,Movie,Kirsten Johnson,,United States,2021-09-25,2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,2021-09-24,2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,2021-09-24,2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,,,,2021-09-24,2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,2021-09-24,2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...


### Drop rows

In [91]:
# drop(index=...) returns a new DataFrame without the rows labelled 0, 1 and 2;
# df itself is unchanged here. To make the removal stick, assign the result
# back (df = df.drop(...)) or pass inplace=True.
df.drop(index=[0,1,2]).head()

Unnamed: 0,show id,type,title,director,cast,country,date added,release year,rating,duration,listed in,description
3,s4,TV Show,Jailbirds New Orleans,,,,2021-09-24,2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,2021-09-24,2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...
5,s6,TV Show,Midnight Mass,Mike Flanagan,"Kate Siegel, Zach Gilford, Hamish Linklater, H...",,2021-09-24,2021,TV-MA,1 Season,"TV Dramas, TV Horror, TV Mysteries",The arrival of a charismatic young priest brin...
6,s7,Movie,My Little Pony: A New Generation,"Robert Cullen, José Luis Ucha","Vanessa Hudgens, Kimiko Glenn, James Marsden, ...",,2021-09-24,2021,PG,91 min,Children & Family Movies,Equestria's divided. But a bright-eyed hero be...
7,s8,Movie,Sankofa,Haile Gerima,"Kofi Ghanaba, Oyafunmike Ogunlano, Alexandra D...","United States, Ghana, Burkina Faso, United Kin...",2021-09-24,1993,TV-MA,125 min,"Dramas, Independent Movies, International Movies","On a photo shoot in Ghana, an American model s..."


### .loc

In [92]:
# .loc selects by label: ':' keeps every row, and the list names the columns
# to display
df.loc[:, ['title', 'director']].head()


Unnamed: 0,title,director
0,Dick Johnson Is Dead,Kirsten Johnson
1,Blood & Water,
2,Ganglands,Julien Leclercq
3,Jailbirds New Orleans,
4,Kota Factory,


In [93]:
# A label slice selects a contiguous run of columns; with .loc both ends are
# included, so this returns 'type' through 'country'.
df.loc[:,'type':'country'].head()

Unnamed: 0,type,title,director,cast,country
0,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States
1,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa
2,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",
3,TV Show,Jailbirds New Orleans,,,
4,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India


### .iloc

In [94]:
# .iloc selects by integer position: rows 0-2 and columns 5-7
# (country, date added, release year)
df.iloc[[0,1,2],[5,6,7]]

Unnamed: 0,country,date added,release year
0,United States,2021-09-25,2020
1,South Africa,2021-09-24,2021
2,,2021-09-24,2021


In [95]:
# Positional slices instead of lists. The slice end is exclusive, so 0:2 / 5:7
# returns rows 0-1 and columns 5-6 -- one row and one column fewer than the
# list-based selection [0,1,2] / [5,6,7].
df.iloc[0:2,5:7]

Unnamed: 0,country,date added
0,United States,2021-09-25
1,South Africa,2021-09-24
