In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime


In [2]:
#importing the dataset
netflix_data = pd.read_csv("netflix_titles.csv") 
netflix_data.head(5)

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...


In [3]:
netflix_data.columns

Index(['show_id', 'type', 'title', 'director', 'cast', 'country', 'date_added',
       'release_year', 'rating', 'duration', 'listed_in', 'description'],
      dtype='object')

In [4]:
netflix_data.shape

(8807, 12)

In [5]:
netflix_data.nunique(axis=0)


show_id         8807
type               2
title           8807
director        4528
cast            7692
country          748
date_added      1767
release_year      74
rating            17
duration         220
listed_in        514
description     8775
dtype: int64

In [6]:
netflix_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8807 entries, 0 to 8806
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   show_id       8807 non-null   object
 1   type          8807 non-null   object
 2   title         8807 non-null   object
 3   director      6173 non-null   object
 4   cast          7982 non-null   object
 5   country       7976 non-null   object
 6   date_added    8797 non-null   object
 7   release_year  8807 non-null   int64 
 8   rating        8803 non-null   object
 9   duration      8804 non-null   object
 10  listed_in     8807 non-null   object
 11  description   8807 non-null   object
dtypes: int64(1), object(11)
memory usage: 825.8+ KB


In [7]:
strings = list(netflix_data.columns)
strings.remove('release_year')
for string in strings:
    netflix_data[string] = netflix_data[string].str.strip()

In [8]:
netflix_data.isnull().sum()

show_id            0
type               0
title              0
director        2634
cast             825
country          831
date_added        10
release_year       0
rating             4
duration           3
listed_in          0
description        0
dtype: int64

In [9]:
print(netflix_data.isnull().mean())

show_id         0.000000
type            0.000000
title           0.000000
director        0.299080
cast            0.093675
country         0.094357
date_added      0.001135
release_year    0.000000
rating          0.000454
duration        0.000341
listed_in       0.000000
description     0.000000
dtype: float64


In [10]:
netflix_data.director.fillna("", inplace=True)
netflix_data.cast.fillna("", inplace=True)
netflix_data.country.fillna("", inplace=True)
netflix_data.rating.fillna("", inplace=True)
netflix_data.duration.fillna("", inplace=True)
netflix_data.date_added.fillna("", inplace=True)

date_rows = []
for i in range(len(netflix_data)):
    if netflix_data['date_added'].iloc[i] == "":
        date_rows.append(i)

month_added = []
year_added = []
for i in range(len(netflix_data)):
    # replacing NaN values with 0
    if i in date_rows:
        month_added.append(0)
        year_added.append(0)
    else:
        date = netflix_data['date_added'].iloc[i].split(" ")
        month_added.append(date[0])
        year_added.append(int(date[2]))
        
# turning month names into month numbers
for i, month in enumerate(month_added):
    if month != 0:
        datetime_obj = datetime.strptime(month, "%B")
        month_number = datetime_obj.month
        month_added[i] = month_number
        
netflix_data.insert(7, "month_added", month_added, allow_duplicates = True)
netflix_data.insert(8, "year_added", year_added, allow_duplicates = True)
netflix_data

Unnamed: 0,show_id,type,title,director,cast,country,date_added,month_added,year_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",9,2021,2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",9,2021,2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",9,2021,2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",9,2021,2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",9,2021,2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8802,s8803,Movie,Zodiac,David Fincher,"Mark Ruffalo, Jake Gyllenhaal, Robert Downey J...",United States,"November 20, 2019",11,2019,2007,R,158 min,"Cult Movies, Dramas, Thrillers","A political cartoonist, a crime reporter and a..."
8803,s8804,TV Show,Zombie Dumb,,,,"July 1, 2019",7,2019,2018,TV-Y7,2 Seasons,"Kids' TV, Korean TV Shows, TV Comedies","While living alone in a spooky town, a young g..."
8804,s8805,Movie,Zombieland,Ruben Fleischer,"Jesse Eisenberg, Woody Harrelson, Emma Stone, ...",United States,"November 1, 2019",11,2019,2009,R,88 min,"Comedies, Horror Movies",Looking to survive in a world taken over by zo...
8805,s8806,Movie,Zoom,Peter Hewitt,"Tim Allen, Courteney Cox, Chevy Chase, Kate Ma...",United States,"January 11, 2020",1,2020,2006,PG,88 min,"Children & Family Movies, Comedies","Dragged from civilian life, a former superhero..."


In [11]:
netflix_data['season_count'] = netflix_data.apply(lambda x : x['duration'].split(" ")[0] if "Season" in x['duration'] else "", axis = 1)
netflix_data['duration'] = netflix_data.apply(lambda x : x['duration'].split(" ")[0] if "Season" not in x['duration'] else "", axis = 1)

In [12]:
tv = []
mv = []

# looping through the dataset to identify rows that are TV shows and films
for i in range(len(netflix_data)):
    if netflix_data['type'].iloc[i] == "TV Show":
        tv.append(i)
    else:
        mv.append(i)
 
# grouping rows that are TV shows
tv_shows = netflix_data.loc[tv, :]

#grouping rows that are films
movies = netflix_data.loc[mv, :]

# reseting the index of the new datasets
tv_shows = tv_shows.set_index([pd.Index(range(0, len(tv_shows)))])
movies = movies.set_index([pd.Index(range(0, len(movies)))])
tv_shows = tv_shows.drop(['duration', 'type'],axis=1)
movies = movies.drop(['season_count', 'type'],axis=1)

In [13]:
movies

Unnamed: 0,show_id,title,director,cast,country,date_added,month_added,year_added,release_year,rating,duration,listed_in,description
0,s1,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",9,2021,2020,PG-13,90,Documentaries,"As her father nears the end of his life, filmm..."
1,s7,My Little Pony: A New Generation,"Robert Cullen, José Luis Ucha","Vanessa Hudgens, Kimiko Glenn, James Marsden, ...",,"September 24, 2021",9,2021,2021,PG,91,Children & Family Movies,Equestria's divided. But a bright-eyed hero be...
2,s8,Sankofa,Haile Gerima,"Kofi Ghanaba, Oyafunmike Ogunlano, Alexandra D...","United States, Ghana, Burkina Faso, United Kin...","September 24, 2021",9,2021,1993,TV-MA,125,"Dramas, Independent Movies, International Movies","On a photo shoot in Ghana, an American model s..."
3,s10,The Starling,Theodore Melfi,"Melissa McCarthy, Chris O'Dowd, Kevin Kline, T...",United States,"September 24, 2021",9,2021,2021,PG-13,104,"Comedies, Dramas",A woman adjusting to life after a loss contend...
4,s13,Je Suis Karl,Christian Schwochow,"Luna Wedler, Jannis Niewöhner, Milan Peschel, ...","Germany, Czech Republic","September 23, 2021",9,2021,2021,TV-MA,127,"Dramas, International Movies",After most of her family is murdered in a terr...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6126,s8802,Zinzana,Majid Al Ansari,"Ali Suliman, Saleh Bakri, Yasa, Ali Al-Jabri, ...","United Arab Emirates, Jordan","March 9, 2016",3,2016,2015,TV-MA,96,"Dramas, International Movies, Thrillers",Recovering alcoholic Talal wakes up inside a s...
6127,s8803,Zodiac,David Fincher,"Mark Ruffalo, Jake Gyllenhaal, Robert Downey J...",United States,"November 20, 2019",11,2019,2007,R,158,"Cult Movies, Dramas, Thrillers","A political cartoonist, a crime reporter and a..."
6128,s8805,Zombieland,Ruben Fleischer,"Jesse Eisenberg, Woody Harrelson, Emma Stone, ...",United States,"November 1, 2019",11,2019,2009,R,88,"Comedies, Horror Movies",Looking to survive in a world taken over by zo...
6129,s8806,Zoom,Peter Hewitt,"Tim Allen, Courteney Cox, Chevy Chase, Kate Ma...",United States,"January 11, 2020",1,2020,2006,PG,88,"Children & Family Movies, Comedies","Dragged from civilian life, a former superhero..."


In [17]:
movies['rating'].unique()

TypeError: string indices must be integers

In [16]:
for i in range(len(movies)):
    if movies['rating'].iloc[i] == "UR":
        movies['rating'].iloc[i] = "NR"

TypeError: string indices must be integers

In [72]:
for i in range(len(movies)):
    if movies['rating'].iloc[i] in ['74 min', '84 min', '66 min']:
        print("Row:" + str(movies.iloc[i].name) + ", Duration:"+ str(movies.iloc[i].duration) + " , Rating: " + str(movies.iloc[i].rating))

TypeError: string indices must be integers

In [73]:
    
for i in rows:
    sv =  movies.iloc[i].rating.split(" ")
    length = sv[0]
    movies.iloc[i].duration = length
    movies.iloc[i].rating = "NR"

AttributeError: 'str' object has no attribute 'iloc'

In [74]:
movies['rating'].unique()

TypeError: string indices must be integers