In [1]:
# importing packages 'pandas'

import pandas as pd
import numpy as np
from datetime import datetime

In [2]:
# import 'Netflix Entertainment Dataset'

netflixM = pd.read_csv("netflix1.csv")
pd.options.display.max_rows = 10000
pd.options.display.max_columns = 10000
netflixM.head(1)

Unnamed: 0,show_id,title,StreamR,listed_in,date_added,release_year,country
0,81145628,Norm of the North: King Sized Adventure,Movie,"Children & Family Movies, Comedies",2019-09-09,2019,"United States, India, South Korea, China"


# Beginning of Data Wrangling

In [3]:
# Value_counts() function to sum total TV_Shows and Movies

netflixM.StreamR.value_counts()
netflixM.StreamR.value_counts()

Movie      4265
TV_Show    1969
Name: StreamR, dtype: int64

In [4]:
# Remove 'TV Shows' rows from StreamR column

# Get names of indexes for which column Age has value 30
index = netflixM[ netflixM['StreamR'] == "TV_Show" ].index

# Delete these row indexes from dataFrame
netflixM.drop(index , inplace=True)
netflixM.head(1)

Unnamed: 0,show_id,title,StreamR,listed_in,date_added,release_year,country
0,81145628,Norm of the North: King Sized Adventure,Movie,"Children & Family Movies, Comedies",2019-09-09,2019,"United States, India, South Korea, China"


In [5]:
# Split and rename Columns

netflix_Movie = netflixM['listed_in'].str.split(',', expand=True).rename(columns = lambda x: 'Gengre'+str(x+1))
netflix_Movie.rename(columns = {'Gengre1' : 'International', 'Gengre2' : 'Sci-Fi', 'Gengre3' : 'Thrillers'}, inplace = True)
netflix_Movie.head(1)

Unnamed: 0,International,Sci-Fi,Thrillers
0,Children & Family Movies,Comedies,


In [6]:
# dummy coding netflix_Movie 'International' column
interDummy = pd.get_dummies(netflix_Movie['International'],drop_first=True)

# Concat 2 datasets 'netflixM' and 'movieDummy'
interMovie = pd.concat([netflixM, interDummy], axis = 1)
interMovie.head(1)

Unnamed: 0,show_id,title,StreamR,listed_in,date_added,release_year,country,Anime Features,Children & Family Movies,Classic Movies,Comedies,Cult Movies,Documentaries,Dramas,Horror Movies,Independent Movies,International Movies,Movies,Music & Musicals,Romantic Movies,Sci-Fi & Fantasy,Sports Movies,Stand-Up Comedy,Thrillers
0,81145628,Norm of the North: King Sized Adventure,Movie,"Children & Family Movies, Comedies",2019-09-09,2019,"United States, India, South Korea, China",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [7]:
# Rename columns for movie-International column
interMovie.rename(columns = {'Anime Features': 'Anime','Children & Family Movies' : 'Family', 'Classic Movies': 'Classic', 
                          'Cult Movies': 'Cult', 'Horror Movies': 'Horror', 'Independent Movies': 'Independent',
                          'Romantic Movies': 'Romantic', 'International Movies': 'International',
                          'Music & Musicals' :'Musicals', 'Sports Movies' : 'Sports', 'TV Dramas': 'Dramas', 
                          'TV Horror': 'Horror', 'Sci-Fi & Fantasy' : 'Sci-Fi'}, inplace = True)
interMovie.head(1)

Unnamed: 0,show_id,title,StreamR,listed_in,date_added,release_year,country,Anime,Family,Classic,Comedies,Cult,Documentaries,Dramas,Horror,Independent,International,Movies,Musicals,Romantic,Sci-Fi,Sports,Stand-Up Comedy,Thrillers
0,81145628,Norm of the North: King Sized Adventure,Movie,"Children & Family Movies, Comedies",2019-09-09,2019,"United States, India, South Korea, China",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


# Sci-Fi Column

In [8]:
# dummy coding crime column
sciDummy = pd.get_dummies(netflix_Movie['Sci-Fi'], drop_first=True)

# Concat 2 datasets 'tv' and 'crimeDummy'
sciMovie = pd.concat([netflixM, sciDummy], axis=1)
sciMovie.head(1)

Unnamed: 0,show_id,title,StreamR,listed_in,date_added,release_year,country,Children & Family Movies,Classic Movies,Comedies,Cult Movies,Documentaries,Dramas,Faith & Spirituality,Horror Movies,Independent Movies,International Movies,LGBTQ Movies,Music & Musicals,Romantic Movies,Sci-Fi & Fantasy,Sports Movies,Stand-Up Comedy,Thrillers
0,81145628,Norm of the North: King Sized Adventure,Movie,"Children & Family Movies, Comedies",2019-09-09,2019,"United States, India, South Korea, China",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0


# Thrillers Column

In [9]:
# dummy coding netflix_Movie 'International' column
trillDummy = pd.get_dummies(netflix_Movie['Thrillers'],drop_first=True)

# Concat 2 datasets 'netflixM' and 'movieDummy'
trillMovie = pd.concat([netflixM, trillDummy], axis = 1)
trillMovie.head(1)

Unnamed: 0,show_id,title,StreamR,listed_in,date_added,release_year,country,Classic Movies,Comedies,Cult Movies,Documentaries,Dramas,Faith & Spirituality,Horror Movies,Independent Movies,International Movies,LGBTQ Movies,Music & Musicals,Romantic Movies,Sci-Fi & Fantasy,Sports Movies,Thrillers
0,81145628,Norm of the North: King Sized Adventure,Movie,"Children & Family Movies, Comedies",2019-09-09,2019,"United States, India, South Korea, China",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


# Concat movies columns 'International', 'Sci-Fi', and 'Thrillers' dataframes 

In [10]:
movieStreaming = pd.concat([interMovie, sciMovie, trillMovie], axis=1)
movieStreaming.head(1)

Unnamed: 0,show_id,title,StreamR,listed_in,date_added,release_year,country,Anime,Family,Classic,Comedies,Cult,Documentaries,Dramas,Horror,Independent,International,Movies,Musicals,Romantic,Sci-Fi,Sports,Stand-Up Comedy,Thrillers,show_id.1,title.1,StreamR.1,listed_in.1,date_added.1,release_year.1,country.1,Children & Family Movies,Classic Movies,Comedies.1,Cult Movies,Documentaries.1,Dramas.1,Faith & Spirituality,Horror Movies,Independent Movies,International Movies,LGBTQ Movies,Music & Musicals,Romantic Movies,Sci-Fi & Fantasy,Sports Movies,Stand-Up Comedy.1,Thrillers.1,show_id.2,title.2,StreamR.2,listed_in.2,date_added.2,release_year.2,country.2,Classic Movies.1,Comedies.2,Cult Movies.1,Documentaries.2,Dramas.2,Faith & Spirituality.1,Horror Movies.1,Independent Movies.1,International Movies.1,LGBTQ Movies.1,Music & Musicals.1,Romantic Movies.1,Sci-Fi & Fantasy.1,Sports Movies.1,Thrillers.2
0,81145628,Norm of the North: King Sized Adventure,Movie,"Children & Family Movies, Comedies",2019-09-09,2019,"United States, India, South Korea, China",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,81145628,Norm of the North: King Sized Adventure,Movie,"Children & Family Movies, Comedies",2019-09-09,2019,"United States, India, South Korea, China",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,81145628,Norm of the North: King Sized Adventure,Movie,"Children & Family Movies, Comedies",2019-09-09,2019,"United States, India, South Korea, China",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [77]:
# Exporting dataframe: netflix to an csv file.
movieStreaming.to_csv("movieStreaming.csv", index = False) #Python