In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Importing and Merging Necessary Datasets

In [3]:
# Import required datasets
netflixData = pd.read_csv(r"/Users/yenhann/Documents/MIT/15.572 Analytics Lab/Datasets/netflix_final_data.csv")
titleRatings = pd.read_csv(r"/Users/yenhann/Documents/MIT/15.572 Analytics Lab/Datasets/title.ratings.tsv", sep='\t') # note - we have no duplicates for title ratings - every row is a unique tv show or series
titleBasics = pd.read_csv(r"/Users/yenhann/Documents/MIT/15.572 Analytics Lab/Datasets/title.basics.tsv", sep='\t')

# Merge netflix data with ratings
netflixWithRatings = netflixData.merge(titleRatings, how = 'left', on = 'tconst')
netflixWithRatings

# Merge netflix data with ratings with basics data
netflixMaster = netflixWithRatings.merge(titleBasics, how = 'left', on = 'tconst')
netflixMaster

  titleBasics = pd.read_csv(r"/Users/yenhann/Documents/MIT/15.572 Analytics Lab/Datasets/title.basics.tsv", sep='\t')


Unnamed: 0,tconst,combined_title,release_year_x,is_top10,title,season,is_tv_show,averageRating,numVotes,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres
0,tt10311562,#blackAF: Season 1,2020,False,#blackaf,1,True,6.8,4892.0,tvSeries,#BlackAF,#BlackAF,0,2020,2020,36,Comedy
1,tt12759384,(Un)Well: Season 1,2020,False,(un)well,1,True,6.4,1091.0,tvSeries,(Un)Well,(Un)Well,0,2020,\N,\N,Documentary
2,tt10803866,10 Days With Santa Claus,2020,False,10 days with santa claus,,False,6.1,856.0,movie,When Mom Is Away... With the Family,10 giorni con Babbo Natale,0,2020,\N,100,"Comedy,Family"
3,tt11823088,100 Humans: Season 1,2020,False,100 humans,1,True,5.8,4153.0,tvSeries,100 Humans: Life's Questions. Answered.,100 Humans,0,2020,\N,\N,Reality-TV
4,tt13782052,1000 Miles from Christmas,2021,True,1000 miles from christmas,,False,5.8,1531.0,movie,1000 Miles from Christmas,A mil kilómetros de la Navidad,0,2021,\N,102,"Comedy,Romance"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2222,tt14599438,jeen-yuhs: A Kanye Trilogy,2022,True,jeen-yuhs: a kanye trilogy,,False,8.0,9043.0,tvMiniSeries,Jeen-yuhs: A Kanye Trilogy,Jeen-yuhs: A Kanye Trilogy,0,2022,2022,90,"Documentary,Music"
2223,tt11561206,the goop lab with Gwyneth Paltrow: Season 1,2020,False,the goop lab with gwyneth paltrow,1,True,2.5,3352.0,tvSeries,The Goop Lab,The Goop Lab,0,2020,\N,30,"Documentary,Reality-TV"
2224,tt8721424,"tick, tick...BOOM!",2021,True,"tick, tick...boom!",,False,7.5,103877.0,movie,"tick, tick... BOOM!","tick, tick...BOOM!",0,2021,\N,120,"Biography,Drama,Musical"
2225,tt9725830,Òlòtūré,2020,False,òlòtūré,,False,5.5,811.0,movie,Òlòtūré,Òlòturé,0,2019,\N,106,"Crime,Drama"


## Data Preparation and Wrangling

In [4]:
# drop duplicate columns - run this cell only once
netflixMaster.drop(['title','primaryTitle','originalTitle'], axis = 1, inplace = True)

In [5]:
# separate into movies and tv shows
movies = netflixMaster[netflixMaster['is_tv_show'] == False]
series = netflixMaster[netflixMaster['is_tv_show'] == True]

movies # 1103 movies
# series # 1124 series

Unnamed: 0,tconst,combined_title,release_year_x,is_top10,season,is_tv_show,averageRating,numVotes,titleType,isAdult,startYear,endYear,runtimeMinutes,genres
2,tt10803866,10 Days With Santa Claus,2020,False,,False,6.1,856.0,movie,0,2020,\N,100,"Comedy,Family"
4,tt13782052,1000 Miles from Christmas,2021,True,,False,5.8,1531.0,movie,0,2021,\N,102,"Comedy,Romance"
5,tt6275154,11M: Terror in Madrid,2022,True,,False,6.9,1928.0,movie,0,2022,\N,92,Documentary
7,tt16379224,137 Shots,2021,True,,False,6.3,932.0,movie,0,2021,\N,104,"Crime,Documentary"
8,tt3975918,13: The Musical,2022,False,,False,5.2,621.0,movie,0,2022,\N,91,"Comedy,Drama,Family"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2219,tt14892216,Zero to Hero,2021,True,,False,6.7,925.0,movie,0,2021,\N,102,"Biography,Drama,Sport"
2221,tt13615386,"ariana grande: excuse me, i love you",2020,False,,False,6.3,2648.0,movie,0,2020,\N,97,"Documentary,Music"
2222,tt14599438,jeen-yuhs: A Kanye Trilogy,2022,True,,False,8.0,9043.0,tvMiniSeries,0,2022,2022,90,"Documentary,Music"
2224,tt8721424,"tick, tick...BOOM!",2021,True,,False,7.5,103877.0,movie,0,2021,\N,120,"Biography,Drama,Musical"


## Movies

In [6]:
# drop is_tv_show column from both movies and series dataframes
movies = movies.drop(['is_tv_show', 'season', 'startYear', 'endYear'], axis = 1).reset_index(drop = True)

# remove any rows with > 1 NaN entry
rowWiseNullCounter = pd.DataFrame(movies.isnull().sum(axis = 1))
indexRowsToRemove1 = list(rowWiseNullCounter[rowWiseNullCounter[0] >= 1].index.values)

for i in indexRowsToRemove1:
    movies.drop(index = i, axis = 1, inplace = True)

movies = movies.reset_index(drop = True)

In [7]:
# define function to extract genres of each film and one hot encode genres
def oneHotEncodeGenres(df):

    for i in range(0, df.shape[0]):
        genresJoined = df.loc[i, 'genres']
        genresList = genresJoined.split(',')

        # Action
        if 'Action' in genresList:
            df.loc[i, 'isAction'] = int(1)
        else:
            df.loc[i, 'isAction'] = int(0)
        
        # Adventure
        if 'Adventure' in genresList:
            df.loc[i, 'isAdventure'] = int(1)
        else:
            df.loc[i, 'isAdventure'] = int(0)
        
        # Animation
        if 'Animation' in genresList:
            df.loc[i, 'isAnimation'] = int(1)
        else:
            df.loc[i, 'isAnimation'] = int(0)

        # Biography
        if 'Biography' in genresList:
            df.loc[i, 'isBiography'] = int(1)
        else:
            df.loc[i, 'isBiography'] = int(0)

        # Comedy
        if 'Comedy' in genresList:
            df.loc[i, 'isComedy'] = int(1)
        else:
            df.loc[i, 'isComedy'] = int(0)

        # Crime
        if 'Crime' in genresList:
            df.loc[i, 'isCrime'] = int(1)
        else:
            df.loc[i, 'isCrime'] = int(0)

        # Documentary
        if 'Documentary' in genresList:
            df.loc[i, 'isDocumentary'] = int(1)
        else:
            df.loc[i, 'isDocumentary'] = int(0)

        # Drama
        if 'Drama' in genresList:
            df.loc[i, 'isDrama'] = int(1)
        else:
            df.loc[i, 'isDrama'] = int(0)

        # Family
        if 'Family' in genresList:
            df.loc[i, 'isFamily'] = int(1)
        else:
            df.loc[i, 'isFamily'] = int(0)

        # Fantasy
        if 'Fantasy' in genresList:
            df.loc[i, 'isFantasy'] = int(1)
        else:
            df.loc[i, 'isFantasy'] = int(0)
        
        # Game-Show
        if 'Game-Show' in genresList:
            df.loc[i, 'isGameShow'] = int(1)
        else:
            df.loc[i, 'isGameShow'] = int(0)
        
        # History
        if 'History' in genresList:
            df.loc[i, 'isHistory'] = int(1)
        else:
            df.loc[i, 'isHistory'] = int(0)
        
        # Horror
        if 'Horror' in genresList:
            df.loc[i, 'isHorror'] = int(1)
        else:
            df.loc[i, 'isHorror'] = int(0)

        # Music
        if 'Music' in genresList:
            df.loc[i, 'isMusic'] = int(1)
        else:
            df.loc[i, 'isMusic'] = int(0)

        # Musical
        if 'Musical' in genresList:
            df.loc[i, 'isMusical'] = int(1)
        else:
            df.loc[i, 'isMusical'] = int(0)

        # Mystery
        if 'Mystery' in genresList:
            df.loc[i, 'isMystery'] = int(1)
        else:
            df.loc[i, 'isMystery'] = int(0)

        # News
        if 'News' in genresList:
            df.loc[i, 'isNews'] = int(1)
        else:
            df.loc[i, 'isNews'] = int(0)

        # Reality-TV
        if 'Reality-TV' in genresList:
            df.loc[i, 'isRealityTV'] = int(1)
        else:
            df.loc[i, 'isRealityTV'] = int(0)

        # Romance
        if 'Romance' in genresList:
            df.loc[i, 'isRomance'] = int(1)
        else:
            df.loc[i, 'isRomance'] = int(0)
        
        # Sci-Fi
        if 'Sci-Fi' in genresList:
            df.loc[i, 'isSciFi'] = int(1)
        else:
            df.loc[i, 'isSciFi'] = int(0)

        # Short
        if 'Short' in genresList:
            df.loc[i, 'isShort'] = int(1)
        else:
            df.loc[i, 'isShort'] = int(0)

        # Sport       
        if 'Sport' in genresList:
            df.loc[i, 'isSport'] = int(1)
        else:
            df.loc[i, 'isSport'] = int(0)
        
        # Talk-Show
        if 'Talk-Show' in genresList:
            df.loc[i, 'isTalkShow'] = int(1)
        else:
            df.loc[i, 'isTalkShow'] = int(0)
        
        # Thriller
        if 'Thriller' in genresList:
            df.loc[i, 'isThriller'] = int(1)
        else:
            df.loc[i, 'isThriller'] = int(0)
        
        # War
        if 'War' in genresList:
            df.loc[i, 'isWar'] = int(1)
        else:
            df.loc[i, 'isWar'] = int(0)
        
        # Western
        if 'Western' in genresList:
            df.loc[i, 'isWestern'] = int(1)
        else:
            df.loc[i, 'isWestern'] = int(0)
            
    return df

In [8]:
# One hot encode genres of each movie
movies = oneHotEncodeGenres(movies)
movies = movies.drop('genres', axis = 1)

In [9]:
movies

Unnamed: 0,tconst,combined_title,release_year_x,is_top10,averageRating,numVotes,titleType,isAdult,runtimeMinutes,isAction,...,isNews,isRealityTV,isRomance,isSciFi,isShort,isSport,isTalkShow,isThriller,isWar,isWestern
0,tt10803866,10 Days With Santa Claus,2020,False,6.1,856.0,movie,0,100,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,tt13782052,1000 Miles from Christmas,2021,True,5.8,1531.0,movie,0,102,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,tt6275154,11M: Terror in Madrid,2022,True,6.9,1928.0,movie,0,92,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,tt16379224,137 Shots,2021,True,6.3,932.0,movie,0,104,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,tt3975918,13: The Musical,2022,False,5.2,621.0,movie,0,91,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1085,tt14892216,Zero to Hero,2021,True,6.7,925.0,movie,0,102,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1086,tt13615386,"ariana grande: excuse me, i love you",2020,False,6.3,2648.0,movie,0,97,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1087,tt14599438,jeen-yuhs: A Kanye Trilogy,2022,True,8.0,9043.0,tvMiniSeries,0,90,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1088,tt8721424,"tick, tick...BOOM!",2021,True,7.5,103877.0,movie,0,120,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [10]:
# # Genre extraction - uncomment to see unique genres
# genresList = []

# for i in range(0, movies.shape[0]):
#     genresJoined = movies.loc[i, 'genres']
#     genresList = genresList + genresJoined.split(',')

# uniqueGenres = set(genresList)
# uniqueGenres
    

In [11]:
# One hot encoding for titleType
oneHot_titleType = pd.get_dummies(movies['titleType'])
movies = movies.join(oneHot_titleType)
movies = movies.drop('titleType', axis = 1)
movies

Unnamed: 0,tconst,combined_title,release_year_x,is_top10,averageRating,numVotes,isAdult,runtimeMinutes,isAction,isAdventure,...,isWar,isWestern,movie,short,tvEpisode,tvMiniSeries,tvMovie,tvSeries,tvShort,tvSpecial
0,tt10803866,10 Days With Santa Claus,2020,False,6.1,856.0,0,100,0.0,0.0,...,0.0,0.0,1,0,0,0,0,0,0,0
1,tt13782052,1000 Miles from Christmas,2021,True,5.8,1531.0,0,102,0.0,0.0,...,0.0,0.0,1,0,0,0,0,0,0,0
2,tt6275154,11M: Terror in Madrid,2022,True,6.9,1928.0,0,92,0.0,0.0,...,0.0,0.0,1,0,0,0,0,0,0,0
3,tt16379224,137 Shots,2021,True,6.3,932.0,0,104,0.0,0.0,...,0.0,0.0,1,0,0,0,0,0,0,0
4,tt3975918,13: The Musical,2022,False,5.2,621.0,0,91,0.0,0.0,...,0.0,0.0,1,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1085,tt14892216,Zero to Hero,2021,True,6.7,925.0,0,102,0.0,0.0,...,0.0,0.0,1,0,0,0,0,0,0,0
1086,tt13615386,"ariana grande: excuse me, i love you",2020,False,6.3,2648.0,0,97,0.0,0.0,...,0.0,0.0,1,0,0,0,0,0,0,0
1087,tt14599438,jeen-yuhs: A Kanye Trilogy,2022,True,8.0,9043.0,0,90,0.0,0.0,...,0.0,0.0,0,0,0,1,0,0,0,0
1088,tt8721424,"tick, tick...BOOM!",2021,True,7.5,103877.0,0,120,0.0,0.0,...,0.0,0.0,1,0,0,0,0,0,0,0


In [12]:
# print('Number of Movies of 1090 = ' + str(sum(movies['movie'])))
# print('Number of Shorts of 1090 = ' + str(sum(movies['short'])))
# print('Number of tvEpisodes of 1090 = ' + str(sum(movies['tvEpisode'])))
# print('Number of tvMiniSeries of 1090 = ' + str(sum(movies['tvMiniSeries'])))
# print('Number of tvMovie of 1090 = ' + str(sum(movies['tvMovie'])))
# print('Number of tvSeries of 1090 = ' + str(sum(movies['tvSeries'])))
# print('Number of tvShort of 1090 = ' + str(sum(movies['tvShort'])))
# print('Number of tvSpecial of 1090 = ' + str(sum(movies['tvSpecial'])))


In [13]:
# Convert is_top10 column to 1 or 0
for i in range(0, movies.shape[0]):
    if movies.loc[i, 'is_top10'] == True:
        movies.loc[i, 'is_top10'] = int(1)
    else:
        movies.loc[i, 'is_top10'] = int(0)

movies

Unnamed: 0,tconst,combined_title,release_year_x,is_top10,averageRating,numVotes,isAdult,runtimeMinutes,isAction,isAdventure,...,isWar,isWestern,movie,short,tvEpisode,tvMiniSeries,tvMovie,tvSeries,tvShort,tvSpecial
0,tt10803866,10 Days With Santa Claus,2020,0,6.1,856.0,0,100,0.0,0.0,...,0.0,0.0,1,0,0,0,0,0,0,0
1,tt13782052,1000 Miles from Christmas,2021,1,5.8,1531.0,0,102,0.0,0.0,...,0.0,0.0,1,0,0,0,0,0,0,0
2,tt6275154,11M: Terror in Madrid,2022,1,6.9,1928.0,0,92,0.0,0.0,...,0.0,0.0,1,0,0,0,0,0,0,0
3,tt16379224,137 Shots,2021,1,6.3,932.0,0,104,0.0,0.0,...,0.0,0.0,1,0,0,0,0,0,0,0
4,tt3975918,13: The Musical,2022,0,5.2,621.0,0,91,0.0,0.0,...,0.0,0.0,1,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1085,tt14892216,Zero to Hero,2021,1,6.7,925.0,0,102,0.0,0.0,...,0.0,0.0,1,0,0,0,0,0,0,0
1086,tt13615386,"ariana grande: excuse me, i love you",2020,0,6.3,2648.0,0,97,0.0,0.0,...,0.0,0.0,1,0,0,0,0,0,0,0
1087,tt14599438,jeen-yuhs: A Kanye Trilogy,2022,1,8.0,9043.0,0,90,0.0,0.0,...,0.0,0.0,0,0,0,1,0,0,0,0
1088,tt8721424,"tick, tick...BOOM!",2021,1,7.5,103877.0,0,120,0.0,0.0,...,0.0,0.0,1,0,0,0,0,0,0,0


In [14]:
movies.iloc[:, 8:34] = movies.iloc[:, 8:34].astype(int)
movies

Unnamed: 0,tconst,combined_title,release_year_x,is_top10,averageRating,numVotes,isAdult,runtimeMinutes,isAction,isAdventure,...,isWar,isWestern,movie,short,tvEpisode,tvMiniSeries,tvMovie,tvSeries,tvShort,tvSpecial
0,tt10803866,10 Days With Santa Claus,2020,0,6.1,856.0,0,100,0,0,...,0,0,1,0,0,0,0,0,0,0
1,tt13782052,1000 Miles from Christmas,2021,1,5.8,1531.0,0,102,0,0,...,0,0,1,0,0,0,0,0,0,0
2,tt6275154,11M: Terror in Madrid,2022,1,6.9,1928.0,0,92,0,0,...,0,0,1,0,0,0,0,0,0,0
3,tt16379224,137 Shots,2021,1,6.3,932.0,0,104,0,0,...,0,0,1,0,0,0,0,0,0,0
4,tt3975918,13: The Musical,2022,0,5.2,621.0,0,91,0,0,...,0,0,1,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1085,tt14892216,Zero to Hero,2021,1,6.7,925.0,0,102,0,0,...,0,0,1,0,0,0,0,0,0,0
1086,tt13615386,"ariana grande: excuse me, i love you",2020,0,6.3,2648.0,0,97,0,0,...,0,0,1,0,0,0,0,0,0,0
1087,tt14599438,jeen-yuhs: A Kanye Trilogy,2022,1,8.0,9043.0,0,90,0,0,...,0,0,0,0,0,1,0,0,0,0
1088,tt8721424,"tick, tick...BOOM!",2021,1,7.5,103877.0,0,120,0,0,...,0,0,1,0,0,0,0,0,0,0


## CART Modelling

In [15]:
moviesCleaned = movies[movies['runtimeMinutes'] != '\\N']
moviesCleaned

Unnamed: 0,tconst,combined_title,release_year_x,is_top10,averageRating,numVotes,isAdult,runtimeMinutes,isAction,isAdventure,...,isWar,isWestern,movie,short,tvEpisode,tvMiniSeries,tvMovie,tvSeries,tvShort,tvSpecial
0,tt10803866,10 Days With Santa Claus,2020,0,6.1,856.0,0,100,0,0,...,0,0,1,0,0,0,0,0,0,0
1,tt13782052,1000 Miles from Christmas,2021,1,5.8,1531.0,0,102,0,0,...,0,0,1,0,0,0,0,0,0,0
2,tt6275154,11M: Terror in Madrid,2022,1,6.9,1928.0,0,92,0,0,...,0,0,1,0,0,0,0,0,0,0
3,tt16379224,137 Shots,2021,1,6.3,932.0,0,104,0,0,...,0,0,1,0,0,0,0,0,0,0
4,tt3975918,13: The Musical,2022,0,5.2,621.0,0,91,0,0,...,0,0,1,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1085,tt14892216,Zero to Hero,2021,1,6.7,925.0,0,102,0,0,...,0,0,1,0,0,0,0,0,0,0
1086,tt13615386,"ariana grande: excuse me, i love you",2020,0,6.3,2648.0,0,97,0,0,...,0,0,1,0,0,0,0,0,0,0
1087,tt14599438,jeen-yuhs: A Kanye Trilogy,2022,1,8.0,9043.0,0,90,0,0,...,0,0,0,0,0,1,0,0,0,0
1088,tt8721424,"tick, tick...BOOM!",2021,1,7.5,103877.0,0,120,0,0,...,0,0,1,0,0,0,0,0,0,0


In [16]:
# Split movies into training and test sets
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree

# Remove tconst and movie title columns and separate into X and Y
moviesCleanedRuntimes = movies[movies['runtimeMinutes'] != '\\N']
moviesCleanedRuntimes = moviesCleanedRuntimes.reset_index(drop = True)
moviesCleaned = moviesCleanedRuntimes.drop(['tconst','combined_title'], axis = 1).copy()
Y = moviesCleaned.is_top10.copy()
X = moviesCleaned.drop(['is_top10'], axis = 1).copy()
X = X.drop('isAdult', axis = 1)
X['runtimeMinutes'] = X['runtimeMinutes'].astype(float)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, Y.astype(int), test_size = 0.3, random_state = 100)

In [17]:
# Train Decision Tree model
DecisionTreeModel = DecisionTreeClassifier(criterion = 'entropy', random_state = 100, max_depth = 3, min_samples_leaf = 5)
DecisionTreeModel.fit(X_train, y_train)

DecisionTreeClassifier(criterion='entropy', max_depth=3, min_samples_leaf=5,
                       random_state=100)

In [23]:
# Generate predictions on test set
from sklearn.metrics import accuracy_score
y_pred = DecisionTreeModel.predict(X_test)
print('Accuracy = ' + str(accuracy_score(y_test, y_pred)*100))

# Confusion matrix scores - Precision, Recall, F1-Score
TP = 0
TN = 0
FP = 0
FN = 0

for i in range(0, len(y_pred)):
    if y_pred[i] == 1 and list(y_test)[i] == 1:
        TP += 1
    elif y_pred[i] == 1 and list(y_test)[i] == 0:
        FP += 1
    elif y_pred[i] == 0 and list(y_test)[i] == 1:
        FN += 1
    elif y_pred[i] == 0 and list(y_test)[i] == 0:
        TN += 1

precision = 100*(TP/(TP+FP))
recall = 100*(TP/(TP+FN))
f1score = 100*2*TP/(2*TP+FP+FN)

print('Precision = ' + str(precision))
print('Recall = ' + str(recall))
print('F1-Score = ' + str(f1score))




Accuracy = 67.1875
Precision = 68.52791878172589
Recall = 75.84269662921348
F1-Score = 72.0


In [21]:
list(y_test)[i]

0