In [615]:
import numpy as np
import pandas as pd
import copy
  

### Importing file

In [616]:
films_df = pd.read_csv('netflix_titles.csv', delimiter=',')
films_df.head(8)

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...
5,s6,TV Show,Midnight Mass,Mike Flanagan,"Kate Siegel, Zach Gilford, Hamish Linklater, H...",,"September 24, 2021",2021,TV-MA,1 Season,"TV Dramas, TV Horror, TV Mysteries",The arrival of a charismatic young priest brin...
6,s7,Movie,My Little Pony: A New Generation,"Robert Cullen, José Luis Ucha","Vanessa Hudgens, Kimiko Glenn, James Marsden, ...",,"September 24, 2021",2021,PG,91 min,Children & Family Movies,Equestria's divided. But a bright-eyed hero be...
7,s8,Movie,Sankofa,Haile Gerima,"Kofi Ghanaba, Oyafunmike Ogunlano, Alexandra D...","United States, Ghana, Burkina Faso, United Kin...","September 24, 2021",1993,TV-MA,125 min,"Dramas, Independent Movies, International Movies","On a photo shoot in Ghana, an American model s..."


### How data looks like

In [617]:
films_df.isna().sum()

show_id            0
type               0
title              0
director        2634
cast             825
country          831
date_added        10
release_year       0
rating             4
duration           3
listed_in          0
description        0
dtype: int64

### We need to find production which was added most recently, we do not have to delete every row which has any null/none values - only if that value will be found in column 'date_added' for now.

In [618]:
dropped_nulls_from_date_added = films_df.dropna(subset=['date_added'])
dropped_nulls_from_date_added.isna().sum()

show_id            0
type               0
title              0
director        2624
cast             825
country          830
date_added         0
release_year       0
rating             4
duration           3
listed_in          0
description        0
dtype: int64

In [619]:
dropped_nulls_from_date_added.dtypes

show_id         object
type            object
title           object
director        object
cast            object
country         object
date_added      object
release_year     int64
rating          object
duration        object
listed_in       object
description     object
dtype: object

In [620]:
dropped_nulls_from_date_added.loc[:, 'date_added'] = pd.to_datetime(dropped_nulls_from_date_added['date_added'])
dropped_nulls_from_date_added.head(5)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dropped_nulls_from_date_added.loc[:, 'date_added'] = pd.to_datetime(dropped_nulls_from_date_added['date_added'])


Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,2021-09-25,2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,2021-09-24,2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,2021-09-24,2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,2021-09-24,2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,2021-09-24,2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...


In [621]:
most_recent_date = dropped_nulls_from_date_added['date_added'].max()
most_recent_date

production_added_most_recently = dropped_nulls_from_date_added.loc[(dropped_nulls_from_date_added['date_added'] == most_recent_date)]
production_added_most_recently

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,2021-09-25,2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."


### We can easily see that in column 'duration' we do not have the same format for each answer, so we have to deal with it

In [622]:
films_df['duration'].head(10)

0       90 min
1    2 Seasons
2     1 Season
3     1 Season
4    2 Seasons
5     1 Season
6       91 min
7      125 min
8    9 Seasons
9      104 min
Name: duration, dtype: object

### However, if we look closer to column 'type', we can get to know that format, in which is duration given, depends on type.
### When type is equal to 'Movie' it is given in string which contains number of minutes and string 'min', but when type equals 'TV Show', it is given in string which informs us about number of seasons.

In [623]:
duration_df = films_df.dropna(subset=['duration'])
duration_df.isna().sum()

show_id            0
type               0
title              0
director        2634
cast             825
country          831
date_added        10
release_year       0
rating             4
duration           0
listed_in          0
description        0
dtype: int64

In [624]:
duration_df.groupby(['type']).size()

type
Movie      6128
TV Show    2676
dtype: int64

In [625]:
for index, (film_type, duration_time) in (duration_df[['type', 'duration']]).iterrows():
    if film_type == 'Movie':
        try:
            duration_df.loc[index, 'duration'] = int(duration_time.strip(' min'))
        except TypeError:
            print('sth does not work properly')
        else:
            pass
    else:
        try:
            duration_df.loc[index, 'duration'] = int(duration_time[:duration_time.find(' ')])
        except TypeError:
            print('sth does not work properly')

duration_df.head(10)


Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...
5,s6,TV Show,Midnight Mass,Mike Flanagan,"Kate Siegel, Zach Gilford, Hamish Linklater, H...",,"September 24, 2021",2021,TV-MA,1,"TV Dramas, TV Horror, TV Mysteries",The arrival of a charismatic young priest brin...
6,s7,Movie,My Little Pony: A New Generation,"Robert Cullen, José Luis Ucha","Vanessa Hudgens, Kimiko Glenn, James Marsden, ...",,"September 24, 2021",2021,PG,91,Children & Family Movies,Equestria's divided. But a bright-eyed hero be...
7,s8,Movie,Sankofa,Haile Gerima,"Kofi Ghanaba, Oyafunmike Ogunlano, Alexandra D...","United States, Ghana, Burkina Faso, United Kin...","September 24, 2021",1993,TV-MA,125,"Dramas, Independent Movies, International Movies","On a photo shoot in Ghana, an American model s..."
8,s9,TV Show,The Great British Baking Show,Andy Devonshire,"Mel Giedroyc, Sue Perkins, Mary Berry, Paul Ho...",United Kingdom,"September 24, 2021",2021,TV-14,9,"British TV Shows, Reality TV",A talented batch of amateur bakers face off in...
9,s10,Movie,The Starling,Theodore Melfi,"Melissa McCarthy, Chris O'Dowd, Kevin Kline, T...",United States,"September 24, 2021",2021,PG-13,104,"Comedies, Dramas",A woman adjusting to life after a loss contend...


### We have one additional problem, a few films where made by several countries, so we have to sort this issue out

In [626]:
duration_df = duration_df.dropna(subset=['country'])
duration_df = duration_df.assign(country=duration_df['country'].str.split(', ')).explode('country')
duration_df.head(10)

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...
7,s8,Movie,Sankofa,Haile Gerima,"Kofi Ghanaba, Oyafunmike Ogunlano, Alexandra D...",United States,"September 24, 2021",1993,TV-MA,125,"Dramas, Independent Movies, International Movies","On a photo shoot in Ghana, an American model s..."
7,s8,Movie,Sankofa,Haile Gerima,"Kofi Ghanaba, Oyafunmike Ogunlano, Alexandra D...",Ghana,"September 24, 2021",1993,TV-MA,125,"Dramas, Independent Movies, International Movies","On a photo shoot in Ghana, an American model s..."
7,s8,Movie,Sankofa,Haile Gerima,"Kofi Ghanaba, Oyafunmike Ogunlano, Alexandra D...",Burkina Faso,"September 24, 2021",1993,TV-MA,125,"Dramas, Independent Movies, International Movies","On a photo shoot in Ghana, an American model s..."
7,s8,Movie,Sankofa,Haile Gerima,"Kofi Ghanaba, Oyafunmike Ogunlano, Alexandra D...",United Kingdom,"September 24, 2021",1993,TV-MA,125,"Dramas, Independent Movies, International Movies","On a photo shoot in Ghana, an American model s..."
7,s8,Movie,Sankofa,Haile Gerima,"Kofi Ghanaba, Oyafunmike Ogunlano, Alexandra D...",Germany,"September 24, 2021",1993,TV-MA,125,"Dramas, Independent Movies, International Movies","On a photo shoot in Ghana, an American model s..."
7,s8,Movie,Sankofa,Haile Gerima,"Kofi Ghanaba, Oyafunmike Ogunlano, Alexandra D...",Ethiopia,"September 24, 2021",1993,TV-MA,125,"Dramas, Independent Movies, International Movies","On a photo shoot in Ghana, an American model s..."
8,s9,TV Show,The Great British Baking Show,Andy Devonshire,"Mel Giedroyc, Sue Perkins, Mary Berry, Paul Ho...",United Kingdom,"September 24, 2021",2021,TV-14,9,"British TV Shows, Reality TV",A talented batch of amateur bakers face off in...


In [627]:
movie_new_df = duration_df[['type', 'country', 'duration']].loc[duration_df['type'] == 'Movie'].groupby(by=['country', 'type']).agg({'duration': ['mean', 'max']}).reset_index()
movie_new_df.sort_values(by=('duration', 'mean'), ascending=False).head(10)

Unnamed: 0_level_0,country,type,duration,duration
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,max
58,Liechtenstein,Movie,200.0,200
67,Montenegro,Movie,157.0,157
99,Soviet Union,Movie,156.666667,163
9,Bahamas,Movie,145.0,145
19,Cameroon,Movie,143.0,143
68,Morocco,Movie,137.333333,205
63,Malta,Movie,135.5,149
43,India,Movie,125.912682,228
32,Ethiopia,Movie,125.0,125
16,Burkina Faso,Movie,125.0,125


### When country did not produce many movies - especially when it produced only one - we can come across situations when movie was long and the average of all films for this country will be equal to this one specific value. In that case, countries which produced many films with different duration can not be placed even in the top 10. Let's check maximum of films' duration also.


In [628]:
movie_new_df.sort_values(by=('duration', 'max'), ascending=False).head(10)

Unnamed: 0_level_0,country,type,duration,duration
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,max
114,United States,Movie,93.759825,312
31,Egypt,Movie,108.333333,253
49,Italy,Movie,103.573333,229
43,India,Movie,125.912682,228
112,United Kingdom,Movie,98.898496,224
51,Japan,Movie,98.848739,208
68,Morocco,Movie,137.333333,205
73,New Zealand,Movie,105.8,201
58,Liechtenstein,Movie,200.0,200
72,Netherlands,Movie,94.833333,196


In [629]:
tv_show_new_df = duration_df[['type', 'country', 'duration']].loc[duration_df['type'] == 'TV Show'].groupby(by=['country', 'type']).agg({'duration': ['mean', 'max']}).reset_index()
tv_show_new_df.sort_values(by=('duration', 'mean'), ascending=False).head(10)

Unnamed: 0_level_0,country,type,duration,duration
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,max
36,Malta,TV Show,4.0,4
61,United Arab Emirates,TV Show,3.0,3
23,Hungary,TV Show,3.0,3
8,Canada,TV Show,2.81746,15
27,Ireland,TV Show,2.571429,5
16,Denmark,TV Show,2.428571,10
63,United States,TV Show,2.313433,17
12,Croatia,TV Show,2.0,2
18,Finland,TV Show,2.0,3
60,Ukraine,TV Show,2.0,3


### Let's do the same as above

In [630]:
tv_show_new_df.sort_values(by=('duration', 'max'), ascending=False).head(10)

Unnamed: 0_level_0,country,type,duration,duration
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,max
63,United States,TV Show,2.313433,17
8,Canada,TV Show,2.81746,15
50,Singapore,TV Show,1.478261,10
16,Denmark,TV Show,2.428571,10
62,United Kingdom,TV Show,1.900735,10
30,Japan,TV Show,1.58794,9
2,Australia,TV Show,1.909091,8
19,France,TV Show,1.711111,7
11,Colombia,TV Show,1.46875,7
35,Malaysia,TV Show,1.75,7


### We can performe very similar procedure in order to find answer to third question
This time we will add new, auxiliary column called 'genre'

In [631]:
genres_df = films_df.dropna(subset=['listed_in', 'country'])
genres_df = genres_df.assign(genre=genres_df['listed_in'].str.strip(' , ').str.split(', '))
genres_df = genres_df.assign(country=genres_df['country'].str.strip(' , ').str.split(', '))
genres_df = genres_df.explode('genre')
genres_df = genres_df.explode('country')

print(genres_df.isna().sum())


result = genres_df.groupby(by=['country', 'genre'], as_index=False).size()
result.head(50)


show_id            0
type               0
title              0
director        5714
cast            1547
country            0
date_added        19
release_year       0
rating             5
duration           3
listed_in          0
description        0
genre              0
dtype: int64


Unnamed: 0,country,genre,size
0,Afghanistan,Documentaries,1
1,Afghanistan,International Movies,1
2,Albania,Dramas,1
3,Albania,International Movies,1
4,Algeria,Classic Movies,1
5,Algeria,Dramas,3
6,Algeria,Independent Movies,1
7,Algeria,International Movies,3
8,Angola,Action & Adventure,1
9,Angola,International Movies,1


### I had been looking for reason why these empty strings had been appearing all the time for 3 hours :( Inappropriate format of data is the worst thing i have ever met. 

In [632]:
genres_df.loc[genres_df['country'] == '']

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,genre


In [633]:
genres_df = genres_df.loc[genres_df['country'] != '']
result = genres_df.groupby(by=['country', 'genre'], as_index=False).size()
result.head(50)

Unnamed: 0,country,genre,size
0,Afghanistan,Documentaries,1
1,Afghanistan,International Movies,1
2,Albania,Dramas,1
3,Albania,International Movies,1
4,Algeria,Classic Movies,1
5,Algeria,Dramas,3
6,Algeria,Independent Movies,1
7,Algeria,International Movies,3
8,Angola,Action & Adventure,1
9,Angola,International Movies,1


In [634]:
result.loc[result['country'] == 'Poland']

Unnamed: 0,country,genre,size
939,Poland,Action & Adventure,3
940,Poland,Classic Movies,2
941,Poland,Comedies,6
942,Poland,Crime TV Shows,6
943,Poland,Cult Movies,1
944,Poland,Documentaries,1
945,Poland,Docuseries,1
946,Poland,Dramas,25
947,Poland,Horror Movies,2
948,Poland,Independent Movies,5


In [635]:
genres = result['genre'].unique()
countries = result['country'].unique()
countries = np.delete(countries, [0])

In [636]:
result.head(10)

Unnamed: 0,country,genre,size
0,Afghanistan,Documentaries,1
1,Afghanistan,International Movies,1
2,Albania,Dramas,1
3,Albania,International Movies,1
4,Algeria,Classic Movies,1
5,Algeria,Dramas,3
6,Algeria,Independent Movies,1
7,Algeria,International Movies,3
8,Angola,Action & Adventure,1
9,Angola,International Movies,1


In [637]:
rating_df = films_df.dropna(subset=['country', 'rating'])
rating_df = rating_df.loc[rating_df['country'] != '']

to_assign = rating_df['country'].str.strip(' , ').str.split(', ')
rating_df = rating_df.assign(country=to_assign).explode('country')

counted = rating_df.groupby(by=['country', 'rating'], as_index=False).size()
counted.head(50)

Unnamed: 0,country,rating,size
0,Afghanistan,TV-MA,1
1,Albania,TV-MA,1
2,Algeria,TV-14,1
3,Algeria,TV-MA,2
4,Angola,TV-MA,1
5,Argentina,G,1
6,Argentina,NR,2
7,Argentina,PG,1
8,Argentina,R,2
9,Argentina,TV-14,10


In [638]:
counted.sort_values(['country', 'size'], ascending=[True, False]).head(50)

Unnamed: 0,country,rating,size
0,Afghanistan,TV-MA,1
1,Albania,TV-MA,1
3,Algeria,TV-MA,2
2,Algeria,TV-14,1
4,Angola,TV-MA,1
11,Argentina,TV-MA,67
9,Argentina,TV-14,10
12,Argentina,TV-PG,4
6,Argentina,NR,2
8,Argentina,R,2


### Let's find most common rating for each country and each director - to do this i have written a usefull function. Due to the fact that some countries and directors might have several ratings with the biggest cardinality, this task is not that easy as it could be.

In [639]:

def most_common_rating(counted, checked_element):

    current_checked_element = None
    highest_cardinality = -1
    ratings = {checked_element : [], 'rating' : [], 'cardinality' : []}
    checked_element_rating = []

    for index, (checked_element_item, rating, size) in counted.iterrows():

        if current_checked_element is None:
            current_checked_element = copy.copy(checked_element_item)

        if checked_element_item != current_checked_element or int(counted[-1:].index[0]) == index:
            
            ratings[checked_element].append(current_checked_element)
            ratings['rating'].append(', '.join(checked_element_rating))
            ratings['cardinality'].append(highest_cardinality)

            current_checked_element = copy.copy(checked_element_item)
            checked_element_rating.clear()
            highest_cardinality = -1
            
        if size > highest_cardinality:
            checked_element_rating.clear()
            checked_element_rating += [rating]
            highest_cardinality = size

        elif size == highest_cardinality:
            checked_element_rating += [rating]

    return ratings

ratings = pd.DataFrame(most_common_rating(counted, 'country'))
ratings.head(20)

Unnamed: 0,country,rating,cardinality
0,Afghanistan,TV-MA,1
1,Albania,TV-MA,1
2,Algeria,TV-MA,2
3,Angola,TV-MA,1
4,Argentina,TV-MA,67
5,Armenia,TV-MA,1
6,Australia,TV-MA,44
7,Austria,TV-MA,6
8,Azerbaijan,TV-14,1
9,Bahamas,PG-13,1


In [640]:
directors_df = films_df.dropna(subset=['director', 'rating'])
print(directors_df.size)
directors_df = directors_df.assign(director = directors_df['director'].str.strip(' , ').str.split(', ')).explode('director')
print(directors_df.size)

74064
83724


In [641]:
directors_res = directors_df[['director', 'rating']].groupby(by=['director', 'rating'], as_index=False).size()
directors_res[30:40]

Unnamed: 0,director,rating,size
30,Abel Ferrara,R,1
31,Abhay Chopra,TV-MA,1
32,Abhijeet Deshpande,TV-14,1
33,Abhijit Kokate,TV-MA,1
34,Abhijit Panse,TV-14,2
35,Abhinav Shiv Tiwari,TV-MA,1
36,Abhinay Deo,TV-14,2
37,Abhinay Deo,TV-MA,1
38,Abhishek Chaubey,TV-14,3
39,Abhishek Chaubey,TV-MA,2


In [642]:
directors_res = directors_res.sort_values(['director', 'size'], ascending=[True, False])
res = most_common_rating(directors_res, 'director')
directors_res = pd.DataFrame(res)
directors_res.head(20)

Unnamed: 0,director,rating,cardinality
0,A. L. Vijay,TV-14,2
1,A. Raajdheep,TV-14,1
2,A. Salaam,TV-14,1
3,A.R. Murugadoss,"TV-14, TV-MA",1
4,Aadish Keluskar,TV-MA,1
5,Aamir Bashir,TV-MA,1
6,Aamir Khan,PG,1
7,Aanand Rai,TV-14,1
8,Aaron Burns,TV-MA,1
9,Aaron Hancox,TV-PG,1


### If it is possible to do the $5^{th}$ task faster, using for example inner join or sth like that, you can give me a response :P

In [643]:
directors_actors_df = films_df.dropna(subset=['director', 'cast'])
directors_actors_df = directors_actors_df.assign(director=directors_actors_df['director'].str.strip(' , ').str.split(', ')).explode('director')
directors_actors_df = directors_actors_df.assign(actor=directors_actors_df['cast'].str.strip(' , ').str.split(', ')).explode('actor')
directors_actors_df.head(20)

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,actor
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...,Sami Bouajila
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...,Tracy Gotoas
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...,Samuel Jouy
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...,Nabiha Akkari
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...,Sofia Lesaffre
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...,Salim Kechiouche
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...,Noureddine Farihi
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...,Geert Van Rampelberg
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...,Bakary Diombera
5,s6,TV Show,Midnight Mass,Mike Flanagan,"Kate Siegel, Zach Gilford, Hamish Linklater, H...",,"September 24, 2021",2021,TV-MA,1 Season,"TV Dramas, TV Horror, TV Mysteries",The arrival of a charismatic young priest brin...,Kate Siegel


In [644]:
directors_actors_df = directors_actors_df[['show_id', 'title', 'director', 'actor']] # don't need to take everything
indexes = np.where(directors_actors_df['director'] == directors_actors_df['actor']) # finding indexes where director is equal to actor
result = directors_actors_df.iloc[indexes].sort_values(by=['director']).groupby('director', as_index=False).size()
result.rename(columns = {'director':'director - actor', 'size':'how many times'}, inplace = True)
result.loc[result['how many times'] > 1]

Unnamed: 0,director - actor,how many times
5,Akiva Schaffer,2
11,Amy Poehler,2
12,Amy Schumer,2
21,Audu Paden,2
24,Barbra Streisand,2
29,Bo Burnham,3
37,Cem Yılmaz,3
42,Chia-Liang Liu,2
50,Clint Eastwood,4
62,Demetri Martin,2
