## Optimizing A Data Set for Memory Usage.
## We optimize for memory management.

In [1]:
##this steps out of the normal things we were doing
##and you get to realize there is a lot we can do with pandas
import pandas as pd
employees=pd.read_csv("employees.csv")
pd.DataFrame(employees)
employees

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
0,Douglas,Male,8/6/93,,True,Marketing
1,Thomas,Male,3/31/96,61933.0,True,
2,Maria,Female,,130590.0,False,Finance
3,Jerry,,3/4/05,138705.0,True,Finance
4,Larry,Male,1/24/98,101004.0,True,IT
...,...,...,...,...,...,...
996,Phillip,Male,1/31/84,42392.0,False,Finance
997,Russell,Male,5/20/13,96914.0,False,Product
998,Larry,Male,4/20/13,60500.0,False,Business Dev
999,Albert,Male,5/15/12,129949.0,True,Sales


In [20]:
##yeah see the memory usage it  favourable
employees.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1001 entries, 0 to 1000
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   First Name  933 non-null    object 
 1   Gender      854 non-null    object 
 2   Start Date  999 non-null    object 
 3   Salary      999 non-null    float64
 4   Mgmt        933 non-null    object 
 5   Team        957 non-null    object 
dtypes: float64(1), object(5)
memory usage: 47.1+ KB


### Convert Data Types with the astype Method

In [3]:
employees["Mgmt"] = employees["Mgmt"].astype(bool)
employees

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
0,Douglas,Male,8/6/93,,True,Marketing
1,Thomas,Male,3/31/96,61933.0,True,
2,Maria,Female,,130590.0,False,Finance
3,Jerry,,3/4/05,138705.0,True,Finance
4,Larry,Male,1/24/98,101004.0,True,IT
...,...,...,...,...,...,...
996,Phillip,Male,1/31/84,42392.0,False,Finance
997,Russell,Male,5/20/13,96914.0,False,Product
998,Larry,Male,4/20/13,60500.0,False,Business Dev
999,Albert,Male,5/15/12,129949.0,True,Sales


In [6]:
employees.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1001 entries, 0 to 1000
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   First Name  933 non-null    object 
 1   Gender      854 non-null    object 
 2   Start Date  999 non-null    object 
 3   Salary      999 non-null    float64
 4   Mgmt        1001 non-null   bool   
 5   Team        957 non-null    object 
dtypes: bool(1), float64(1), object(4)
memory usage: 40.2+ KB


In [25]:
employees["Salary"].fillna(2342).tail()

996      42392.0
997      96914.0
998      60500.0
999     129949.0
1000      2342.0
Name: Salary, dtype: float64

In [12]:
employees["Salary"].fillna(0).astype(int).tail()

996      42392
997      96914
998      60500
999     129949
1000         0
Name: Salary, dtype: int64

In [13]:
employees["Salary"] = employees["Salary"].fillna(0).astype(int)

In [14]:
employees.nunique()

First Name    200
Gender          2
Start Date    971
Salary        995
Mgmt            2
Team           10
dtype: int64

## Filtering by a Single Condition

In [27]:
employees[employees["Team"] != "Finance"]

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
0,Douglas,Male,1993-08-06,0,True,Marketing
1,Thomas,Male,1996-03-31,61933,True,
4,Larry,Male,1998-01-24,101004,True,IT
5,Dennis,Male,1987-04-18,115163,False,Legal
6,Ruby,Female,1987-08-17,65476,True,Product
...,...,...,...,...,...,...
995,Henry,,2014-11-23,132483,False,Distribution
997,Russell,Male,2013-05-20,96914,False,Product
998,Larry,Male,2013-04-20,60500,False,Business Dev
999,Albert,Male,2012-05-15,129949,True,Sales


In [28]:
employees[employees["Mgmt"]].head()

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
0,Douglas,Male,1993-08-06,0,True,Marketing
1,Thomas,Male,1996-03-31,61933,True,
3,Jerry,,2005-03-04,138705,True,Finance
4,Larry,Male,1998-01-24,101004,True,IT
6,Ruby,Female,1987-08-17,65476,True,Product


In [29]:
high_earners = employees["Salary"] > 100000
high_earners.head()

0    False
1    False
2     True
3     True
4     True
Name: Salary, dtype: bool

In [33]:
high_earners = employees["Salary"] > 100000
employees[high_earners].head()

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
2,Maria,Female,,130590.0,False,Finance
3,Jerry,,3/4/05,138705.0,True,Finance
4,Larry,Male,1/24/98,101004.0,True,IT
5,Dennis,Male,4/18/87,115163.0,False,Legal
9,Frances,Female,8/8/02,139852.0,True,Business Dev


##  Filtering by Multiple Conditions

### The AND Condition &

In [31]:
is_female = employees["Gender"] == "Female"

In [32]:
in_biz_dev = employees["Team"] == "Business Dev"

In [34]:
is_manager = employees["Mgmt"]
employees[is_female & in_biz_dev & is_manager].head()

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
9,Frances,Female,2002-08-08,139852,True,Business Dev
38,Stephanie,Female,1986-09-13,36844,True,Business Dev
66,Nancy,Female,2012-12-15,125250,True,Business Dev
92,Linda,Female,2000-05-25,119009,True,Business Dev
111,Bonnie,Female,1999-12-17,42153,True,Business Dev


### The OR Condition

In [35]:
earning_below_40k = employees["Salary"] < 40000
started_after_2015 = employees["Start Date"] > "2015-01-01"

In [36]:
employees[earning_below_40k | started_after_2015].tail()

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
958,Gloria,Female,1987-10-24,39833,False,Engineering
964,Bruce,Male,1980-05-07,35802,True,Sales
967,Thomas,Male,2016-03-12,105681,False,Engineering
989,Justin,,1991-02-10,38344,False,Legal
1000,,,NaT,0,True,


### The isin Method rarely used

In [41]:
sales = employees["Team"] == "Sales"
legal = employees["Team"] == "Legal"
mktg  = employees["Team"] == "Marketing"
employees[sales | legal | mktg].head()

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
0,Douglas,Male,1993-08-06,0,True,Marketing
5,Dennis,Male,1987-04-18,115163,False,Legal
11,Julie,Female,1997-10-26,102508,True,Legal
13,Gary,Male,2008-01-27,109831,False,Sales
20,Lois,,1995-04-22,64714,True,Legal


In [42]:
all_star_teams = ["Sales", "Legal", "Marketing"]
on_all_star_teams = employees["Team"].isin(all_star_teams)
employees[on_all_star_teams].head()

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
0,Douglas,Male,1993-08-06,0,True,Marketing
5,Dennis,Male,1987-04-18,115163,False,Legal
11,Julie,Female,1997-10-26,102508,True,Legal
13,Gary,Male,2008-01-27,109831,False,Sales
20,Lois,,1995-04-22,64714,True,Legal


### The between Method

In [44]:
between_80k_and_90k = employees["Salary"].between(80000, 90000)
employees[between_80k_and_90k].head()

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
19,Donna,Female,2010-07-22,81014,False,Product
31,Joyce,,2005-02-20,88657,False,Product
35,Theresa,Female,2006-10-10,85182,False,Sales
45,Roger,Male,1980-04-17,88010,True,Sales
54,Sara,Female,2007-08-15,83677,False,Engineering


In [45]:
eighties_folk = employees["Start Date"].between(
    left = "1980-01-01",
    right = "1990-01-01"
)

employees[eighties_folk].head()

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
5,Dennis,Male,1987-04-18,115163,False,Legal
6,Ruby,Female,1987-08-17,65476,True,Product
10,Louise,Female,1980-08-12,63241,True,
12,Brandon,Male,1980-12-01,112807,True,HR
17,Shawn,Male,1986-12-07,111737,False,Product


In [46]:
name_starts_with_r = employees["First Name"].between("R", "S")
employees[name_starts_with_r].head()

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
6,Ruby,Female,1987-08-17,65476,True,Product
36,Rachel,Female,2009-02-16,142032,False,Business Dev
45,Roger,Male,1980-04-17,88010,True,Sales
67,Rachel,Female,1999-08-16,51178,True,Finance
78,Robin,Female,1983-06-04,114797,True,Sales


###  The isnull and notnull Methods

In [47]:
employees.head()

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
0,Douglas,Male,1993-08-06,0,True,Marketing
1,Thomas,Male,1996-03-31,61933,True,
2,Maria,Female,NaT,130590,False,Finance
3,Jerry,,2005-03-04,138705,True,Finance
4,Larry,Male,1998-01-24,101004,True,IT


In [48]:
employees["Team"].isnull().head()

0    False
1     True
2    False
3    False
4    False
Name: Team, dtype: bool

In [49]:
employees["Start Date"].isnull().head()

0    False
1    False
2     True
3    False
4    False
Name: Start Date, dtype: bool

In [50]:
employees["Team"].notnull().head()

0     True
1    False
2     True
3     True
4     True
Name: Team, dtype: bool

In [51]:
(~employees["Team"].isnull()).head()

0     True
1    False
2     True
3     True
4     True
Name: Team, dtype: bool

In [52]:
no_team = employees["Team"].isnull()
employees[no_team].head()

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
1,Thomas,Male,1996-03-31,61933,True,
10,Louise,Female,1980-08-12,63241,True,
23,,Male,2012-06-14,125792,True,
32,,Male,1998-08-21,122340,True,
91,James,,2005-01-26,128771,False,


In [53]:
has_name = employees["First Name"].notnull()
employees[has_name].tail()

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
995,Henry,,2014-11-23,132483,False,Distribution
996,Phillip,Male,1984-01-31,42392,False,Finance
997,Russell,Male,2013-05-20,96914,False,Product
998,Larry,Male,2013-04-20,60500,False,Business Dev
999,Albert,Male,2012-05-15,129949,True,Sales


In [36]:
employees

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
0,Douglas,Male,1993-08-06,,True,Marketing
1,Thomas,Male,1996-03-31,61933.0,True,
2,Maria,Female,NaT,130590.0,False,Finance
3,Jerry,,2005-03-04,138705.0,True,Finance
4,Larry,Male,1998-01-24,101004.0,True,IT
...,...,...,...,...,...,...
996,Phillip,Male,1984-01-31,42392.0,False,Finance
997,Russell,Male,2013-05-20,96914.0,False,Product
998,Larry,Male,2013-04-20,60500.0,False,Business Dev
999,Albert,Male,2012-05-15,129949.0,True,Sales


In [38]:
employees.dropna()

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
4,Larry,Male,1998-01-24,101004.0,True,IT
5,Dennis,Male,1987-04-18,115163.0,False,Legal
6,Ruby,Female,1987-08-17,65476.0,True,Product
8,Angela,Female,2005-11-22,95570.0,True,Engineering
9,Frances,Female,2002-08-08,139852.0,True,Business Dev
...,...,...,...,...,...,...
994,George,Male,2013-06-21,98874.0,True,Marketing
996,Phillip,Male,1984-01-31,42392.0,False,Finance
997,Russell,Male,2013-05-20,96914.0,False,Product
998,Larry,Male,2013-04-20,60500.0,False,Business Dev


In [57]:
employees.dropna(how = "all").tail()

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
995,Henry,,2014-11-23,132483.0,False,Distribution
996,Phillip,Male,1984-01-31,42392.0,False,Finance
997,Russell,Male,2013-05-20,96914.0,False,Product
998,Larry,Male,2013-04-20,60500.0,False,Business Dev
999,Albert,Male,2012-05-15,129949.0,True,Sales


In [42]:
employees.dropna(how = "any")

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
4,Larry,Male,1998-01-24,101004.0,True,IT
5,Dennis,Male,1987-04-18,115163.0,False,Legal
6,Ruby,Female,1987-08-17,65476.0,True,Product
8,Angela,Female,2005-11-22,95570.0,True,Engineering
9,Frances,Female,2002-08-08,139852.0,True,Business Dev
...,...,...,...,...,...,...
994,George,Male,2013-06-21,98874.0,True,Marketing
996,Phillip,Male,1984-01-31,42392.0,False,Finance
997,Russell,Male,2013-05-20,96914.0,False,Product
998,Larry,Male,2013-04-20,60500.0,False,Business Dev


In [40]:
employees.dropna(subset = ["Gender"]).tail()

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
994,George,Male,2013-06-21,98874.0,True,Marketing
996,Phillip,Male,1984-01-31,42392.0,False,Finance
997,Russell,Male,2013-05-20,96914.0,False,Product
998,Larry,Male,2013-04-20,60500.0,False,Business Dev
999,Albert,Male,2012-05-15,129949.0,True,Sales


In [60]:
employees.dropna(subset = ["Start Date", "Salary"]).head()

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
1,Thomas,Male,1996-03-31,61933.0,True,
3,Jerry,,2005-03-04,138705.0,True,Finance
4,Larry,Male,1998-01-24,101004.0,True,IT
5,Dennis,Male,1987-04-18,115163.0,False,Legal
6,Ruby,Female,1987-08-17,65476.0,True,Product


In [61]:
employees.dropna(how = "any", thresh = 4).head()

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
0,Douglas,Male,1993-08-06,,True,Marketing
1,Thomas,Male,1996-03-31,61933.0,True,
2,Maria,Female,NaT,130590.0,False,Finance
3,Jerry,,2005-03-04,138705.0,True,Finance
4,Larry,Male,1998-01-24,101004.0,True,IT


## Dealing with Duplicates

### The duplicated Method

In [9]:
employees["Team"].head()

0    Marketing
1          NaN
2      Finance
3      Finance
4           IT
Name: Team, dtype: object

In [63]:
employees["Team"].duplicated().head()

0    False
1    False
2    False
3     True
4    False
Name: Team, dtype: bool

In [64]:
employees["Team"].duplicated(keep = "first").head()

0    False
1    False
2    False
3     True
4    False
Name: Team, dtype: bool

In [65]:
employees["Team"].duplicated(keep = "last")

0        True
1        True
2        True
3        True
4        True
        ...  
996     False
997     False
998     False
999     False
1000    False
Name: Team, Length: 1001, dtype: bool

In [66]:
(~employees["Team"].duplicated()).head()

0     True
1     True
2     True
3    False
4     True
Name: Team, dtype: bool

In [67]:
first_one_in_team = ~employees["Team"].duplicated()
employees[first_one_in_team]

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
0,Douglas,Male,1993-08-06,,True,Marketing
1,Thomas,Male,1996-03-31,61933.0,True,
2,Maria,Female,NaT,130590.0,False,Finance
4,Larry,Male,1998-01-24,101004.0,True,IT
5,Dennis,Male,1987-04-18,115163.0,False,Legal
6,Ruby,Female,1987-08-17,65476.0,True,Product
8,Angela,Female,2005-11-22,95570.0,True,Engineering
9,Frances,Female,2002-08-08,139852.0,True,Business Dev
12,Brandon,Male,1980-12-01,112807.0,True,HR
13,Gary,Male,2008-01-27,109831.0,False,Sales


### The drop_duplicates Method

In [68]:
employees.drop_duplicates()

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
0,Douglas,Male,1993-08-06,,True,Marketing
1,Thomas,Male,1996-03-31,61933.0,True,
2,Maria,Female,NaT,130590.0,False,Finance
3,Jerry,,2005-03-04,138705.0,True,Finance
4,Larry,Male,1998-01-24,101004.0,True,IT
...,...,...,...,...,...,...
996,Phillip,Male,1984-01-31,42392.0,False,Finance
997,Russell,Male,2013-05-20,96914.0,False,Product
998,Larry,Male,2013-04-20,60500.0,False,Business Dev
999,Albert,Male,2012-05-15,129949.0,True,Sales


In [69]:
employees.drop_duplicates(subset = ["Team"])

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
0,Douglas,Male,1993-08-06,,True,Marketing
1,Thomas,Male,1996-03-31,61933.0,True,
2,Maria,Female,NaT,130590.0,False,Finance
4,Larry,Male,1998-01-24,101004.0,True,IT
5,Dennis,Male,1987-04-18,115163.0,False,Legal
6,Ruby,Female,1987-08-17,65476.0,True,Product
8,Angela,Female,2005-11-22,95570.0,True,Engineering
9,Frances,Female,2002-08-08,139852.0,True,Business Dev
12,Brandon,Male,1980-12-01,112807.0,True,HR
13,Gary,Male,2008-01-27,109831.0,False,Sales


In [70]:
employees.drop_duplicates(subset = ["Team"], keep = "last")

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
988,Alice,Female,2004-10-05,47638.0,False,HR
989,Justin,,1991-02-10,38344.0,False,Legal
990,Robin,Female,1987-07-24,100765.0,True,IT
993,Tina,Female,1997-05-15,56450.0,True,Engineering
994,George,Male,2013-06-21,98874.0,True,Marketing
995,Henry,,2014-11-23,132483.0,False,Distribution
996,Phillip,Male,1984-01-31,42392.0,False,Finance
997,Russell,Male,2013-05-20,96914.0,False,Product
998,Larry,Male,2013-04-20,60500.0,False,Business Dev
999,Albert,Male,2012-05-15,129949.0,True,Sales


In [71]:
employees.drop_duplicates(subset = ["First Name"], keep = False)

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
5,Dennis,Male,1987-04-18,115163.0,False,Legal
8,Angela,Female,2005-11-22,95570.0,True,Engineering
33,Jean,Female,1993-12-18,119082.0,False,Business Dev
190,Carol,Female,1996-03-19,57783.0,False,Finance
291,Tammy,Female,1984-11-11,132839.0,True,IT
495,Eugene,Male,1984-05-24,81077.0,False,Sales
688,Brian,Male,2007-04-07,93901.0,True,Legal
832,Keith,Male,2003-02-12,120672.0,False,Legal
887,David,Male,2009-12-05,92242.0,False,Legal


In [72]:
name_is_douglas = employees["First Name"] == "Douglas"
is_male = employees["Gender"] == "Male"
employees[name_is_douglas & is_male]

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
0,Douglas,Male,1993-08-06,,True,Marketing
217,Douglas,Male,1999-09-03,83341.0,True,IT
322,Douglas,Male,2002-01-08,41428.0,False,Product
835,Douglas,Male,2007-08-04,132175.0,False,Engineering


In [73]:
employees.drop_duplicates(subset = ["Gender", "Team"]).head()

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
0,Douglas,Male,1993-08-06,,True,Marketing
1,Thomas,Male,1996-03-31,61933.0,True,
2,Maria,Female,NaT,130590.0,False,Finance
3,Jerry,,2005-03-04,138705.0,True,Finance
4,Larry,Male,1998-01-24,101004.0,True,IT


## Coding Challenge

In [10]:
netflix=pd.read_csv("netflix.csv")
pd.DataFrame(netflix)
netflix.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5837 entries, 0 to 5836
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   title       5837 non-null   object
 1   director    3936 non-null   object
 2   date_added  5195 non-null   object
 3   type        5837 non-null   object
dtypes: object(4)
memory usage: 182.5+ KB


In [14]:
netflix = pd.read_csv("netflix.csv", parse_dates = ["date_added"],format=["%d%m%y])
netflix

SyntaxError: unterminated string literal (detected at line 1) (1865033106.py, line 1)

In [15]:
netflix.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5837 entries, 0 to 5836
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   title       5837 non-null   object
 1   director    3936 non-null   object
 2   date_added  5195 non-null   object
 3   type        5837 non-null   object
dtypes: object(4)
memory usage: 182.5+ KB


In [16]:
directed_by_robert_rodriguez = ( netflix["director"] == "Robert Rodriguez")
is_movie = netflix["type"] == "Movie"

netflix[directed_by_robert_rodriguez & is_movie]

Unnamed: 0,title,director,date_added,type
1384,Spy Kids: All the Time in the World,Robert Rodriguez,19-Feb-19,Movie
1416,Spy Kids 3: Game Over,Robert Rodriguez,1-Apr-19,Movie
1460,Spy Kids 2: The Island of Lost Dreams,Robert Rodriguez,8-Mar-19,Movie
2890,Sin City,Robert Rodriguez,1-Oct-19,Movie
3836,Shorts,Robert Rodriguez,1-Jul-19,Movie
3883,Spy Kids,Robert Rodriguez,1-Apr-19,Movie


In [17]:
added_on_july_31 = netflix["date_added"] == "2019-07-31"
directed_by_altman = netflix["director"] == "Robert Altman"
netflix[added_on_july_31 | directed_by_altman]

Unnamed: 0,title,director,date_added,type
611,Popeye,Robert Altman,24-Nov-19,Movie
1092,Gosford Park,Robert Altman,1-Nov-19,Movie


In [18]:
directors = ["Orson Welles", "Aditya Kripalani", "Sam Raimi"]
target_directors = netflix["director"].isin(directors)
netflix[target_directors]

Unnamed: 0,title,director,date_added,type
946,The Stranger,Orson Welles,19-Jul-18,Movie
1870,The Gift,Sam Raimi,20-Nov-19,Movie
3706,Spider-Man 3,Sam Raimi,1-Nov-19,Movie
4243,Tikli and Laxmi Bomb,Aditya Kripalani,1-Aug-18,Movie
4475,The Other Side of the Wind,Orson Welles,2-Nov-18,Movie
5115,Tottaa Pataaka Item Maal,Aditya Kripalani,25-Jun-19,Movie


In [19]:
may_movies = netflix["date_added"].between(
    "2019-05-01", "2019-06-01"
)

netflix[may_movies].head()
may_movies = netflix["date_added"].between(
    "2017-05-01", "2019-06-")
netflix[may_movies].tail()

Unnamed: 0,title,director,date_added,type


In [54]:
netflix.dropna(subset = ["director"]).head()

Unnamed: 0,title,director,date_added,type
1,A Patch of Fog,Michael Lennox,15-Apr-17,Movie
3,Uriyadi 2,Vijay Kumar,2-Aug-19,Movie
4,Shrek the Musical,Jason Moore,29-Dec-13,Movie
5,Schubert In Love,Lars Büchel,1-Mar-18,Movie
6,We Have Always Lived in the Castle,Stacie Passon,14-Sep-19,Movie


In [20]:
netflix.drop_duplicates(subset = ["date_added"], keep = False)

Unnamed: 0,title,director,date_added,type
4,Shrek the Musical,Jason Moore,29-Dec-13,Movie
12,Without Gorky,Cosima Spender,31-May-17,Movie
30,Anjelah Johnson: Not Fancy,Jay Karas,2-Oct-15,Movie
38,One Last Thing,Tim Rouhana,25-Aug-19,Movie
70,Marvel's Iron Man & Hulk: Heroes United,Leo Riley,16-Feb-14,Movie
...,...,...,...,...
5748,Menorca,John Barnard,27-Aug-17,Movie
5749,Green Room,Jeremy Saulnier,12-Nov-18,Movie
5788,Chris Brown: Welcome to My Life,Andrew Sandler,7-Oct-17,Movie
5789,A Very Murray Christmas,Sofia Coppola,4-Dec-15,Movie
