# Feature Enginering

## Import Module and Dataset

In [27]:
# Import modules
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [28]:
# Load the data
data = pd.read_csv('ratings_small.csv')
data

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205
...,...,...,...,...
99999,671,6268,2.5,1065579370
100000,671,6269,4.0,1065149201
100001,671,6365,4.0,1070940363
100002,671,6385,2.5,1070979663


## Aggregate

### Calculate aggregate values from ratings dataset

In [29]:
# Change data types of timestamp
pd.to_datetime(data['timestamp'], unit='s')

0        2009-12-14 02:52:24
1        2009-12-14 02:52:59
2        2009-12-14 02:53:02
3        2009-12-14 02:53:05
4        2009-12-14 02:53:25
                 ...        
99999    2003-10-08 02:16:10
100000   2003-10-03 02:46:41
100001   2003-12-09 03:26:03
100002   2003-12-09 14:21:03
100003   2004-01-22 15:18:44
Name: timestamp, Length: 100004, dtype: datetime64[ns]

In [30]:
# Check the result
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100004 entries, 0 to 100003
Data columns (total 4 columns):
 #   Column     Non-Null Count   Dtype  
---  ------     --------------   -----  
 0   userId     100004 non-null  int64  
 1   movieId    100004 non-null  int64  
 2   rating     100004 non-null  float64
 3   timestamp  100004 non-null  int64  
dtypes: float64(1), int64(3)
memory usage: 3.1 MB


In [31]:
# Checking duplicate values
data.duplicated().sum()

np.int64(0)

Now, we can make an aggregate value from table.
In this section, we want to make the data one to many

In [32]:
# Check rating using mean
data.groupby('movieId')['rating'].mean()

movieId
1         3.872470
2         3.401869
3         3.161017
4         2.384615
5         3.267857
            ...   
161944    5.000000
162376    4.500000
162542    5.000000
162672    3.000000
163949    5.000000
Name: rating, Length: 9066, dtype: float64

In [33]:
# Using aggregate function
data.groupby('movieId')['rating'].agg(['mean', 'median'])

Unnamed: 0_level_0,mean,median
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1
1,3.872470,4.0
2,3.401869,3.0
3,3.161017,3.0
4,2.384615,3.0
5,3.267857,3.0
...,...,...
161944,5.000000,5.0
162376,4.500000,4.5
162542,5.000000,5.0
162672,3.000000,3.0


In [34]:
# Aggregate from 2 columns or more
data2 = data.groupby('movieId').agg({'userId' : pd.Series.nunique, 'rating' : ['mean', 'median']})
data2

Unnamed: 0_level_0,userId,rating,rating
Unnamed: 0_level_1,nunique,mean,median
movieId,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
1,247,3.872470,4.0
2,107,3.401869,3.0
3,59,3.161017,3.0
4,13,2.384615,3.0
5,56,3.267857,3.0
...,...,...,...
161944,1,5.000000,5.0
162376,1,4.500000,4.5
162542,1,5.000000,5.0
162672,1,3.000000,3.0


## Join

There are five types of merging data, is:
1. Inner join
2. Outer Join
3. Left join
4. Right join
5. Cross join

### Load Dataframe

In [35]:
# First, load movies_metadata
mov = pd.read_csv("movies_metadata.csv")
mov

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,True,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",...,1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0
1,True,,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,...,1995-12-15,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0
2,True,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,...,1995-12-22,0.0,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0
3,True,,16000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",...,1995-12-22,81452156.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0
4,True,"{'id': 96871, 'name': 'Father of the Bride Col...",0,"[{'id': 35, 'name': 'Comedy'}]",,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,...,1995-02-10,76578911.0,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45458,True,,0,"[{'id': 18, 'name': 'Drama'}, {'id': 10751, 'n...",http://www.imdb.com/title/tt6209470/,439050,tt6209470,fa,رگ خواب,Rising and falling between a man and woman.,...,,0.0,90.0,"[{'iso_639_1': 'fa', 'name': 'فارسی'}]",Released,Rising and falling between a man and woman,Subdue,False,4.0,1.0
45459,True,,0,"[{'id': 18, 'name': 'Drama'}]",,111109,tt2028550,tl,Siglo ng Pagluluwal,An artist struggles to finish his work while a...,...,2011-11-17,0.0,360.0,"[{'iso_639_1': 'tl', 'name': ''}]",Released,,Century of Birthing,False,9.0,3.0
45460,True,,0,"[{'id': 28, 'name': 'Action'}, {'id': 18, 'nam...",,67758,tt0303758,en,Betrayal,"When one of her hits goes wrong, a professiona...",...,2003-08-01,0.0,90.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,A deadly game of wits.,Betrayal,False,3.8,6.0
45461,True,,0,[],,227506,tt0008536,en,Satana likuyushchiy,"In a small town live two brothers, one a minis...",...,1917-10-21,0.0,87.0,[],Released,,Satan Triumphant,False,0.0,0.0


### Get Dataframe only english release languange

In [36]:
# Make a new dataset with english languange released
mov1 = mov[(mov["status"] == "Released") & (mov['original_language'] == 'en')]
mov1

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,True,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",...,1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0
1,True,,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,...,1995-12-15,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0
2,True,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,...,1995-12-22,0.0,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0
3,True,,16000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",...,1995-12-22,81452156.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0
4,True,"{'id': 96871, 'name': 'Father of the Bride Col...",0,"[{'id': 35, 'name': 'Comedy'}]",,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,...,1995-02-10,76578911.0,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45456,True,,0,"[{'id': 878, 'name': 'Science Fiction'}]",,222848,tt0112613,en,Caged Heat 3000,It's the year 3000 AD. The world's most danger...,...,1995-01-01,0.0,85.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Caged Heat 3000,False,3.5,1.0
45457,True,,0,"[{'id': 18, 'name': 'Drama'}, {'id': 28, 'name...",,30840,tt0102797,en,Robin Hood,"Yet another version of the classic epic, with ...",...,1991-05-13,0.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Robin Hood,False,5.7,26.0
45460,True,,0,"[{'id': 28, 'name': 'Action'}, {'id': 18, 'nam...",,67758,tt0303758,en,Betrayal,"When one of her hits goes wrong, a professiona...",...,2003-08-01,0.0,90.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,A deadly game of wits.,Betrayal,False,3.8,6.0
45461,True,,0,[],,227506,tt0008536,en,Satana likuyushchiy,"In a small town live two brothers, one a minis...",...,1917-10-21,0.0,87.0,[],Released,,Satan Triumphant,False,0.0,0.0


In [38]:
mov1.info()

<class 'pandas.core.frame.DataFrame'>
Index: 31952 entries, 0 to 45462
Data columns (total 24 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   adult                  31952 non-null  bool   
 1   belongs_to_collection  3099 non-null   object 
 2   budget                 31952 non-null  int64  
 3   genres                 31952 non-null  object 
 4   homepage               6103 non-null   object 
 5   id                     31952 non-null  int64  
 6   imdb_id                31941 non-null  object 
 7   original_language      31952 non-null  object 
 8   original_title         31952 non-null  object 
 9   overview               31888 non-null  object 
 10  popularity             31952 non-null  float64
 11  poster_path            31641 non-null  object 
 12  production_companies   31952 non-null  object 
 13  production_countries   31952 non-null  object 
 14  release_date           31893 non-null  object 
 15  revenue

### Merge Between movies rating and english released dataset

In [57]:
# Now, we want to merge movies rating and english released dataset
mov2 = mov1.merge(data, left_on = 'id', right_on = 'movieId', how ='inner')
mov2

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,status,tagline,title,video,vote_average,vote_count,userId,movieId,rating,timestamp
0,True,,60000000,"[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...",,949,tt0113277,en,Heat,"Obsessive master thief, Neil McCauley leads a ...",...,Released,A Los Angeles Crime Saga,Heat,False,7.7,1886.0,23,949,3.5,1148721092
1,True,,60000000,"[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...",,949,tt0113277,en,Heat,"Obsessive master thief, Neil McCauley leads a ...",...,Released,A Los Angeles Crime Saga,Heat,False,7.7,1886.0,102,949,4.0,956598942
2,True,,60000000,"[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...",,949,tt0113277,en,Heat,"Obsessive master thief, Neil McCauley leads a ...",...,Released,A Los Angeles Crime Saga,Heat,False,7.7,1886.0,232,949,2.0,955092697
3,True,,60000000,"[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...",,949,tt0113277,en,Heat,"Obsessive master thief, Neil McCauley leads a ...",...,Released,A Los Angeles Crime Saga,Heat,False,7.7,1886.0,242,949,5.0,956688825
4,True,,60000000,"[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...",,949,tt0113277,en,Heat,"Obsessive master thief, Neil McCauley leads a ...",...,Released,A Los Angeles Crime Saga,Heat,False,7.7,1886.0,263,949,3.0,1117846575
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33237,True,"{'id': 123720, 'name': 'Frankenstein (Hammer S...",0,"[{'id': 27, 'name': 'Horror'}, {'id': 878, 'na...",,3104,tt0061683,en,Frankenstein Created Woman,A deformed tormented girl drowns herself after...,...,Released,Now Frankenstein has created a beautiful woman...,Frankenstein Created Woman,False,5.9,33.0,518,3104,5.0,945363668
33238,True,"{'id': 123720, 'name': 'Frankenstein (Hammer S...",0,"[{'id': 27, 'name': 'Horror'}, {'id': 878, 'na...",,3104,tt0061683,en,Frankenstein Created Woman,A deformed tormented girl drowns herself after...,...,Released,Now Frankenstein has created a beautiful woman...,Frankenstein Created Woman,False,5.9,33.0,547,3104,5.0,981312902
33239,True,"{'id': 123720, 'name': 'Frankenstein (Hammer S...",0,"[{'id': 27, 'name': 'Horror'}, {'id': 878, 'na...",,3104,tt0061683,en,Frankenstein Created Woman,A deformed tormented girl drowns herself after...,...,Released,Now Frankenstein has created a beautiful woman...,Frankenstein Created Woman,False,5.9,33.0,585,3104,4.0,975362967
33240,True,"{'id': 123720, 'name': 'Frankenstein (Hammer S...",0,"[{'id': 27, 'name': 'Horror'}, {'id': 878, 'na...",,3104,tt0061683,en,Frankenstein Created Woman,A deformed tormented girl drowns herself after...,...,Released,Now Frankenstein has created a beautiful woman...,Frankenstein Created Woman,False,5.9,33.0,624,3104,4.0,1019124279


In [None]:
# Get the info
mov2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 33242 entries, 0 to 33241
Data columns (total 28 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   adult                  33242 non-null  bool   
 1   belongs_to_collection  8670 non-null   object 
 2   budget                 33242 non-null  int64  
 3   genres                 33242 non-null  object 
 4   homepage               8430 non-null   object 
 5   id                     33242 non-null  int64  
 6   imdb_id                33242 non-null  object 
 7   original_language      33242 non-null  object 
 8   original_title         33242 non-null  object 
 9   overview               33237 non-null  object 
 10  popularity             33242 non-null  float64
 11  poster_path            33207 non-null  object 
 12  production_companies   33242 non-null  object 
 13  production_countries   33242 non-null  object 
 14  release_date           33213 non-null  object 
 15  re

## Feature Engineering

In [41]:
# Now, we want to make a feature engineering
mov2['profit'] = mov2['revenue'] - mov2['budget']
mov2

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,tagline,title,video,vote_average,vote_count,userId,movieId,rating,timestamp,profit
0,True,,60000000,"[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...",,949,tt0113277,en,Heat,"Obsessive master thief, Neil McCauley leads a ...",...,A Los Angeles Crime Saga,Heat,False,7.7,1886.0,23,949,3.5,1148721092,127436818.0
1,True,,60000000,"[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...",,949,tt0113277,en,Heat,"Obsessive master thief, Neil McCauley leads a ...",...,A Los Angeles Crime Saga,Heat,False,7.7,1886.0,102,949,4.0,956598942,127436818.0
2,True,,60000000,"[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...",,949,tt0113277,en,Heat,"Obsessive master thief, Neil McCauley leads a ...",...,A Los Angeles Crime Saga,Heat,False,7.7,1886.0,232,949,2.0,955092697,127436818.0
3,True,,60000000,"[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...",,949,tt0113277,en,Heat,"Obsessive master thief, Neil McCauley leads a ...",...,A Los Angeles Crime Saga,Heat,False,7.7,1886.0,242,949,5.0,956688825,127436818.0
4,True,,60000000,"[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...",,949,tt0113277,en,Heat,"Obsessive master thief, Neil McCauley leads a ...",...,A Los Angeles Crime Saga,Heat,False,7.7,1886.0,263,949,3.0,1117846575,127436818.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33237,True,"{'id': 123720, 'name': 'Frankenstein (Hammer S...",0,"[{'id': 27, 'name': 'Horror'}, {'id': 878, 'na...",,3104,tt0061683,en,Frankenstein Created Woman,A deformed tormented girl drowns herself after...,...,Now Frankenstein has created a beautiful woman...,Frankenstein Created Woman,False,5.9,33.0,518,3104,5.0,945363668,0.0
33238,True,"{'id': 123720, 'name': 'Frankenstein (Hammer S...",0,"[{'id': 27, 'name': 'Horror'}, {'id': 878, 'na...",,3104,tt0061683,en,Frankenstein Created Woman,A deformed tormented girl drowns herself after...,...,Now Frankenstein has created a beautiful woman...,Frankenstein Created Woman,False,5.9,33.0,547,3104,5.0,981312902,0.0
33239,True,"{'id': 123720, 'name': 'Frankenstein (Hammer S...",0,"[{'id': 27, 'name': 'Horror'}, {'id': 878, 'na...",,3104,tt0061683,en,Frankenstein Created Woman,A deformed tormented girl drowns herself after...,...,Now Frankenstein has created a beautiful woman...,Frankenstein Created Woman,False,5.9,33.0,585,3104,4.0,975362967,0.0
33240,True,"{'id': 123720, 'name': 'Frankenstein (Hammer S...",0,"[{'id': 27, 'name': 'Horror'}, {'id': 878, 'na...",,3104,tt0061683,en,Frankenstein Created Woman,A deformed tormented girl drowns herself after...,...,Now Frankenstein has created a beautiful woman...,Frankenstein Created Woman,False,5.9,33.0,624,3104,4.0,1019124279,0.0


In [44]:
# Film that hte biggest profit
mov2[['original_title', 'profit']].sort_values('profit', ascending=False)

Unnamed: 0,original_title,profit
6744,Titanic,1.645034e+09
6771,Titanic,1.645034e+09
6779,Titanic,1.645034e+09
6778,Titanic,1.645034e+09
6777,Titanic,1.645034e+09
...,...,...
11107,The 13th Warrior,-9.830110e+07
11106,The 13th Warrior,-9.830110e+07
11105,The 13th Warrior,-9.830110e+07
11104,The 13th Warrior,-9.830110e+07


In [46]:
# Film more than 3 hours
mov2['long_duration_movie'] = mov2['runtime'] >= 180
mov2

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,title,video,vote_average,vote_count,userId,movieId,rating,timestamp,profit,long_duration_movie
0,True,,60000000,"[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...",,949,tt0113277,en,Heat,"Obsessive master thief, Neil McCauley leads a ...",...,Heat,False,7.7,1886.0,23,949,3.5,1148721092,127436818.0,False
1,True,,60000000,"[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...",,949,tt0113277,en,Heat,"Obsessive master thief, Neil McCauley leads a ...",...,Heat,False,7.7,1886.0,102,949,4.0,956598942,127436818.0,False
2,True,,60000000,"[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...",,949,tt0113277,en,Heat,"Obsessive master thief, Neil McCauley leads a ...",...,Heat,False,7.7,1886.0,232,949,2.0,955092697,127436818.0,False
3,True,,60000000,"[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...",,949,tt0113277,en,Heat,"Obsessive master thief, Neil McCauley leads a ...",...,Heat,False,7.7,1886.0,242,949,5.0,956688825,127436818.0,False
4,True,,60000000,"[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...",,949,tt0113277,en,Heat,"Obsessive master thief, Neil McCauley leads a ...",...,Heat,False,7.7,1886.0,263,949,3.0,1117846575,127436818.0,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33237,True,"{'id': 123720, 'name': 'Frankenstein (Hammer S...",0,"[{'id': 27, 'name': 'Horror'}, {'id': 878, 'na...",,3104,tt0061683,en,Frankenstein Created Woman,A deformed tormented girl drowns herself after...,...,Frankenstein Created Woman,False,5.9,33.0,518,3104,5.0,945363668,0.0,False
33238,True,"{'id': 123720, 'name': 'Frankenstein (Hammer S...",0,"[{'id': 27, 'name': 'Horror'}, {'id': 878, 'na...",,3104,tt0061683,en,Frankenstein Created Woman,A deformed tormented girl drowns herself after...,...,Frankenstein Created Woman,False,5.9,33.0,547,3104,5.0,981312902,0.0,False
33239,True,"{'id': 123720, 'name': 'Frankenstein (Hammer S...",0,"[{'id': 27, 'name': 'Horror'}, {'id': 878, 'na...",,3104,tt0061683,en,Frankenstein Created Woman,A deformed tormented girl drowns herself after...,...,Frankenstein Created Woman,False,5.9,33.0,585,3104,4.0,975362967,0.0,False
33240,True,"{'id': 123720, 'name': 'Frankenstein (Hammer S...",0,"[{'id': 27, 'name': 'Horror'}, {'id': 878, 'na...",,3104,tt0061683,en,Frankenstein Created Woman,A deformed tormented girl drowns herself after...,...,Frankenstein Created Woman,False,5.9,33.0,624,3104,4.0,1019124279,0.0,False


In [53]:
mov2['title_length'] = mov2['original_title'].str.len()
mov2['title_length'] < 10

0         True
1         True
2         True
3         True
4         True
         ...  
33237    False
33238    False
33239    False
33240    False
33241    False
Name: title_length, Length: 33242, dtype: bool

In [51]:
# Make length of original_title
count_list = []
for i in range(len(mov2)):
    count = len(mov2['title'][i])
    count_list.append(count)

mov2['title_len'] = count_list
mov2['title_len']

0         4
1         4
2         4
3         4
4         4
         ..
33237    26
33238    26
33239    26
33240    26
33241    26
Name: title_len, Length: 33242, dtype: int64

In [54]:
# Make runtime column from minutes to hours
mov2['duration_hours'] = pd.to_datetime(mov2.runtime, unit='m').dt.strftime('%H:%M')
mov2['duration_hours']

0        02:50
1        02:50
2        02:50
3        02:50
4        02:50
         ...  
33237    01:32
33238    01:32
33239    01:32
33240    01:32
33241    01:32
Name: duration_hours, Length: 33242, dtype: object