In [1]:
import pandas as pd
import ast
import requests
import nltk
from nltk.stem.porter import PorterStemmer
import sklearn
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pickle
import operator


# Read the CSV files
movies = pd.read_csv('/content/drive/MyDrive/tmdb_5000_movies.csv')
credits = pd.read_csv('/content/drive/MyDrive/tmdb_5000_credits.csv')

# Merge the datasets
dataset = movies.merge(credits, on='title')

# Select relevant columns
dataset = dataset[['movie_id','title','tagline','overview','genres','keywords','cast','crew','popularity','release_date']]



In [2]:
# Function to fetch trailer key using movie ID
def fetch_trailer_key(movie_id):
    url = f"https://api.themoviedb.org/3/movie/{movie_id}/videos?api_key={API_KEY}"
    data = requests.get(url).json()
    results = data.get('results', [])
    if results:
        return results[0]['key']
    return None

In [3]:
# Function to fetch poster path using movie ID
def fetch_poster(movie_id):
    url = f"https://api.themoviedb.org/3/movie/{movie_id}?api_key={API_KEY}&language=en-US"
    data = requests.get(url).json()
    poster_path = data.get('poster_path')
    if poster_path:
        return "https://image.tmdb.org/t/p/w500/" + poster_path
    return None

In [4]:
# Define API key for movie details
API_KEY = "727f59434bfa62e867027f2696613509"


In [5]:
# Fetch poster paths and trailer keys for each movie
poster_paths = []
trailer_keys = []
for movie_id in dataset['movie_id']:
    poster_paths.append(fetch_poster(movie_id))
    trailer_keys.append(fetch_trailer_key(movie_id))

In [6]:
# Add 'poster_path' and 'trailer_key' columns to the dataset
dataset['poster_path'] = poster_paths
dataset['trailer_key'] = trailer_keys

In [7]:
# Display the updated dataset
dataset.head(1)

Unnamed: 0,movie_id,title,tagline,overview,genres,keywords,cast,crew,popularity,release_date,poster_path,trailer_key
0,19995,Avatar,Enter the World of Pandora.,"In the 22nd century, a paraplegic Marine is di...","[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...","[{""id"": 1463, ""name"": ""culture clash""}, {""id"":...","[{""cast_id"": 242, ""character"": ""Jake Sully"", ""...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de...",150.437577,2009-12-10,https://image.tmdb.org/t/p/w500//kyeqWdyUXW608...,LgZ2MDuJvhc


In [8]:
dataset = dataset[['movie_id','title','tagline','overview','genres','keywords','cast','crew','popularity','release_date','poster_path','trailer_key']]
dataset.head(150)

Unnamed: 0,movie_id,title,tagline,overview,genres,keywords,cast,crew,popularity,release_date,poster_path,trailer_key
0,19995,Avatar,Enter the World of Pandora.,"In the 22nd century, a paraplegic Marine is di...","[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...","[{""id"": 1463, ""name"": ""culture clash""}, {""id"":...","[{""cast_id"": 242, ""character"": ""Jake Sully"", ""...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de...",150.437577,2009-12-10,https://image.tmdb.org/t/p/w500//kyeqWdyUXW608...,LgZ2MDuJvhc
1,285,Pirates of the Caribbean: At World's End,"At the end of the world, the adventure begins.","Captain Barbossa, long believed to be dead, ha...","[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""...","[{""id"": 270, ""name"": ""ocean""}, {""id"": 726, ""na...","[{""cast_id"": 4, ""character"": ""Captain Jack Spa...","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de...",139.082615,2007-05-19,https://image.tmdb.org/t/p/w500//jGWpG4YhpQwVm...,HKSZtp_OGHY
2,206647,Spectre,A Plan No One Escapes,A cryptic message from Bond’s past sends him o...,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...","[{""id"": 470, ""name"": ""spy""}, {""id"": 818, ""name...","[{""cast_id"": 1, ""character"": ""James Bond"", ""cr...","[{""credit_id"": ""54805967c3a36829b5002c41"", ""de...",107.376788,2015-10-26,https://image.tmdb.org/t/p/w500//672kUEMtTHcaV...,O-Y8-1esSLI
3,49026,The Dark Knight Rises,The Legend Ends,Following the death of District Attorney Harve...,"[{""id"": 28, ""name"": ""Action""}, {""id"": 80, ""nam...","[{""id"": 849, ""name"": ""dc comics""}, {""id"": 853,...","[{""cast_id"": 2, ""character"": ""Bruce Wayne / Ba...","[{""credit_id"": ""52fe4781c3a36847f81398c3"", ""de...",112.312950,2012-07-16,https://image.tmdb.org/t/p/w500//85cWkCVftiVs0...,GAjBzu8ggi0
4,49529,John Carter,"Lost in our world, found in another.","John Carter is a war-weary, former military ca...","[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...","[{""id"": 818, ""name"": ""based on novel""}, {""id"":...","[{""cast_id"": 5, ""character"": ""John Carter"", ""c...","[{""credit_id"": ""52fe479ac3a36847f813eaa3"", ""de...",43.926995,2012-03-07,https://image.tmdb.org/t/p/w500//lCxz1Yus07QCQ...,WR6HUkzxjR0
...,...,...,...,...,...,...,...,...,...,...,...,...
145,652,Troy,For passion. For honor. For destiny. For victo...,"In year 1250 B.C. during the late Bronze age, ...","[{""id"": 12, ""name"": ""Adventure""}, {""id"": 18, ""...","[{""id"": 380, ""name"": ""brother brother relation...","[{""cast_id"": 43, ""character"": ""Achilles"", ""cre...","[{""credit_id"": ""52fe4264c3a36847f801b043"", ""de...",66.803149,2004-05-13,https://image.tmdb.org/t/p/w500//51auXjXepW1zb...,qg-TZ22fXzg
146,80321,Madagascar 3: Europe's Most Wanted,"Six years ago, they disappeared without a trac...","Alex, Marty, Gloria and Melman are still tryin...","[{""id"": 16, ""name"": ""Animation""}, {""id"": 10751...","[{""id"": 3645, ""name"": ""madagascar""}, {""id"": 20...","[{""cast_id"": 2, ""character"": ""Alex (voice)"", ""...","[{""credit_id"": ""52fe47b89251416c910737a5"", ""de...",44.989192,2012-06-06,https://image.tmdb.org/t/p/w500//ekraj4ksvIKeu...,FsyBwLYDidA
147,36669,Die Another Day,He’s never been cooler.,Bond takes on a North Korean leader who underg...,"[{""id"": 12, ""name"": ""Adventure""}, {""id"": 28, ""...","[{""id"": 3290, ""name"": ""laser""}, {""id"": 156095,...","[{""cast_id"": 20, ""character"": ""James Bond"", ""c...","[{""credit_id"": ""52fe45ff9251416c91045a85"", ""de...",54.159392,2002-11-17,https://image.tmdb.org/t/p/w500//bZmGqOhMhaLn8...,NeWG3lSw34A
148,43074,Ghostbusters,Who You Gonna Call?,"Following a ghost invasion of Manhattan, paran...","[{""id"": 28, ""name"": ""Action""}, {""id"": 14, ""nam...","[{""id"": 5248, ""name"": ""female friendship""}, {""...","[{""cast_id"": 14, ""character"": ""Abby Yates"", ""c...","[{""credit_id"": ""58c1356392514173100088dc"", ""de...",66.218060,2016-07-14,https://image.tmdb.org/t/p/w500//wJmWliwXIgZOC...,ISFi4Sag7j8


In [9]:
release_dates = dataset['release_date']
release_dates.head(5000)


0       2009-12-10
1       2007-05-19
2       2015-10-26
3       2012-07-16
4       2012-03-07
           ...    
4804    1992-09-04
4805    2011-12-26
4806    2013-10-13
4807    2012-05-03
4808    2005-08-05
Name: release_date, Length: 4809, dtype: object

In [10]:
dataset['release_date'] = dataset['release_date'].apply(lambda x: str(x).split("-")[0])
dataset.head(2000)


Unnamed: 0,movie_id,title,tagline,overview,genres,keywords,cast,crew,popularity,release_date,poster_path,trailer_key
0,19995,Avatar,Enter the World of Pandora.,"In the 22nd century, a paraplegic Marine is di...","[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...","[{""id"": 1463, ""name"": ""culture clash""}, {""id"":...","[{""cast_id"": 242, ""character"": ""Jake Sully"", ""...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de...",150.437577,2009,https://image.tmdb.org/t/p/w500//kyeqWdyUXW608...,LgZ2MDuJvhc
1,285,Pirates of the Caribbean: At World's End,"At the end of the world, the adventure begins.","Captain Barbossa, long believed to be dead, ha...","[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""...","[{""id"": 270, ""name"": ""ocean""}, {""id"": 726, ""na...","[{""cast_id"": 4, ""character"": ""Captain Jack Spa...","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de...",139.082615,2007,https://image.tmdb.org/t/p/w500//jGWpG4YhpQwVm...,HKSZtp_OGHY
2,206647,Spectre,A Plan No One Escapes,A cryptic message from Bond’s past sends him o...,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...","[{""id"": 470, ""name"": ""spy""}, {""id"": 818, ""name...","[{""cast_id"": 1, ""character"": ""James Bond"", ""cr...","[{""credit_id"": ""54805967c3a36829b5002c41"", ""de...",107.376788,2015,https://image.tmdb.org/t/p/w500//672kUEMtTHcaV...,O-Y8-1esSLI
3,49026,The Dark Knight Rises,The Legend Ends,Following the death of District Attorney Harve...,"[{""id"": 28, ""name"": ""Action""}, {""id"": 80, ""nam...","[{""id"": 849, ""name"": ""dc comics""}, {""id"": 853,...","[{""cast_id"": 2, ""character"": ""Bruce Wayne / Ba...","[{""credit_id"": ""52fe4781c3a36847f81398c3"", ""de...",112.312950,2012,https://image.tmdb.org/t/p/w500//85cWkCVftiVs0...,GAjBzu8ggi0
4,49529,John Carter,"Lost in our world, found in another.","John Carter is a war-weary, former military ca...","[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...","[{""id"": 818, ""name"": ""based on novel""}, {""id"":...","[{""cast_id"": 5, ""character"": ""John Carter"", ""c...","[{""credit_id"": ""52fe479ac3a36847f813eaa3"", ""de...",43.926995,2012,https://image.tmdb.org/t/p/w500//lCxz1Yus07QCQ...,WR6HUkzxjR0
...,...,...,...,...,...,...,...,...,...,...,...,...
1995,10663,The Waterboy,You can mess with him. But don't mess with his...,Bobby Boucher is a water boy for a struggling ...,"[{""id"": 35, ""name"": ""Comedy""}]","[{""id"": 6075, ""name"": ""sport""}, {""id"": 171556,...","[{""cast_id"": 1, ""character"": ""Bobby Boucher"", ...","[{""credit_id"": ""52fe439e9251416c750177b1"", ""de...",23.839127,1998,https://image.tmdb.org/t/p/w500//miT42qWYC4D0n...,vVLvkqfTRVQ
1996,1891,The Empire Strikes Back,The Adventure Continues...,"The epic saga continues as Luke Skywalker, in ...","[{""id"": 12, ""name"": ""Adventure""}, {""id"": 28, ""...","[{""id"": 526, ""name"": ""rebel""}, {""id"": 803, ""na...","[{""cast_id"": 7, ""character"": ""Luke Skywalker"",...","[{""credit_id"": ""566e19f292514169e200d46f"", ""de...",78.517830,1980,https://image.tmdb.org/t/p/w500//2l05cFWJacyIs...,vU6L3jXt2r8
1997,9737,Bad Boys,Whatcha gonna do?,Marcus Burnett is a hen-pecked family man. Mik...,"[{""id"": 28, ""name"": ""Action""}, {""id"": 35, ""nam...","[{""id"": 416, ""name"": ""miami""}, {""id"": 703, ""na...","[{""cast_id"": 47, ""character"": ""Detective Mike ...","[{""credit_id"": ""52fe4524c3a36847f80bec6d"", ""de...",33.872182,1995,https://image.tmdb.org/t/p/w500//x1ygBecKHfXX4...,6T6p56VVRY0
1998,37137,The Naked Gun 2½: The Smell of Fear,If you see only one movie this year...you ough...,Bumbling cop Frank Drebin is out to foil the b...,"[{""id"": 35, ""name"": ""Comedy""}, {""id"": 80, ""nam...","[{""id"": 521, ""name"": ""washington d.c.""}, {""id""...","[{""cast_id"": 7, ""character"": ""Frank Drebin"", ""...","[{""credit_id"": ""57a52b11925141332d006a42"", ""de...",27.332560,1991,https://image.tmdb.org/t/p/w500//v9niLQWVzVPB1...,nw-25LgEmpY


In [11]:
dataset.duplicated().sum()

0

In [12]:
def convert(obj):
    L=[]
    for i in ast.literal_eval(obj):
        L.append(i['name'])
    return L

In [13]:
dataset['genres'] = dataset['genres'].apply(convert)

In [14]:
dataset['keywords'] = dataset['keywords'].apply(convert)

In [15]:
def convertCast(obj):
    L=[]
    counter=0
    for i in ast.literal_eval(obj):
        if counter!=4:
            L.append(i['name'])
            counter+=1
        else:
            break
    return L

In [16]:
dataset['cast'] = dataset['cast'].apply(convertCast)

In [17]:
def convertCrew(obj):
    L=[]
    for i in ast.literal_eval(obj):
        if i['job'] == 'Director':
            L.append(i['name'])
            break
    return L

In [18]:
dataset['crew'] = dataset['crew'].apply(convertCrew)

In [19]:
dataset.head(1)

Unnamed: 0,movie_id,title,tagline,overview,genres,keywords,cast,crew,popularity,release_date,poster_path,trailer_key
0,19995,Avatar,Enter the World of Pandora.,"In the 22nd century, a paraplegic Marine is di...","[Action, Adventure, Fantasy, Science Fiction]","[culture clash, future, space war, space colon...","[Sam Worthington, Zoe Saldana, Sigourney Weave...",[James Cameron],150.437577,2009,https://image.tmdb.org/t/p/w500//kyeqWdyUXW608...,LgZ2MDuJvhc


In [20]:
data = dataset[['movie_id','title','overview','genres','keywords','cast','crew']]

In [21]:
data.head(1)

Unnamed: 0,movie_id,title,overview,genres,keywords,cast,crew
0,19995,Avatar,"In the 22nd century, a paraplegic Marine is di...","[Action, Adventure, Fantasy, Science Fiction]","[culture clash, future, space war, space colon...","[Sam Worthington, Zoe Saldana, Sigourney Weave...",[James Cameron]


In [22]:
data['overview'] = data['overview'].apply(lambda x: str(x).split())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['overview'] = data['overview'].apply(lambda x: str(x).split())


In [23]:
data.head(1)

Unnamed: 0,movie_id,title,overview,genres,keywords,cast,crew
0,19995,Avatar,"[In, the, 22nd, century,, a, paraplegic, Marin...","[Action, Adventure, Fantasy, Science Fiction]","[culture clash, future, space war, space colon...","[Sam Worthington, Zoe Saldana, Sigourney Weave...",[James Cameron]


In [24]:
data['genres'] = data['genres'].apply(lambda x:[i.replace(" ","") for i in x])
data['keywords'] = data['keywords'].apply(lambda x:[i.replace(" ","") for i in x])
data['cast'] = data['cast'].apply(lambda x:[i.replace(" ","") for i in x])
data['crew'] = data['crew'].apply(lambda x:[i.replace(" ","") for i in x])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['genres'] = data['genres'].apply(lambda x:[i.replace(" ","") for i in x])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['keywords'] = data['keywords'].apply(lambda x:[i.replace(" ","") for i in x])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['cast'] = data['cast'].apply(lambda x:

In [25]:
data.head(1)

Unnamed: 0,movie_id,title,overview,genres,keywords,cast,crew
0,19995,Avatar,"[In, the, 22nd, century,, a, paraplegic, Marin...","[Action, Adventure, Fantasy, ScienceFiction]","[cultureclash, future, spacewar, spacecolony, ...","[SamWorthington, ZoeSaldana, SigourneyWeaver, ...",[JamesCameron]


In [26]:
data['titleArray'] = data['title'].apply(lambda x: x.split())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['titleArray'] = data['title'].apply(lambda x: x.split())


In [27]:
data.head(1)

Unnamed: 0,movie_id,title,overview,genres,keywords,cast,crew,titleArray
0,19995,Avatar,"[In, the, 22nd, century,, a, paraplegic, Marin...","[Action, Adventure, Fantasy, ScienceFiction]","[cultureclash, future, spacewar, spacecolony, ...","[SamWorthington, ZoeSaldana, SigourneyWeaver, ...",[JamesCameron],[Avatar]


In [28]:
data['tags'] = data['titleArray'] + data['overview'] + data['genres'] + data['keywords'] + data['cast'] + data['crew']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['tags'] = data['titleArray'] + data['overview'] + data['genres'] + data['keywords'] + data['cast'] + data['crew']


In [29]:
data.head(1)

Unnamed: 0,movie_id,title,overview,genres,keywords,cast,crew,titleArray,tags
0,19995,Avatar,"[In, the, 22nd, century,, a, paraplegic, Marin...","[Action, Adventure, Fantasy, ScienceFiction]","[cultureclash, future, spacewar, spacecolony, ...","[SamWorthington, ZoeSaldana, SigourneyWeaver, ...",[JamesCameron],[Avatar],"[Avatar, In, the, 22nd, century,, a, paraplegi..."


In [30]:
data = data[['movie_id','title','tags']]

In [31]:
data.head(1)

Unnamed: 0,movie_id,title,tags
0,19995,Avatar,"[Avatar, In, the, 22nd, century,, a, paraplegi..."


In [32]:
data['tags'] = data['tags'].apply(lambda x:" ".join(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['tags'] = data['tags'].apply(lambda x:" ".join(x))


In [33]:
data.head(1)

Unnamed: 0,movie_id,title,tags
0,19995,Avatar,"Avatar In the 22nd century, a paraplegic Marin..."


In [34]:
data['tags'] = data['tags'].apply(lambda x:x.lower())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['tags'] = data['tags'].apply(lambda x:x.lower())


In [35]:
data.head(1)

Unnamed: 0,movie_id,title,tags
0,19995,Avatar,"avatar in the 22nd century, a paraplegic marin..."


In [36]:
ps = PorterStemmer()

In [37]:
def stem(text):
    y=[]
    for i in text.split():
        y.append(ps.stem(i))

    return " ".join(y)

In [38]:
data['tags'] = data['tags'].apply(stem)

In [39]:
cv = CountVectorizer(max_features=5000, stop_words='english')

In [40]:
vectors = cv.fit_transform(data['tags']).toarray()

In [41]:
vectors

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [42]:
similarity = cosine_similarity(vectors)

In [43]:
similarity

array([[1.        , 0.07798129, 0.08718573, ..., 0.04474374, 0.        ,
        0.        ],
       [0.07798129, 1.        , 0.0559017 , ..., 0.04303315, 0.02151657,
        0.02192645],
       [0.08718573, 0.0559017 , 1.        , ..., 0.02405626, 0.        ,
        0.        ],
       ...,
       [0.04474374, 0.04303315, 0.02405626, ..., 1.        , 0.03703704,
        0.05661385],
       [0.        , 0.02151657, 0.        , ..., 0.03703704, 1.        ,
        0.07548514],
       [0.        , 0.02192645, 0.        , ..., 0.05661385, 0.07548514,
        1.        ]])

In [44]:
def recommend(movie):
    movie_index = data[data['title'] == movie].index[0]
    distances = similarity[movie_index]
    movies_list = sorted(list(enumerate(distances)), reverse=True, key=lambda x:x[1])[1:7]

    for i in movies_list:
        print(i)
        print(data.iloc[i[0]].title)

In [45]:
recommend('Avatar')

(1216, 0.3013691781205502)
Aliens vs Predator: Requiem
(2409, 0.2789943329851663)
Aliens
(1194, 0.25310879886733306)
Small Soldiers
(507, 0.250544307197797)
Independence Day
(539, 0.2465984809580359)
Titan A.E.
(3731, 0.24534987303147357)
Falcon Rising


## using Pickle to convert into .pkl files for further converting it into sqlite3 database

In [46]:
pickle.dump(dataset.to_dict(), open('movies.pkl','wb'))

In [47]:
popular = dataset[['movie_id','title','genres','release_date','popularity']]

In [48]:
popular.head()

Unnamed: 0,movie_id,title,genres,release_date,popularity
0,19995,Avatar,"[Action, Adventure, Fantasy, Science Fiction]",2009,150.437577
1,285,Pirates of the Caribbean: At World's End,"[Adventure, Fantasy, Action]",2007,139.082615
2,206647,Spectre,"[Action, Adventure, Crime]",2015,107.376788
3,49026,The Dark Knight Rises,"[Action, Crime, Drama, Thriller]",2012,112.31295
4,49529,John Carter,"[Action, Adventure, Science Fiction]",2012,43.926995


In [49]:
popular = popular.sort_values('popularity', ascending=False)

In [50]:
def byYear(year):
    counter=0
    L=[]
    for row in popular.iterrows():
        if row[1].release_date==year:
            if counter!=6:
                L.append(row[1].title)
                counter=counter+1
            else:
                break
    return L

In [51]:
byYear("2016")

['Deadpool',
 'Captain America: Civil War',
 'Batman v Superman: Dawn of Justice',
 'X-Men: Apocalypse',
 'The Jungle Book',
 'Suicide Squad']

In [52]:
def byGenre(genre):
    counter=0
    L=[]
    for row in popular.iterrows():
        if counter!=6:
            for gen in row[1].genres:
                if gen==genre:
                    L.append(row[1].title)
                    counter+=1
                    break
        else:
            break
    return L

In [53]:
byGenre("Action")

['Deadpool',
 'Guardians of the Galaxy',
 'Mad Max: Fury Road',
 'Jurassic World',
 'Pirates of the Caribbean: The Curse of the Black Pearl',
 'Dawn of the Planet of the Apes']

In [54]:
pickle.dump(popular, open('popular.pkl','wb'))

In [55]:
select = dataset[['movie_id','title','genres','cast','popularity']]

In [56]:
select = select.sort_values('popularity', ascending=False)

In [57]:
def byChoice(genres, castList,obj):
    choice = obj.copy()

    def byChoiceCast():
        cast_fre = []
        for row in choice.iterrows():
            castFre = 0
            for cast in row[1].cast:
                if cast == castList[0] or cast == castList[1] or cast == castList[2] or cast == castList[3] or cast == \
                        castList[4]:
                    castFre += 1
            cast_fre.append(castFre)
        return cast_fre

    choice['cast_fre'] = byChoiceCast()
    choice = choice[choice['cast_fre'] != 0]

    def byChoiceGenre():
        gen_fre = []
        for row in choice.iterrows():
            genFre = 0
            for gen in row[1].genres:
                if gen == genres[0] or gen == genres[1] or gen == genres[2]:
                    genFre += 1
            gen_fre.append(genFre)
        return gen_fre

    choice['gen_fre'] = byChoiceGenre()
    choice = choice[choice['gen_fre'] != 0]

    choice = choice.sort_values('cast_fre', ascending=False)

    choice_movies = []
    counter=0
    for mov in choice.iterrows():
        if counter!=3:
            choice_movies.append(mov[1].title)
            counter+=1
        else:
            break
    return choice_movies


In [58]:
genres = ["Comedy", "Action", "Animation"]
castList = ["Robert De Niro", "Samuel L. Jackson", "Bruce Willis", "Matt Damon", "Nicolas Cage"]

byChoice(genres,castList,select)

['Kiss of Death', 'What Just Happened', 'Die Hard: With a Vengeance']

In [59]:
pickle.dump(select, open('choice.pkl','wb'))

# extracting and summarizing information about the cast members in the dataset, providing insights into the distribution and popularity of actors or actresses in the movies.

In [60]:
cast_dict = {}
for row in dataset.iterrows():
    for cast in row[1].cast:
        if cast in cast_dict:
            cast_dict[cast] = (cast_dict[cast]+1)
        else:
            cast_dict[cast] = 0

cast_dict = dict( sorted(cast_dict.items(), key=operator.itemgetter(1),reverse=True))

cast_list = []
counter=0;
for key in cast_dict:
    if counter <25:
        cast_list.append(key)
    else:
        break;

In [61]:
pickle.dump(vectors, open('vectors.pkl','wb'))

In [62]:
vectors.shape

(4809, 5000)