In [46]:
# Import dependencies
from pymongo import MongoClient
import json
import requests 
from pprint import pprint

In [47]:
# Create an instance of MongoClient
mongo = MongoClient('mongodb://localhost:27017/')

In [48]:
# confirm that our new database was created
print(mongo.list_database_names())

['Test', 'admin', 'config', 'imdb', 'local', 'met', 'test2', 'test3', 'uk_food']


In [49]:
# assign the imdb database to a variable name
imdb = mongo['imdb']

In [50]:
# review the collections in our new database
print(imdb.list_collection_names())

['TV', 'movies']


In [51]:
# review a document in the tv collection
pprint(imdb.TV.find_one())

{'End_Year': 2019,
 'Genres': ['Action', 'Adventure', 'Drama'],
 'Rating': 9.2,
 'Stars': ['Peter Dinklage', 'Kit Harington', 'Lena Headey'],
 'Start_Year': 2011,
 'Title': 'Game of Thrones',
 'Votes': 2141065,
 '_id': ObjectId('641d048ab76eb895f4a53eda')}


In [52]:
# assign the collection to a variable
TV = imdb['TV']
# assign the collection to a variable
movies = imdb['movies']

In [53]:
#Top 10 shows by votes

# Use the 'find' method to retrieve documents from the TV collection, sorted by descending vote count
top_shows = TV.find().sort('Votes', -1).limit(10)

# Iterate through the results and print out the title and vote count for each show
for show in top_shows:
    print(show['Title'], show['Votes'])


Game of Thrones 2141065
Breaking Bad 1945469
Stranger Things 1225425
The Walking Dead 1016458
Friends 1015775
Sherlock 942260
The Big Bang Theory 822355
Dexter 734340
How I Met Your Mother 695209
The Office 626557


In [54]:
#Bottom 10 TV shows

# Use the 'find' method to retrieve documents from the TV collection, sorted by ascending vote count
bottom_shows = TV.find().sort('Votes', 1).limit(10)

# Iterate through the results and print out the title and vote count for each show
for show in bottom_shows:
    print(show['Title'], show['Votes'])

Graceland 17235
The Ellen DeGeneres Show 17249
Dickinson 17258
Uncoupled 17281
Metalocalypse 17296
Two Guys, a Girl and a Pizza Place 17354
Shameless 17356
All in the Family 17366
Electric Dreams 17384
Justice League Unlimited 17409


In [55]:
#most appeared Star

# Use the 'aggregate' method to group documents by each star, and count the number of times they appear
star_count = TV.aggregate([
    {"$unwind": "$Stars"},
    {"$group": {"_id": "$Stars", "count": {"$sum": 1}, "shows": {"$push": "$Title"}}},
    {"$sort": {"count": -1}},
    {"$limit": 1}
])

# Retrieve the actor with the most appearances from the result
most_frequent_actor = list(star_count)[0]

# Print out the actor's name, the number of times they appeared, and the shows they appeared in
print("Actor:", most_frequent_actor['_id'])
print("Number of appearances:", most_frequent_actor['count'])
print("Shows:", most_frequent_actor['shows'])

Actor: Tom Kenny
Number of appearances: 8
Shows: ['Adventure Time', 'Final Space', 'The Powerpuff Girls', 'Johnny Bravo', 'CatDog', "Foster's Home for Imaginary Friends", "Rocko's Modern Life", 'Paradise PD']


In [56]:
#average tv rating by decade

# Use the 'aggregate' method to group documents by the decade, based on the start year
ratings_by_decade = TV.aggregate([
    {"$addFields": {
        "Decade": {
            "$subtract": [{"$floor": {"$divide": ["$Start_Year", 10]}}, 1]
        }
    }},
    {"$group": {"_id": "$Decade", "average_rating": {"$avg": "$Rating"}}},
    {"$sort": {"average_rating": -1, "_id": -1}}
])

# Iterate through the results and print out the decade and average rating for each
for decade in ratings_by_decade:
    # Adjust decade value to show as a four digit year
    decade_year = (decade['_id'] + 1) * 10
    print(decade_year, decade['average_rating'])

1950.0 8.700000000000001
1970.0 8.28
1960.0 7.9363636363636365
1980.0 7.869767441860464
2010.0 7.803340292275575
2000.0 7.777464788732394
1990.0 7.742696629213484
2020.0 7.441447368421052


In [57]:
#top 10 movies

# Use the 'find' method to retrieve the top 10 movies by votes, and sort by descending order
top_movies = movies.find().sort([("Votes", -1)]).limit(10)

# Iterate through the results and print out the movie titles and number of votes for each
for movie in top_movies:
    print(movie['Title'], movie['Votes'])


The Shawshank Redemption 2717298
The Dark Knight 2689938
Inception 2387301
Fight Club 2160789
Forrest Gump 2112227
Pulp Fiction 2086592
The Matrix 1937921
The Lord of the Rings: The Fellowship of the Ring 1899064
The Godfather 1887987
Interstellar 1877393


In [58]:
# bottom 10 movies

# Use the 'find' method to retrieve the bottom 10 movies by votes, and sort by ascending order
bottom_movies = movies.find().sort([("Votes", 1)]).limit(10)

# Iterate through the results and print out the movie titles and number of votes for each
for movie in bottom_movies:
    print(movie['Title'], movie['Votes'])

Winter Light 25758
Yi Yi: A One and a Two... 25782
Black Narcissus 25826
Like Father, Like Son 26066
Through a Glass Darkly 26074
Everything's Gonna Be Great 26110
Sarfarosh 26120
The Way He Looks 26192
Le Cercle Rouge 26293
A Woman Under the Influence 26359


In [59]:
#most appeares movie star

# Use the 'aggregate' method to group documents by each star, and count the number of times they appear
star_count = movies.aggregate([
    {"$unwind": "$Stars"},
    {"$group": {"_id": "$Stars", "count": {"$sum": 1}, "movies": {"$push": "$Title"}}},
    {"$sort": {"count": -1}},
    {"$limit": 1}
])

# Retrieve the actor with the most appearances from the result
most_frequent_actor = list(star_count)[0]

# Print out the actor's name, the number of times they appeared, and the movies they appeared in
print("Actor:", most_frequent_actor['_id'])
print("Number of appearances:", most_frequent_actor['count'])
print("Movies:", most_frequent_actor['movies'])

Actor: Robert De Niro
Number of appearances: 16
Movies: ['The Godfather Part II', 'Goodfellas', 'Joker', 'Heat', 'Once Upon a Time in America', 'Taxi Driver', 'Casino', 'Raging Bull', 'The Deer Hunter', 'Brazil', 'The Irishman', 'The Untouchables', 'Awakenings', 'A Bronx Tale', 'The King of Comedy', 'Silver Linings Playbook']


In [62]:
#average movie rating by decade

# Use the 'aggregate' method to group documents by the decade, based on the release year
ratings_by_decade = movies.aggregate([
    {"$match": {"Release_Year": {"$exists": True, "$ne": None}}},
    {"$addFields": {
        "Decade": {
            "$subtract": [{"$floor": {"$divide": ["$Release_Year", 10]}}, 1]
        }
    }},
    {"$group": {"_id": "$Decade", "average_rating": {"$avg": "$Rating"}}},
    {"$sort": {"average_rating": -1, "_id": -1}}
])

# Iterate through the results and print out the decade and average rating for each
for decade in ratings_by_decade:
    # Adjust decade value to show as a four digit year
    if decade['_id'] is not None:
        decade_year = (decade['_id'] + 1) * 10
        print(decade_year, decade['average_rating'])

In [64]:
#director who has appeared the most and there ranking

# Use the 'aggregate' method to group documents by director and calculate their average rating
director_ratings = movies.aggregate([
    {"$group": {"_id": "$Director", "average_rating": {"$avg": "$Rating"}, "count": {"$sum": 1}}},
    {"$sort": {"count": -1}},
    {"$limit": 1}
])

# Iterate through the results and print out the director, their average rating, and the number of times they appear
for director in director_ratings:
    print("Director:", director['_id'])
    print("Average rating:", director['average_rating'])
    print("Number of appearances:", director['count'])

Director: Alfred Hitchcock
Average rating: 8.066666666666666
Number of appearances: 12
