# This notebook finds the most popular movies by counting the number of ratings each movie received, and displays the movie names using a broadcast variable.

In [1]:
import findspark
# Must run before the pyspark import in the next cell: findspark locates the
# local Spark installation and adds it to sys.path so pyspark can be imported.
findspark.init()

In [2]:
from pyspark import SparkConf, SparkContext

In [3]:
def loadMovieNames(path="ml-100k/u.ITEM", encoding="ISO-8859-1"):
    """Build a movie-id -> title lookup from a pipe-delimited item file.

    Each record is expected to look like ``<id>|<title>|...``; only the
    first two fields are used.

    Args:
        path: Location of the item file. Defaults to the path this notebook
            has always used.
        encoding: Text encoding of the file. The MovieLens 100K item file
            contains accented characters stored as ISO-8859-1, so relying on
            the platform default (UTF-8 on most systems) can raise
            UnicodeDecodeError.

    Returns:
        dict mapping the integer movie id to its title string.

    Raises:
        OSError: if the file cannot be opened.
        ValueError: if a record's first field is not an integer.
    """
    movieNames = {}
    with open(path, encoding=encoding) as f:
        for line in f:
            # Drop the line terminator so a two-field record does not keep
            # a trailing newline in its title.
            fields = line.rstrip('\n').split('|')
            # Blank or truncated lines carry no (id, title) pair; skip them
            # instead of crashing on int('') / a missing index.
            if len(fields) < 2:
                continue
            movieNames[int(fields[0])] = fields[1]
    return movieNames

In [4]:
# Single-process local Spark, labelled "PopularMovies" as the application name.
conf = SparkConf().setMaster("local").setAppName("PopularMovies")
# getOrCreate reuses a context that is already running, so re-executing this
# cell in the same kernel no longer fails with
# "Cannot run multiple SparkContexts at once".
sc = SparkContext.getOrCreate(conf=conf)

In [5]:
# Load the id -> title dictionary on the driver, then wrap it in a Spark
# broadcast variable; later cells read it through nameDict.value.
movieTitleLookup = loadMovieNames()
nameDict = sc.broadcast(movieTitleLookup)

In [6]:
# Raw rating records, one per line, whitespace-separated.
lines = sc.textFile("file:///PySpark/Key-Value_RDD/ml-100k/u.data")

# The second column is taken as the movie id; emit (movieId, 1) per rating
# so the ones can be summed per movie.
movies = lines.map(lambda record: (int(record.split()[1]), 1))
moviesCounts = movies.reduceByKey(lambda left, right: left + right)

# Order by the count (second tuple element), largest first, and bring the
# whole list back to the driver.
sortedMovies = moviesCounts.sortBy(lambda pair: pair[1], ascending=False)
results = sortedMovies.collect()

In [7]:
# Print every movie id with its number of ratings, in the collected order.
for entry in results:
    movie, rating = entry  # rating is the rating *count* for this movie id
    print(f'MovieId: {movie} has {rating} rating counts!')

MovieId: 50 has 583 rating counts!
MovieId: 258 has 509 rating counts!
MovieId: 100 has 508 rating counts!
MovieId: 181 has 507 rating counts!
MovieId: 294 has 485 rating counts!
MovieId: 286 has 481 rating counts!
MovieId: 288 has 478 rating counts!
MovieId: 1 has 452 rating counts!
MovieId: 300 has 431 rating counts!
MovieId: 121 has 429 rating counts!
MovieId: 174 has 420 rating counts!
MovieId: 127 has 413 rating counts!
MovieId: 56 has 394 rating counts!
MovieId: 7 has 392 rating counts!
MovieId: 98 has 390 rating counts!
MovieId: 237 has 384 rating counts!
MovieId: 117 has 378 rating counts!
MovieId: 172 has 367 rating counts!
MovieId: 222 has 365 rating counts!
MovieId: 204 has 350 rating counts!
MovieId: 313 has 350 rating counts!
MovieId: 405 has 344 rating counts!
MovieId: 79 has 336 rating counts!
MovieId: 210 has 331 rating counts!
MovieId: 151 has 326 rating counts!
MovieId: 173 has 324 rating counts!
MovieId: 69 has 321 rating counts!
MovieId: 748 has 316 rating counts!
M

MovieId: 1391 has 3 rating counts!
MovieId: 1096 has 3 rating counts!
MovieId: 1293 has 3 rating counts!
MovieId: 1389 has 3 rating counts!
MovieId: 1513 has 3 rating counts!
MovieId: 1420 has 3 rating counts!
MovieId: 1506 has 3 rating counts!
MovieId: 1528 has 3 rating counts!
MovieId: 1191 has 3 rating counts!
MovieId: 1465 has 3 rating counts!
MovieId: 1146 has 3 rating counts!
MovieId: 1516 has 3 rating counts!
MovieId: 1623 has 3 rating counts!
MovieId: 1408 has 3 rating counts!
MovieId: 1602 has 3 rating counts!
MovieId: 1155 has 3 rating counts!
MovieId: 1323 has 3 rating counts!
MovieId: 1609 has 3 rating counts!
MovieId: 1552 has 3 rating counts!
MovieId: 1607 has 3 rating counts!
MovieId: 1610 has 3 rating counts!
MovieId: 1144 has 3 rating counts!
MovieId: 1490 has 3 rating counts!
MovieId: 1027 has 3 rating counts!
MovieId: 1639 has 3 rating counts!
MovieId: 1196 has 3 rating counts!
MovieId: 1658 has 3 rating counts!
MovieId: 1652 has 3 rating counts!
MovieId: 1256 has 3 

In [8]:
# Replace each movie id with its title from the broadcast dictionary while
# keeping the count, then collect the (title, count) pairs to the driver.
withMovieNames = sortedMovies.map(
    lambda pair: (nameDict.value[pair[0]], pair[1])
)
results = withMovieNames.collect()

In [9]:
# Print every movie title with its number of ratings, in the collected order.
for entry in results:
    movie, rating = entry  # rating is the rating *count* for this title
    print(f'Movie: {movie} has rated {rating} times!')

Movie: Star Wars (1977) has rated 583 times!
Movie: Contact (1997) has rated 509 times!
Movie: Fargo (1996) has rated 508 times!
Movie: Return of the Jedi (1983) has rated 507 times!
Movie: Liar Liar (1997) has rated 485 times!
Movie: English Patient, The (1996) has rated 481 times!
Movie: Scream (1996) has rated 478 times!
Movie: Toy Story (1995) has rated 452 times!
Movie: Air Force One (1997) has rated 431 times!
Movie: Independence Day (ID4) (1996) has rated 429 times!
Movie: Raiders of the Lost Ark (1981) has rated 420 times!
Movie: Godfather, The (1972) has rated 413 times!
Movie: Pulp Fiction (1994) has rated 394 times!
Movie: Twelve Monkeys (1995) has rated 392 times!
Movie: Silence of the Lambs, The (1991) has rated 390 times!
Movie: Jerry Maguire (1996) has rated 384 times!
Movie: Rock, The (1996) has rated 378 times!
Movie: Empire Strikes Back, The (1980) has rated 367 times!
Movie: Star Trek: First Contact (1996) has rated 365 times!
Movie: Back to the Future (1985) has rat

Movie: Paradise Road (1997) has rated 7 times!
Movie: Kim (1950) has rated 7 times!
Movie: National Lampoon's Senior Trip (1995) has rated 7 times!
Movie: Steal Big, Steal Little (1995) has rated 7 times!
Movie: Trial by Jury (1994) has rated 7 times!
Movie: Lassie (1994) has rated 7 times!
Movie: Mercury Rising (1998) has rated 7 times!
Movie: Double Happiness (1994) has rated 7 times!
Movie: Stranger in the House (1997) has rated 7 times!
Movie: Bliss (1997) has rated 7 times!
Movie: Dream With the Fishes (1997) has rated 7 times!
Movie: Aparajito (1956) has rated 7 times!
Movie: Night Flier (1997) has rated 7 times!
Movie: Flower of My Secret, The (Flor de mi secreto, La) (1995) has rated 6 times!
Movie: Kissed (1996) has rated 6 times!
Movie: Second Jungle Book: Mowgli & Baloo, The (1997) has rated 6 times!
Movie: Zeus and Roxanne (1997) has rated 6 times!
Movie: Best of the Best 3: No Turning Back (1995) has rated 6 times!
Movie: Metisse (Café au Lait) (1993) has rated 6 times!
Mo

In [11]:
# withMovieNames is already sorted by descending count, so its first element
# is the most-rated movie. first() fetches just that one row; the previous
# version ran collect() twice, launching two full jobs and shipping every
# (title, count) pair to the driver only to read index 0.
topMovie, topCount = withMovieNames.first()
print(f'{topMovie} is the most Popular movie with {topCount} rating counts!')

Star Wars (1977) is the most Popular movie with 583 rating counts!
