In [1]:
import pandas as pd
import numpy as np
import plotly.express as px

import ipywidgets as widgets
from ipywidgets import HBox, VBox, IntSlider, Play, jslink, interactive

import plotly.graph_objects as go

# Sidebar

In [2]:
# Load the movie table from the zipped CSV shipped with the notebook.
movie_df = pd.read_csv('data/movie_dfce.zip')
# Keep the frame as loaded and work on a separate copy: later cells add
# styling columns to the copy and fill its NaNs in place.
movie_dfc = movie_df.copy()

In [3]:
# Choices for the genre dropdown: the catch-all "Any" followed by the columns
# taken positionally from the table (index 7 up to, but excluding, the last).
# NOTE(review): this assumes those positions hold the per-genre columns --
# confirm against the CSV layout if the file changes.
genre_cols = ['Any', *movie_df.columns[7:-1]]

# Human-readable axis label -> column name in the movie table.
axis_map = dict([
    ("Average Rating", "averageRating"),
    ("Popularity", "popularity"),
    ("Number of Reviews", "numVotes"),
    ("Revenue Earned (dollars)", "revenue"),
    ("Length (minutes)", "runtimeMinutes"),
    ("Year", "startYear"),
])

# Filters

In [4]:
# Filter controls shown in the sidebar; each label is followed by its widget.
components = []

# Lower bound on the number of IMDb votes.
ratingc_label = widgets.HTML(value="Minimum number of ratings on IMDb:")
ratingc = widgets.IntSlider(value=100, min=100, max=3000000, step=100)
components.extend([ratingc_label, ratingc])

# Lower bound on the average rating.
ratingv_label = widgets.HTML(value="Minimum average rating on IMDb:")
ratingv = widgets.IntSlider(value=0, min=0, max=5, step=1)
components.extend([ratingv_label, ratingv])

# Inclusive release-year window.
released_year_label = widgets.HTML(value="Year released:")
released_year = widgets.IntRangeSlider(value=[1911, 2021], min=1911, max=2021, step=1)
components.extend([released_year_label, released_year])

# Lower bound on revenue. The range starts at 0 so the default value of 0
# is actually reachable: a slider value below `min` is clamped up to `min`,
# which would permanently hide every movie with a smaller revenue, including
# rows whose missing revenue is later filled with 0.
revenue_label = widgets.HTML(value="Revenue Earned:")
revenue = widgets.IntSlider(value=0, min=0, max=300000000, step=10000)
components.extend([revenue_label, revenue])

# Genre selector; "Any" disables the genre filter.
genre_label = widgets.HTML(value="Genre (a movie can have multiple genres):")
genre = widgets.Dropdown(options=genre_cols, value="Any")
components.extend([genre_label, genre])

# Stack the controls vertically and display them as the cell output.
all_widgets = widgets.VBox(components)
all_widgets

VBox(children=(HTML(value='Minimum number of ratings on IMDb:'), IntSlider(value=100, max=3000000, min=100, st…

# Variables

In [5]:
# Axis selectors: each dropdown offers the (label, column) pairs of axis_map
# and starts on a column name.
x_axis_label = widgets.HTML(value="X-axis variable:")
x_axis = widgets.Dropdown(options=[*axis_map.items()], value="runtimeMinutes")

y_axis_label = widgets.HTML(value="Y-axis variable:")
y_axis = widgets.Dropdown(options=[*axis_map.items()], value="averageRating")

# Explanatory footnote rendered below the selectors.
note = (
    "Note: The Average Rating is collected from IMDb, and is the average "
    "of 1 to 10 ratings given by users, divided by 2."
)
note_label = widgets.HTML(value=note)

# Same vertical layout as the filter panel: label, widget, label, widget, note.
components = [x_axis_label, x_axis, y_axis_label, y_axis, note_label]

all_widgets = widgets.VBox(components)
all_widgets

VBox(children=(HTML(value='X-axis variable:'), Dropdown(index=4, options=(('Average Rating', 'averageRating'),…

In [6]:
# Movies rated above 3 are drawn as near-opaque orange markers; everything
# else (including rows with a missing rating) is a faint grey.
_well_rated = movie_dfc["averageRating"].gt(3)
movie_dfc["color"] = np.where(_well_rated, "orange", "grey")
movie_dfc["alpha"] = np.where(_well_rated, 0.9, 0.25)
# Fill remaining NaNs with 0 only after the styling columns are derived.
movie_dfc.fillna(0, inplace=True)

In [7]:
# Empty marker-only scatter trace; the widget callback fills in the
# coordinates, hover text and per-point colour/opacity.
plot = go.Scatter(
    x=[],
    y=[],
    text=[],
    mode="markers",
    hoverinfo="text",
    marker={"color": [], "opacity": [], "size": 7},
)
fig = go.FigureWidget()
fig.add_trace(plot)

# Tight margins around the plot area.
margin = go.layout.Margin(l=20, r=20, b=20, t=30)
fig = fig.update_layout(margin=margin)

In [8]:
def select_movies():
    """Return the rows of ``movie_dfc`` that pass the current filter widgets.

    A row is kept when its vote count, average rating and revenue are at
    least the respective slider values and its start year lies inside the
    selected (inclusive) year range. Unless the genre dropdown is set to
    "Any", the result is further restricted to rows whose column for the
    chosen genre is greater than zero.
    """
    first_year, last_year = released_year.value

    enough_votes = movie_dfc.numVotes >= ratingc.value
    rated_high_enough = movie_dfc.averageRating >= ratingv.value
    earned_enough = movie_dfc.revenue >= revenue.value
    not_too_old = movie_dfc.startYear >= first_year
    not_too_new = movie_dfc.startYear <= last_year

    matches = movie_dfc[
        enough_votes & rated_high_enough & earned_enough & not_too_old & not_too_new
    ]

    chosen_genre = genre.value
    if chosen_genre != "Any":
        matches = matches[matches[chosen_genre] > 0]
    return matches


def on_value_change(change):
    """Redraw the scatter plot from the current widget state.

    Registered as a widget observer and also called directly to draw the
    initial figure.

    Parameters
    ----------
    change : dict or None
        Change notification supplied by the observer machinery. It is not
        inspected: the current widget values are re-read on every call.
    """
    # Use a distinct local name; rebinding ``movie_dfc`` here would shadow the
    # module-level table and obscure which frame is being plotted.
    selected = select_movies()
    x_name = x_axis.value
    y_name = y_axis.value

    # Hover text: title, average rating, vote count and revenue, one per line.
    hover_text = (
        selected["title"].astype(str) + "<br>"
        + selected["averageRating"].astype(str) + "<br>"
        + selected["numVotes"].astype(str) + "<br>"
        + selected["revenue"].astype(str)
    )

    # Group all property writes so the figure widget sends a single update to
    # the front end instead of one per assignment.
    with fig.batch_update():
        fig.data[0]['x'] = selected[x_name]
        fig.data[0]['y'] = selected[y_name]
        fig.data[0]['marker']['color'] = selected["color"]
        fig.data[0]['marker']['opacity'] = selected["alpha"]
        fig.data[0]['text'] = hover_text

        # Axis titles use the dropdowns' human-readable labels.
        fig.update_xaxes(title_text=x_axis.label)
        fig.update_yaxes(title_text=y_axis.label)
        fig.update_layout(title="%d movies selected" % len(selected))

In [9]:
# Every widget whose value the callback reads must trigger a redraw. That
# includes the axis dropdowns: without observing them, picking a different
# X or Y variable would leave the plot unchanged until a filter is moved.
controls = [ratingc, ratingv, released_year, revenue, genre, x_axis, y_axis]
for control in controls:
    control.observe(on_value_change, names="value")

In [10]:
# Populate the figure once from the widgets' current values (there is no
# change event to pass), then show the figure as the cell output.
on_value_change(None)
fig

FigureWidget({
    'data': [{'hoverinfo': 'text',
              'marker': {'color': array(['orange', 'orange',…