# Lab 3
Oscar Fredriksson & Isak Kjellhard


### Import libraries
The relevant parts of the Bokeh library, as well as other utility libraries, are imported. 

In [52]:
from bokeh.plotting import figure, show, curdoc
from bokeh.models import Slider, ColumnDataSource, HoverTool, CustomJS
from bokeh.layouts import layout, gridplot
from bokeh.models.glyphs import Circle
from bokeh.io import output_notebook, push_notebook

from collections import defaultdict
import math
import pandas as pd

### Setting up Bokeh to use with Notebook
The `output_notebook()` command makes Bokeh display its output inside the notebook instead of on a separate web server. 

In [53]:
# Configure Bokeh so that later show() calls render inline in this notebook.
output_notebook()

### Import and process Dataset
The dataset is imported using pandas; the relevant columns are kept and the rest are discarded. While exploring the dataset it was noticed that some entries didn't have all columns filled in, or simply had them set to 0. These entries won't work with the visualization and are therefore filtered out.

In [54]:
# Columns needed by the two plots and by the hover tool.
used_columns = ["imdb_score", "gross", "budget", "movie_title", "title_year"]

# Load the raw CSV and build the cleaned frame in one pipeline so that re-running
# this cell always produces the same `df` from the file on disk.
df = (
    pd.read_csv("datasets/IMDB_movies/imdb_movie_metadata.csv")
    .filter(items=used_columns)
    # A comparison with NaN is False, so each "> 0" test removes both the rows
    # where the value is missing and the rows where it was recorded as 0.
    .query("gross > 0 and imdb_score > 0 and budget > 0")
    # The release year is the x-axis of the second plot; a row without a year
    # cannot be drawn there, so it is dropped as well.
    .dropna(subset=["title_year"])
)

### Setting up a shared `ColumnDataSource()`
The two plots share a `ColumnDataSource()` object that allows linking between them. If one or more datapoints are selected in one of the plots, the same datapoints are highlighted in the other plot; this shared object is needed to make this feature work. 

In [55]:

# Maps the short field name used by the glyphs and tooltips to the dataframe
# column that provides its values.
column_for_field = {
    "score": "imdb_score",
    "gross": "gross",
    "budget": "budget",
    "title": "movie_title",
    "year": "title_year",
}

# Single data source handed to both figures.
source = ColumnDataSource(
    data={field: df[column] for field, column in column_for_field.items()}
)


### Hover tool 
To help the visualization a hover tool is added that can be used to show details of each datapoint in the plot.

In [56]:
# Tooltip rows shown when the cursor is over a datapoint. "@name" is replaced by
# the value of that column in the data source; an optional trailing "{...}" is a
# number format for the value.
hover_tool = HoverTool(
    tooltips=[
        ("Title", "@title"),
        ("Year", "@year"),
        ("Gross", "$@gross"),
        ("Budget", "$@budget"),
        # Tooltip formats are numeral-style unless a "printf" formatter is
        # registered for the field, so one decimal place is written "0.0"
        # rather than the printf-style "0.1f".
        ("Imdb score", "@{score}{0.0}"),
    ]
)

### Shared constants
The two plots share some constants so that they look the same and have the same tools available. 

In [57]:
# Toolbar tools given to both figures.
tools = "box_select,box_zoom,reset"

# Marker appearance shared by both figures: gray fill with a red outline.
# (An earlier `fill_color = "red"` was overwritten before it was ever read, so
# only the effective value is kept.)
fill_color = "gray"
line_color = "red"

# Width and height of each figure, and diameter of each marker, in screen units.
graph_size = 600
circle_size = 8

# Glyph overrides for the selection state: selected points are drawn fully opaque
# with the outline, unselected points are faded and lose the outline.
selection_glyph = Circle(fill_color=fill_color, fill_alpha=1, line_color=line_color)
nonselection_glyph = Circle(fill_color=fill_color, fill_alpha=0.1, line_color=None)

### Creating the first plot
The plot is created by passing all the needed data to a ```figure()``` object. The previously created ```hover_tool``` is then added to the plot object. 

In [58]:
# Figure 1: gross (y-axis) against IMDB score (x-axis).
plot1 = figure(
    title="",
    tools=tools,
    plot_width=graph_size,
    plot_height=graph_size,
    x_axis_label="IMDB Score (0-10)",
    y_axis_label="Gross",
)
plot1.add_tools(hover_tool)

# Turn off scientific notation on the formatter of the first left-side axis.
plot1.left[0].formatter.use_scientific = False

# One circle per row of the shared data source.
plot1_circle = plot1.circle(
    source=source,
    x="score",
    y="gross",
    size=circle_size,
    fill_color=fill_color,
    fill_alpha=0.5,
    line_color=line_color,
)

# Use the shared glyphs for selected / unselected points.
plot1_circle.nonselection_glyph = nonselection_glyph
plot1_circle.selection_glyph = selection_glyph

### Creating the second plot
The second plot is created the same way as the first one with just slightly tweaked parameters.

In [59]:
# Figure 2: budget (y-axis) against release year (x-axis).
plot2 = figure(
    title="",
    tools=tools,
    plot_width=graph_size,
    plot_height=graph_size,
    x_axis_label="Year",
    y_axis_label="Budget",
)
plot2.add_tools(hover_tool)

# Turn off scientific notation on the formatter of the first left-side axis.
plot2.left[0].formatter.use_scientific = False

# One circle per row of the shared data source.
plot2_circle = plot2.circle(
    source=source,
    x="year",
    y="budget",
    size=circle_size,
    fill_color=fill_color,
    fill_alpha=0.5,
    line_color=line_color,
)

# Use the shared glyphs for selected / unselected points.
plot2_circle.nonselection_glyph = nonselection_glyph
plot2_circle.selection_glyph = selection_glyph

### Plot 1

The first plot visualizes the relationship between a movie's total gross and its IMDB score. 

In [60]:
# Render the gross-vs-score figure as this cell's output.
# NOTE(review): plot1 and plot2 are rendered by two separate show() calls. Linked
# selection through the shared ColumnDataSource presumably only works when both
# figures are shown together in one layout (e.g. show(layout([[plot1, plot2]])))
# - confirm against the Bokeh documentation.
show(plot1)

### Plot 2

The second plot visualizes the relationship between a movie's budget and the year it was released. It is worth noting that each budget is given in the movie's local currency.

In [61]:
# Render the budget-vs-year figure as this cell's output.
# NOTE(review): this is a separate show() call from the one that renders plot1;
# selections are presumably not linked between the two outputs unless both
# figures are shown in a single layout - confirm against the Bokeh documentation.
show(plot2)