In [18]:
import math

import pandas as pd
import numpy as np
import bokeh.models as bmo
from bokeh.io import output_notebook, output_file, show
from bokeh.plotting import figure
from bokeh.palettes import d3
from bokeh.models import ColumnDataSource, CustomJS
from bokeh.models import HoverTool, TapTool
from bokeh.models import Span

# Render to a standalone HTML page rather than inline notebook output.
# To display inside the notebook instead, call output_notebook() here.
OUTPUT_HTML = "project02.html"
output_file(OUTPUT_HTML)

In [19]:
# Load the cleaned WikiArt table.
# NOTE(review): the "unicode_escape" codec is kept from the original; confirm it
# matches the real encoding of the CSV.
data = pd.read_csv("WikiArtClean.csv", encoding="unicode_escape")

# One group per (Year, Style) pair; both summary frames below share this grouping,
# so they contain the same keys in the same order.
group_by = ["Year", "Style"]
data__grouped = data.groupby(group_by)

# Per-group mean of every numeric column. numeric_only=True is spelled out because
# pandas >= 2.0 raises TypeError when asked to average non-numeric columns (older
# versions silently dropped them), so this keeps the cell working on both if the
# CSV carries any text columns besides the grouping keys.
df__grouped = data__grouped.mean(numeric_only=True).reset_index()

# Number of rows in each (Year, Style) group.
df__count = data__grouped.size().reset_index(name="count")

In [22]:
# Hover card: each "@name" placeholder is filled from the column of that name in
# the ColumnDataSource behind the glyph under the cursor.
tooltip_rows = [
    ("Year", "@year"),
    ("Style", "@style"),
    ("Mean Rating", "@mean{(0.000)}"),
    ("Number of Paintings", "@count"),
]
hover = HoverTool(tooltips=tooltip_rows)

# Axis limits are the data extent plus a small margin (10 years / 0.05 rating
# points) so the outermost points are not drawn right on the plot frame.
years = df__grouped["Year"]
ratings = df__grouped["Mean rating"]

p = figure(
    sizing_mode="stretch_both",
    x_range=[years.min() - 10, years.max() + 10],
    y_range=[ratings.min() - 0.05, ratings.max() + 0.05],
    match_aspect=True,
    tools=[hover, "reset", "box_zoom", "wheel_zoom"],
    active_scroll="wheel_zoom",
    active_drag="box_zoom",
)

# Styles to draw, in legend order. Each entry must match a value of the "Style"
# column exactly. Listed once so the filter and the legend label cannot drift apart.
styles = [
    "Renaissance Art",
    "Post Renaissance Art",
    "Modern Art,Post Renaissance Art",
    "Modern Art",
    "Contemporary Art,Modern Art",
    "Contemporary Art",
]

# Take the full ten-colour palette and let zip() stop at the last style. Looking the
# palette up by the number of distinct styles in the data raised KeyError whenever
# that number was below 3 or above 10; the first N colours are the same either way.
palette = d3["Category10"][10]

# Attach the group sizes to the group means by key instead of relying on both
# frames happening to share the same row order.
plot_rows = df__grouped[["Year", "Style", "Mean rating"]].merge(
    df__count, on=["Year", "Style"], how="left"
)

# The loop variable is deliberately NOT called `data`, so the raw DataFrame loaded
# earlier in the notebook is no longer overwritten by running this cell.
for style, color in zip(styles, palette):
    style_rows = plot_rows.loc[plot_rows["Style"] == style]
    if style_rows.empty:
        # Nothing to draw for this style; skip it so the legend has no dead entry.
        continue

    source = ColumnDataSource(data={
        "mean": style_rows["Mean rating"],
        "year": style_rows["Year"],
        "style": style_rows["Style"],
        "count": style_rows["count"],
        # Marker diameter grows with the square root of the group size, so marker
        # area is proportional to the number of paintings.
        "countscaled": 2 * np.sqrt(style_rows["count"]),
    })

    # `legend_label` replaces the `legend=` keyword, which was deprecated in
    # Bokeh 1.4 and removed in 3.0. The line and the markers use the same label,
    # so they share one legend entry and are hidden together on click.
    p.line(
        x="year",
        y="mean",
        color=color,
        alpha=0.8,
        legend_label=style,
        line_width=2,
        source=source,
    )

    p.circle(
        x="year",
        y="mean",
        color=color,
        alpha=0.8,
        legend_label=style,
        source=source,
        size="countscaled",
    )
    
# Reference line at y = 0; dimension="width" makes the span run across the plot
# horizontally.
y0 = Span(location=0, dimension="width", line_color="black", line_width=1.5)
p.add_layout(y0)
    
# Legend sits in the top-left corner; clicking an entry hides its glyphs.
p.legend.location = "top_left"
p.legend.click_policy = "hide"

# Centered, enlarged plot title.
title = p.title
title.text = "Mean User Ratings for Painting Styles Over Time"
title.align = "center"
title.text_font_size = "2em"

# Both axes get a label in the same font size.
for axis, label in ((p.xaxis, "Year"), (p.yaxis, "Mean User Rating")):
    axis.axis_label = label
    axis.axis_label_text_font_size = "1.5em"

# Write the figure to the configured output and open it.
show(p)