# Task A: Visualisation of Year, Runtime, and Genre

The top 99 highest rated IMDB movies are visualised in this demonstration. We display how the run time of the movies varies over the years, how many movies from this list were released in each year, and also allow the user to filter the movies based on their genres.

## Instructions

1. All the genres are enabled by default. You can remove those you are not interested in and the plots will reflect your choice. Use the sidebar on the left for this.
2. There is also a slider that changes the height of the charts.
3. Hovering over the points will show related information on tooltips.
4. The scatter plot allows rectangular selection and the bar chart allows interval selection. The brushes selectively highlight on both plots. The selection area can be moved around by dragging. The size can be changed by scrolling on the selected area.
5. Click outside the selection area to deselect and reset the plot.

In [30]:
%%capture
# Code comments are not visible to the end-user
# !pip install altair
# !pip install mercury
# !pip install pandas

In [31]:
# Import the necessary packages
import altair as alt  # For interactive and custom visuals
import mercury as mr  # For additional input and filtering capabilities
import pandas as pd   # For working with the dataset

In [32]:
# Read the raw movie records from the CSV file into a DataFrame
movies_csv_path = 'data/imdb100/movies.csv'
movies = pd.read_csv(movies_csv_path)
# movies

In [33]:
def clean_year(year):
    """
    Strips the first and last character (the surrounding parentheses) from
    the raw year text, e.g. "(1972)" becomes "1972".

    The value returned is still a string; no integer conversion is done here.
    """
    without_brackets = slice(1, -1)
    return year[without_brackets]

In [34]:
# Store the parenthesis-free year text in a new 'Year' column
movies['Year'] = movies['year_of_release'].map(clean_year)
# movies

In [35]:
def clean_runtime(runtime):
    """
    Drops the last four characters of the raw run time text and returns what
    is left as an integer, e.g. "175 min" becomes 175.

    NOTE(review): presumably the four trailing characters are always " min";
    confirm against the data set.
    """
    suffix_length = 4
    minutes_text = runtime[:-suffix_length]
    return int(minutes_text)

In [36]:
# Store the numeric run time in a new 'Runtime in minutes' column
movies['Runtime in minutes'] = movies['run_time'].map(clean_runtime)
# movies

In [37]:
# Discard the columns the visualisation does not use, including the raw year
# and run time text that has already been cleaned into new columns
unused_columns = ['category', 'votes', 'gross_total', 'year_of_release', 'run_time']
movies = movies.drop(columns=unused_columns)

# Give the remaining columns presentation-friendly names
display_names = {
    'index': 'Rank',
    'movie_name': 'Movie Name',
    'genre': 'Genre',
    'imdb_rating': 'Rating',
}
movies = movies.rename(columns=display_names)

# Set the column dtypes explicitly for easier type inference
column_types = {
    'Rank': 'int',
    'Movie Name': 'string',
    'Genre': 'string',
    'Year': 'int',
    'Runtime in minutes': 'int',
    'Rating': 'float',
}
movies = movies.astype(column_types)
# movies

In [38]:
# Collect every distinct genre name that occurs in the Genre column
genre_values = list(movies['Genre'])

genres = set()
for g in genre_values:
    # Each value holds one or more genre names separated by ', '
    genres.update(g.split(', '))
# genres

In [39]:
def apply_genre(text, genre):
    """
    Tells whether `genre` is one of the genres listed in `text`.

    `text` is a genre listing whose entries are separated by ', '
    (e.g. "Crime, Drama"). The listing is split on that separator and
    `genre` is compared against whole entries, so a genre whose name is
    contained in another one (e.g. "Music" inside "Musical") is not
    reported as present by accident.

    Returns True if `genre` is an entry of the listing, otherwise False.
    """
    return genre in text.split(', ')

# Add one boolean column per genre, flagging the movies that belong to it
genre_text = movies['Genre']
for genre in genres:
    movies[genre] = genre_text.apply(lambda text: apply_genre(text, genre))
# movies

In [40]:
# Configure the Mercury app that wraps this notebook.
# NOTE(review): the exact effect of each flag below is defined by the Mercury
# library and is not visible in this notebook; confirm against the mr.App
# documentation before changing any of them.
app = mr.App(
    description="A visualisation with interactive plots showing the relationship between movie year, run time, and genres.",
    show_code=False,
    show_prompt=False,
    continuous_update=True,
    static_notebook=False,
    show_sidebar=True,
    full_screen=True,
    allow_download=False,
)

In [41]:
# Chart sizing: the width is fixed, the height is read from a Mercury slider
chart_width = 750
chart_height = mr.Slider(
    label="Chart height",
    value=300,
    min=300,
    max=500,
    step=20,
)

mercury.Slider

In [42]:
# Genre filtering multi-selection tool.
# `genres` is a set of strings, so its iteration order is arbitrary and can
# differ from one interpreter run to the next. Sort it so the options are
# always offered in the same alphabetical order.
genre_choices = sorted(genres)
selected_genres = mr.MultiSelect(
    label="Select genre(s)",
    # All genres are selected by default; pass a separate list object so the
    # widget's value and its choices do not share state
    value=list(genre_choices),
    choices=genre_choices,
)

mercury.MultiSelect

In [43]:
# selected_genres.value

In [44]:
# Flag the movies that match at least one of the selected genres.
# Start from an explicit all-False mask aligned with the movies index. Deriving
# it from a comparison on Rank would only be all-False while no Rank exceeds
# the threshold, so any higher-ranked row would be highlighted unconditionally.
indices = pd.Series(False, index=movies.index)
for g in selected_genres.value:
    # Each genre column holds booleans, so OR-ing accumulates the matches
    indices |= movies[g]
movies['Highlight'] = indices
# movies

In [45]:
# Keep a separate frame holding only the movies flagged by the genre filter
is_highlighted = movies['Highlight']
filtered_movies = movies[is_highlighted]

In [46]:
# The information we want to display on tooltips
tooltip_cols = ['Movie Name', 'Rank', 'Genre', 'Year', 'Runtime in minutes', 'Rating',]

# The selection mechanisms on the graphs.
# One interval selection object is created here and added to every chart that
# uses it, so the charts share the same brush.
brush = alt.selection(type='interval', resolve='global')
# single = alt.selection_single()

# The graph opacity condition for local selection (brush and click).
# Marks that satisfy the brush condition get opacity 0.9, all others get 0.1.
# opacity = alt.condition(brush | single, alt.value(0.9), alt.value(0.1))
opacity = alt.condition(brush, alt.value(0.9), alt.value(0.1))

# The chart for scatter plot of the movies and their runtimes.
# It is built from the full `movies` frame, so movies outside the genre filter
# stay on the plot.
points = alt.Chart(movies).mark_point(filled=True).encode(
    # Year is encoded as an ordinal field (':O')
    alt.X('Year:O',
        scale=alt.Scale(zero=False, padding=8),
        axis=alt.Axis(tickMinStep=1)
    ),
    alt.Y('Runtime in minutes',
        scale=alt.Scale(zero=False, padding=25),
    ),
    tooltip=tooltip_cols,
    # Rows whose 'Highlight' flag is set are coloured by Rank; the rest are grey
    color=alt.condition(
        alt.datum['Highlight'],
        'Rank',
        alt.value('grey')
    ),
    # Point size encodes the Rating
    size='Rating',
    opacity=opacity
).add_selection(
    brush,
#     single
).properties(
    width=chart_width,
    # The height is read from the slider widget's current value
    height=chart_height.value
)

# The bar chart is only built when at least one genre is selected; `bars` is
# left undefined otherwise, so later code must check the selection before use.
if selected_genres.value:
    # Filtered chart for movies with stacked runtimes
    bars = alt.Chart(filtered_movies).mark_bar().encode(
        # No type suffix is given for Year here, unlike the ordinal 'Year:O'
        # used in the scatter plot
        alt.X('Year',
            scale=alt.Scale(padding=8),
            axis=alt.Axis(tickMinStep=1)
        ),
        # Bar height is the summed run time
        alt.Y('sum(Runtime in minutes)',
#             scale=alt.Scale(padding=25)
        ),
        tooltip=tooltip_cols,
        # Rank is encoded as a quantitative field on the 'magma' colour scheme
        color=alt.Color('Rank:Q',
            scale=alt.Scale(scheme='magma')
        ),    
        opacity=opacity,
    ).add_selection(
        brush,
    #     single
    ).properties(
        width=chart_width,
        height=chart_height.value
    )

# Stack the scatter plot above the bar chart when a genre is selected;
# with no genre selected there is no bar chart, so show the scatter plot alone
if selected_genres.value:
    final = points & bars
else:
    final = points

# The configured chart is the cell's last expression, which is what gets displayed
final.configure_axis(labelFontSize=15, titleFontSize=20)

In [18]:
# Show the genre-filtered movies as a table indexed by Rank.
# With no genre selected the cell evaluates to None and shows nothing.
table_columns = ['Movie Name', 'Genre', 'Year', 'Runtime in minutes', 'Rating']
movies_table = None
if selected_genres.value:
    display_movies = filtered_movies.set_index('Rank')
    movies_table = display_movies[table_columns]
movies_table

Unnamed: 0_level_0,Movie Name,Genre,Year,Runtime in minutes,Rating
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,The Godfather,"Crime, Drama",1972,175,9.2
2,The Silence of the Lambs,"Crime, Drama, Thriller",1991,118,8.6
3,Star Wars: Episode V - The Empire Strikes Back,"Action, Adventure, Fantasy",1980,124,8.7
4,The Shawshank Redemption,Drama,1994,142,9.3
5,The Shining,"Drama, Horror",1980,146,8.4
...,...,...,...,...,...
95,The Usual Suspects,"Crime, Drama, Mystery",1995,106,8.5
96,Cool Hand Luke,"Crime, Drama",1967,127,8.1
97,Eternal Sunshine of the Spotless Mind,"Drama, Romance, Sci-Fi",2004,108,8.3
98,City Lights,"Comedy, Drama, Romance",1931,87,8.5


Made with ❤️ by Group 18.

February 2023