In [1]:
import pandas as pd
import altair as alt

# Read the cleaned dataset from disk
df = pd.read_csv('data/ClassicHit_clean.csv')

# Columns that take part in the correlation analysis
qualitative_features = [
    'Danceability', 'Energy', 'Speechiness', 'Acousticness',
    'Instrumentalness', 'Liveness', 'Valence',
]
qualitative_df = df.loc[:, qualitative_features]

# Pairwise correlations, reshaped from a square matrix into long form:
# one row per (Feature1, Feature2) pair with its correlation value
corr_matrix = qualitative_df.corr()
corr = (
    corr_matrix
    .reset_index()
    .melt(id_vars='index', var_name='Feature2', value_name='Correlation')
    .rename(columns={'index': 'Feature1'})
)

# Encodings for the heatmap, built up front so the chart definition stays short.
# Both axes are ordinal and follow the order of `qualitative_features`.
heatmap_x = alt.X(
    'Feature2:O',
    title='',
    sort=qualitative_features,
    axis=alt.Axis(labelAngle=45, labelFontSize=16, titleFontSize=14),
)
heatmap_y = alt.Y(
    'Feature1:O',
    title='',
    sort=qualitative_features,
    axis=alt.Axis(labelFontSize=16, titleFontSize=14),
)
# Diverging blue -> white -> red scale pinned to -1, 0 and 1
heatmap_color = alt.Color(
    'Correlation:Q',
    scale=alt.Scale(
        domain=[-1, 0, 1],
        range=['#3B4CC0', '#FFFFFF', '#B40426'],
    ),
    legend=alt.Legend(title='Correlation', labelFontSize=12, titleFontSize=14),
)
heatmap_title = alt.TitleParams(
    text='Correlation Heatmap of Qualitative Music Features',
    fontSize=20,
)

# One rectangle per feature pair, separated by thin white borders
heatmap = (
    alt.Chart(corr)
    .mark_rect(stroke='white', strokeWidth=0.5)
    .encode(x=heatmap_x, y=heatmap_y, color=heatmap_color)
    .properties(width=1000, height=800, title=heatmap_title)
)

# Text colour for the cell labels: white where |correlation| exceeds 0.5,
# black everywhere else
annotation_color = alt.condition(
    'datum.Correlation > 0.5 || datum.Correlation < -0.5',
    alt.value('white'),
    alt.value('black'),
)

# Numeric labels drawn on top of each cell; derived from the heatmap so the
# x/y encodings and chart properties carry over
annotations = heatmap.mark_text(baseline='middle', fontSize=16).encode(
    text=alt.Text('Correlation:Q', format='.2f'),
    color=annotation_color,
)

# Layer the labels over the heatmap and drop the view border
plot = alt.layer(heatmap, annotations).configure_view(strokeWidth=0)

plot

In [3]:
import pandas as pd
import altair as alt
import numpy as np

# Switch Altair to its 'json' data transformer for the charts below
alt.data_transformers.enable('json')

# Read the cleaned dataset from disk
df = pd.read_csv(filepath_or_buffer='data/ClassicHit_clean.csv')

def create_histogram(feature_name):
    """Build a count histogram for one column of the module-level ``df``.

    Parameters
    ----------
    feature_name : str
        Name of the column in ``df`` to plot. It is also used as the x-axis
        title and as the prefix of the chart title.

    Returns
    -------
    alt.Chart
        A 1000x800 bar chart with the column binned (at most 20 bins) on the
        x-axis and the record count on the y-axis.

    Notes
    -----
    The x-axis domain is fixed to [0, 1], so this presumably expects a
    feature whose values lie in that range -- confirm for new columns.
    """
    # Ticks at exact tenths (0.0, 0.1, ..., 1.0). Splitting [0, 1] into ten
    # steps needs 11 points; with 10 points the spacing is 1/9 and the '.1f'
    # labels no longer match the tick positions. round(1) strips float noise.
    tick_values = np.linspace(0, 1, 11).round(1).tolist()

    return alt.Chart(df).mark_bar(
        color='#8BAAE0',
        opacity=0.8,
        binSpacing=0
    ).encode(
        x=alt.X(
            f'{feature_name}:Q',
            title=feature_name,
            axis=alt.Axis(
                titleFontSize=26,
                labelFontSize=20,
                titlePadding=15,
                grid=True,
                values=tick_values,
                format='.1f'
            ),
            bin=alt.Bin(maxbins=20),
            scale=alt.Scale(domain=[0, 1])
        ),
        y=alt.Y(
            'count()',
            title='Count',
            axis=alt.Axis(
                titleFontSize=26,
                labelFontSize=20,
                titlePadding=15,
                grid=True
            )
        )
    ).properties(
        title={
            'text': f'{feature_name} Distribution',
            'fontSize': 30,
            'fontWeight': 'bold',
            'anchor': 'middle',
            'offset': 20
        },
        width=1000,
        height=800
    )

# One histogram per feature, in reading order of the final grid
plot1, plot2, plot3, plot4 = [
    create_histogram(feature)
    for feature in ('Danceability', 'Energy', 'Liveness', 'Valence')
]

# Arrange the four panels as two rows of two, each panel keeping its own scales
top_row = alt.hconcat(plot1, plot2)
bottom_row = alt.hconcat(plot3, plot4)
grid = alt.vconcat(top_row, bottom_row).resolve_scale(
    x='independent',
    y='independent',
)

grid

In [5]:
import pandas as pd
import altair as alt
import numpy as np

alt.data_transformers.enable('json')

# Load the dataset
df = pd.read_csv('data/ClassicHit_clean.csv')

# Axis ticks at exact tenths (0.0, 0.1, ..., 1.0). Splitting [0, 1] into ten
# steps needs 11 points; with 10 points the spacing is 1/9 and the '.1f'
# labels no longer match the tick positions. round(1) strips float noise.
tick_values = np.linspace(0, 1, 11).round(1).tolist()

# Both axes share one look: large fonts, grid lines and ticks from `tick_values`
scatter_axis_style = dict(
    titleFontSize=26,
    labelFontSize=20,
    titlePadding=15,
    grid=True,
    values=tick_values,
    format='.1f',
)

scatter_x = alt.X(
    'Instrumentalness:Q',
    title='Instrumentalness',
    axis=alt.Axis(**scatter_axis_style),
    scale=alt.Scale(domain=[0, 1]),
)
scatter_y = alt.Y(
    'Speechiness:Q',
    title='Speechiness',
    axis=alt.Axis(**scatter_axis_style),
    scale=alt.Scale(domain=[0, 1]),
)
scatter_title = dict(
    text='Instrumentalness vs. Speechiness',
    fontSize=30,
    fontWeight='bold',
    anchor='middle',
    offset=20,
)

# Semi-transparent points, one per row of `df`, with both values in the tooltip
scatter = (
    alt.Chart(df)
    .mark_circle(size=80, opacity=0.5)
    .encode(
        x=scatter_x,
        y=scatter_y,
        tooltip=['Instrumentalness', 'Speechiness'],
    )
    .properties(title=scatter_title, width=1000, height=800)
)

# Regression of Speechiness on Instrumentalness, drawn as a thick red line.
# It is derived from `scatter`, so it reuses that chart's data and encodings.
trend_line = scatter.transform_regression(
    on='Instrumentalness',
    regression='Speechiness',
).mark_line(size=3, color='red')

# Overlay the fitted line on the points and frame the view with a black border
scatter_trend = alt.layer(scatter, trend_line).configure_view(
    stroke='black',
    strokeWidth=1,
)

scatter_trend