In [None]:
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Sheet Music Analysis with Gemini

<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/generative-ai/blob/main/gemini/use-cases/document-processing/sheet_music.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Google Colaboratory logo"><br> Run in Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fgenerative-ai%2Fmain%2Fgemini%2Fuse-cases%2Fdocument-processing%2Fsheet_music.ipynb">
      <img width="32px" src="https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN" alt="Google Cloud Colab Enterprise logo"><br> Run in Colab Enterprise
    </a>
  </td>       
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/use-cases/document-processing/sheet_music.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo"><br> View on GitHub
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/generative-ai/main/gemini/use-cases/document-processing/sheet_music.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo"><br> Open in Vertex AI Workbench
    </a>
  </td>
</table>


| | |
|-|-|
|Author(s) | [Holt Skinner](https://github.com/holtskinner) |

## Overview

[Sheet Music](https://en.wikipedia.org/wiki/Sheet_music) is the primary form of music notation used by composers and performers across the world. Its pages contain information such as the lyrics, pitches, rhythms, composer, text author, and composition date.

This notebook illustrates using Gemini to extract this metadata from sheet music PDFs.

These prompts and documents were demonstrated in the Google Cloud Next 2024 session "What's next with Gemini: Driving business impact with multimodal use cases".


## Getting Started


### Install Vertex AI SDK for Python

In [None]:
# Quietly (-q) install or upgrade the Vertex AI SDK into the user site-packages (--user)
%pip install --upgrade --user -q google-cloud-aiplatform

### Restart current runtime

To use the newly installed packages in this Jupyter runtime, you must restart the runtime. You can do this by running the cell below, which will restart the current kernel.

In [None]:
# The packages installed above only become importable after a kernel restart.
import IPython

# Look up the running application and ask its kernel to shut down with
# restart=True, so a fresh kernel comes back up.
IPython.Application.instance().kernel.do_shutdown(True)

<div class="alert alert-block alert-warning">
<b>⚠️ The kernel is going to restart. Please wait until it is finished before continuing to the next step. ⚠️</b>
</div>



### Authenticate your notebook environment (Colab only)

If you are running this notebook on Google Colab, run the following cell to authenticate your environment. This step is not required if you are using [Vertex AI Workbench](https://cloud.google.com/vertex-ai-workbench).


In [None]:
import sys

# A loaded `google.colab` module is used as the signal that this notebook is
# running on Google Colab, where an explicit sign-in is required.
in_colab = "google.colab" in sys.modules

if in_colab:
    # Imported lazily because the package is only expected to exist on Colab
    from google.colab import auth

    # Authenticate the current user to Google Cloud
    auth.authenticate_user()

### Set Google Cloud project information and initialize Vertex AI SDK

To get started using Vertex AI, you must have an existing Google Cloud project and [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com).

Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment).

In [None]:
import os

import vertexai

# Define project information
PROJECT_ID = "YOUR_PROJECT_ID"  # @param {type:"string"}
LOCATION = "us-central1"  # @param {type:"string"}

# If the placeholder above was left unchanged (or cleared), fall back to the
# project configured in the environment instead of sending a project ID that
# does not exist, which would only surface later as an opaque API error.
if not PROJECT_ID or PROJECT_ID == "YOUR_PROJECT_ID":
    # May be None when the variable is unset.
    # NOTE(review): the SDK is expected to resolve the project from the default
    # credentials in that case - confirm for the installed SDK version.
    PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT")

# Initialize Vertex AI
vertexai.init(project=PROJECT_ID, location=LOCATION)

### Import libraries


In [None]:
from IPython.display import Markdown, display

from vertexai.generative_models import (
    GenerationConfig,
    GenerativeModel,
    HarmCategory,
    HarmBlockThreshold,
    Part,
)

### Load the Gemini 1.5 Flash model

Gemini 1.5 Flash (`gemini-1.5-flash-001`) is a multimodal model. You can include text, image(s), PDFs, audio, and video in your prompt requests and get text or code responses.

In [None]:
# Safety override: the dangerous-content category uses the BLOCK_ONLY_HIGH threshold.
safety_settings = {
    HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
}

# Sampling temperature and the cap on generated tokens for every request.
generation_config = GenerationConfig(
    max_output_tokens=8192,
    temperature=1.0,
)

# One model instance is shared by the cells below; the system instruction
# frames every request as coming to a musicology expert.
model = GenerativeModel(
    model_name="gemini-1.5-flash-001",
    safety_settings=safety_settings,
    generation_config=generation_config,
    system_instruction="You are an expert in musicology and music history.",
)

## Extract Structured Metadata from Sheet Music PDF

For this example, we will be using the popular classical music book [24 Italian Songs and Arias of the 17th and 18th Centuries](https://imslp.org/wiki/24_Italian_Songs_and_Arias_of_the_17th_and_18th_Centuries_(Various)), and extracting metadata about each song in the book.

In [29]:
# Location of the songbook PDF in Google Cloud Storage
SONGBOOK_PDF_URI = "gs://github-repo/use-cases/sheet-music/24ItalianSongs.pdf"

# Reference the PDF by its Cloud Storage URI rather than uploading bytes
file_part = Part.from_uri(
    uri=SONGBOOK_PDF_URI,
    mime_type="application/pdf",
)

# Instructions describing which metadata fields to return for each piece
sheet_music_extraction_prompt = """The following document is a book of sheet music. Your task is to output structured metadata about every piece of music in the document. Correct any mistakes that are in the document and fill in missing information when not present in the document.

Include the following details:

Title
Composer with lifetime
Tempo Marking
Composition Year
A description of the piece
"""

# The request is the text instructions followed by the PDF
contents = [sheet_music_extraction_prompt, file_part]
response = model.generate_content(contents)

# Render the model's reply as Markdown in the notebook
rendered_response = Markdown(response.text)
display(rendered_response)

## Twenty-Four Italian Songs and Arias

**Per la gloria d'adorarvi**
* **For the love my heart doth prize** 
* **Giovanni Battista Bononcini** (1670 - 1750)
* **Andante** = 80
* **1700**
* An aria from the opera *Griselda* about unrequited love, characterized by its sorrowful and introspective nature. The singer bemoans the pain of love, describing it as "vain" and "all in vain." The music itself mirrors this sentiment with its use of descending melodies and dissonances.

**Amarilli, mia bella**
* **Amarilli, my fair one**
* **Giulio Caccini** (1546-1618)
* **Moderato affettuoso** = 168
* **1600**
* A madrigal expressing the speaker's adoration for Amarilli.  The music is characterized by its gentle and flowing melodies, as well as its expressive use of ornamentation. The singer uses many metaphors to describe the depth of his feelings, highlighting Amarilli's beauty and the strength of his love.

**Alma del core**
* **Fairest adored**
* **Antonio Caldara** (1670-1736)
* **Tempo di Minuetto** 
* **1700**
*  A song of adoration for the person's heart. It is a simple but beautiful piece, with a gentle and flowing melody. The singer expresses their love and admiration with phrases like "Fair-est a-dor-ed" and "Thy faithful love". 

**Come raggio di sol**
* **As on the swelling wave**
* **Antonio Caldara** (1670-1736)
* **Sostenuto** = 140
* **1700**
* An aria depicting a serene and majestic scene of a swelling wave reflecting sunlight, with a beautiful and flowing melody.  The vocal line captures the movement of the waves, and the accompanying piano evokes the shimmering light.

**Sebben, crudele**
* **Th' not deserving**
* **Antonio Caldara** (1670-1736)
* **Allegretto grazioso** = 54
* **1700**
*  A canzonetta characterized by its bittersweet and melancholic mood. The singer expresses their pain and sorrow in a complex and nuanced way, highlighting the cruel treatment they have received. The music features contrasting sections, shifting between gentle, flowing melodies and more intense, dramatic moments.

**Vittoria, mio core!**
* **Victorious my heart is!**
* **Giacomo Carissimi** (1605-1674)
* **Allegro con brio** = 168
* **1650**
* A cantata that celebrates the triumph of love over adversity. The music is energetic and lively, with a strong sense of purpose.  The singer rejoices in their victory, with phrases like "Victorious my heart is!" and "For love now has broken its shackles in twain."

**Danza, danza, fanciulla gentile**
* **Dance, O dance, maiden gay**
* **Francesco Durante** (1684-1755)
* **Allegro con spirito** = 138
* **1700**
* An arietta that encourages a young maiden to dance. The music is bright and cheerful, with a simple, repeated melody. It features a delightful refrain "Dance, O dance, maiden gay".

**Vergin, tutto amor**
* **Virgin, fount of love**
* **Francesco Durante** (1684-1755)
* **Largo religioso** = 140
* **1700**
* A prayer addressed to the Virgin Mary, expressing gratitude and seeking her intercession. The music is slow and contemplative, reflecting the solemn nature of the prayer. 

**Caro mio ben**
* **Thou, all my bliss**
* **Giuseppe Giordani** (Giordaniello) (1744-1798)
* **Larghetto** = 60
* **1700**
* An arietta expressing deep affection and longing for a beloved. It is a simple but heartfelt piece, with a gentle and flowing melody.  The singer declares their love and expresses their longing for their lover's presence with phrases like "Thou, all my bliss" and "My heart is forlorn."

**O del mio dolce ardor**
* **O thou belov'd**
* **Christoph Willibald von Gluck** (1714-1787)
* **Moderato** = 140
* **1700**
* An aria reflecting on the power of love, expressing admiration and longing for the beloved.  The music features a melody that rises and falls, mirroring the emotional ebb and flow of the singer's feelings. 

**Che fiero costume**
* **How void of compassion**
* **Giovanni Legrenzi** (1626-1690)
* **Allegretto con moto** = 56
* **1680**
* An arietta about a person who is cold and unfeeling, highlighting their lack of compassion. The music is characterized by its sharp and angular melodies, creating a sense of detachment and coldness.

**Pur dicesti, o bocca bella**
* **Mouth so charmful**
* **Antonio Lotti** (1667-1740)
* **Allegretto grazioso** = 69
* **1700**
* An arietta expressing admiration and longing for a charming and beautiful person, their enchanting words being compared to sweet lures. The melody is light and playful, reflecting the beauty and allure of the person.

**Il mio bel foco**
* **My joyful ardor**
* **Benedetto Marcello** (1686-1739)
* **Recitativo ed Aria** 
* **1700**
* A recitative and aria expressing the joy and fervor of love.  The recitative sets the scene, and the aria builds to a powerful climax. The music is passionate and vibrant, reflecting the speaker's intense feelings of love.

**Lasciatemi morire!**
* **No longer let me languish**
* **Claudio Monteverdi** (1567-1643)
* **Lento** = 58
* **1600**
* An aria from the opera *Ariana*,  about the singer's plea to be allowed to die. The music is slow and mournful, reflecting the singer's despair.  The vocal line is filled with long, drawn-out notes, and the piano accompaniment creates a sense of sadness and resignation.

**Nel cor piu non mi sento**
* **Why feels my heart so dormant**
* **Giovanni Paisiello** (1740-1816)
* **Andantino** = 68
* **1700**
* A song reflecting on the speaker's feelings of apathy and indifference. The melody is slow and languid, mirroring the speaker's emotional state. The singer feels lifeless and compares their heart to a "torment." 

**Se tu m'ami, se sospiri**
* **If thou lov'st me**
* **Giovanni Battista Pergolesi** (1710-1736)
* **Andantino** = 88
* **1700**
*  An arietta about unrequited love, expressing the singer's longing for someone who doesn't love them back.  The music is sweet and melancholic, with a gentle melody that underscores the singer's sadness and yearning.

**Gia il sole dal Gange**
* **O'er Ganges now launches**
* **Alessandro Scarlatti** (1659-1725)
* **Allegro giusto** = 138
* **1700**
* A canzonetta about the Ganges River,  evoking a beautiful and vibrant scene. The music is lively and energetic, capturing the movement of the river and the warmth of the sun.

**Le Violette**
* **The Violets**
* **Alessandro Scarlatti** (1659-1725)
* **Allegretto**
* **1700**
* A canzone about violets, describing their beauty and fragrance.  The music is gentle and flowing, with a simple but lovely melody.

**O cessate di piagarmi**
* **O no longer seek to pain me**
* **Alessandro Scarlatti** (1659-1725)
* **Andante con moto** = 80
* **1700**
* An arietta with two sections, expressing a plea for someone to stop hurting the singer.  The first section is marked by a sense of resignation, while the second section becomes more impassioned. 

**Se Florindo è fedele**
* **Should Florindo be faithful**
* **Alessandro Scarlatti** (1659-1725)
* **Allegretto grazioso, moderato assai** = 132
* **1700**
*  An arietta with a hopeful yet uncertain tone. The singer expresses their longing for a faithful love, using the name Florindo as a symbol of true affection. The music features a mix of gentle and more intense melodies, reflecting the speaker's hopes and fears.

**Pieta, Signore!**
* **O Lord, have mercy**
* **Alessandro Stradella** (1639-1682)
* **Andantino**
* **1670**
* A plea for mercy, with a solemn and earnest tone. The music is slow and dramatic, emphasizing the singer's desperation and need for divine intervention. 

**Tu lo sai**
* **Ask thy heart**
* **Giuseppe Torelli** (1658-1709)
* **Andantino** 
* **1700**
*  A song reflecting on the singer's pain and asking the listener to empathize with them.  The music is gentle and introspective, with a simple melody that underscores the singer's heartfelt plea. 


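You can see that Gemini returned each of the requested fields for the songs it found in the document. Because the prompt also asks the model to fill in information that is missing from the document, generated details such as the composition year should be verified against an authoritative source.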

### Song Identification with Audio

Now, let's try something more challenging: identifying a song being performed, based on the sheet music. We have an audio clip of Holt Skinner performing one of the songs in the book, and we will ask Gemini to identify it based on the sheet music PDF.

In [31]:
# Cloud Storage locations of the songbook and the recorded performance
SONGBOOK_PDF_URI = "gs://github-repo/use-cases/sheet-music/24ItalianSongs.pdf"
AUDIO_CLIP_URI = "gs://github-repo/use-cases/sheet-music/24ItalianClip.mp3"

# Reference both files by URI; the MIME type tells the model how to read each
pdf_part = Part.from_uri(uri=SONGBOOK_PDF_URI, mime_type="application/pdf")
audio_part = Part.from_uri(uri=AUDIO_CLIP_URI, mime_type="audio/mpeg")

# Question asked about the two files, including a request for the reasoning
song_identification_prompt = "Based on the sheet music PDF, what song is in the audio clip? Explain how you made the decision."

# The request is the PDF, then the audio clip, then the question
contents = [pdf_part, audio_part, song_identification_prompt]
response = model.generate_content(contents)

# Render the model's reply as Markdown in the notebook
rendered_response = Markdown(response.text)
display(rendered_response)

The song in the audio clip is *Sebben, crudele*. The audio clip contains a segment from the song that includes the lyrics *Sebben, crudele, mi fai languir, sempre fedele, sempre fedele, ti voglio amar.* These lyrics are repeated twice in a row and are unique to *Sebben, crudele*. There are no other songs in the collection that include these lyrics. 
