In [None]:
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Sheet Music Analysis with Gemini

<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/generative-ai/blob/main/gemini/use-cases/document-processing/sheet_music.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Google Colaboratory logo"><br> Run in Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fgenerative-ai%2Fmain%2Fgemini%2Fuse-cases%2Fdocument-processing%2Fsheet_music.ipynb">
      <img width="32px" src="https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN" alt="Google Cloud Colab Enterprise logo"><br> Run in Colab Enterprise
    </a>
  </td>       
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/use-cases/document-processing/sheet_music.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo"><br> View on GitHub
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/generative-ai/main/gemini/use-cases/document-processing/sheet_music.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo"><br> Open in Vertex AI Workbench
    </a>
  </td>
</table>

<div style="clear: both;"></div>

<b>Share to:</b>

<a href="https://www.linkedin.com/sharing/share-offsite/?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/use-cases/document-processing/sheet_music.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/8/81/LinkedIn_icon.svg" alt="LinkedIn logo">
</a>

<a href="https://bsky.app/intent/compose?text=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/use-cases/document-processing/sheet_music.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/7/7a/Bluesky_Logo.svg" alt="Bluesky logo">
</a>

<a href="https://twitter.com/intent/tweet?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/use-cases/document-processing/sheet_music.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/5/53/X_logo_2023_original.svg" alt="X logo">
</a>

<a href="https://reddit.com/submit?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/use-cases/document-processing/sheet_music.ipynb" target="_blank">
  <img width="20px" src="https://redditinc.com/hubfs/Reddit%20Inc/Brand/Reddit_Logo.png" alt="Reddit logo">
</a>

<a href="https://www.facebook.com/sharer/sharer.php?u=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/use-cases/document-processing/sheet_music.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/5/51/Facebook_f_logo_%282019%29.svg" alt="Facebook logo">
</a>            


| | |
|-|-|
|Author(s) | [Holt Skinner](https://github.com/holtskinner) |

## Overview

[Sheet music](https://en.wikipedia.org/wiki/Sheet_music) is the primary form of music notation used by composers and performers across the world. These pages contain information such as the lyrics, pitches, rhythms, composer, text author, and composition date.

This notebook illustrates using Gemini to extract this metadata from sheet music PDFs.

These prompts and documents were demonstrated in the [Google Cloud Next 2024 session "What's next with Gemini: Driving business impact with multimodal use cases"](https://www.youtube.com/watch?v=DqH1R9Pk5RI).


## Getting Started


### Install Vertex AI SDK for Python

In [None]:
# Install or upgrade the Vertex AI SDK (google-cloud-aiplatform) and PyPDF2 for the
# current user. PyPDF2 is used at the end of this notebook to edit PDF metadata.
%pip install --upgrade --user -q google-cloud-aiplatform PyPDF2

### Restart current runtime

To use the newly installed packages in this Jupyter runtime, you must restart the runtime. You can do this by running the cell below, which will restart the current kernel.

In [None]:
# Newly installed packages are only picked up after the kernel restarts,
# so shut the running kernel down and ask for a restart.
import IPython

IPython.Application.instance().kernel.do_shutdown(True)

<div class="alert alert-block alert-warning">
<b>⚠️ The kernel is going to restart. Please wait until it is finished before continuing to the next step. ⚠️</b>
</div>


### Authenticate your notebook environment (Colab only)

If you are running this notebook on Google Colab, run the following cell to authenticate your environment. This step is not required if you are using [Vertex AI Workbench](https://cloud.google.com/vertex-ai-workbench).


In [None]:
import sys

# Google Colab needs an extra authentication step; in any other
# environment this cell does nothing.
if "google.colab" in sys.modules:
    from google.colab import auth as colab_auth

    # Sign the current user in to Google Cloud
    colab_auth.authenticate_user()

### Set Google Cloud project information and initialize Vertex AI SDK

To get started using Vertex AI, you must have an existing Google Cloud project and [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com).

Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment).

In [2]:
import vertexai

# Project and region used to initialize the Vertex AI SDK
PROJECT_ID = "YOUR_PROJECT_ID"  # @param {type:"string"}
LOCATION = "us-central1"  # @param {type:"string"}

# Configure the SDK with the values chosen above
vertexai.init(location=LOCATION, project=PROJECT_ID)

### Import libraries


In [4]:
import io
import json

from IPython.display import Markdown, display
import PyPDF2
from vertexai.generative_models import (
    GenerationConfig,
    GenerativeModel,
    HarmBlockThreshold,
    HarmCategory,
    Part,
)

### Load the Gemini 1.5 Flash model

Gemini 1.5 Flash (`gemini-1.5-flash`) is a multimodal model: you can include text, image(s), PDFs, audio, and video in your prompt requests and get text or code responses.

In [13]:
# Default generation settings attached to the model; individual
# generate_content() calls may pass their own config instead.
generation_config = GenerationConfig(max_output_tokens=8192, temperature=1.0)

# Safety threshold: the dangerous-content category is set to BLOCK_ONLY_HIGH
safety_settings = {
    HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
}

# The system instruction frames every request sent through this model object
model = GenerativeModel(
    "gemini-1.5-flash",
    generation_config=generation_config,
    safety_settings=safety_settings,
    system_instruction="You are an expert in musicology and music history.",
)

## Extract Structured Metadata from Sheet Music PDF

For this example, we will be using the popular classical music book [24 Italian Songs and Arias of the 17th and 18th Centuries](https://imslp.org/wiki/24_Italian_Songs_and_Arias_of_the_17th_and_18th_Centuries_(Various)), and extracting metadata about each song in the book.

In [14]:
# Songbook PDF stored in Cloud Storage
sheet_music_pdf_uri = "gs://github-repo/use-cases/sheet-music/24ItalianSongs.pdf"

# Instructions asking for one set of metadata fields per piece in the book
sheet_music_extraction_prompt = """The following document is a book of sheet music. Your task is to output structured metadata about every piece of music in the document. Correct any mistakes that are in the document and fill in missing information when not present in the document.

Include the following details:

Title
Composer with lifetime
Tempo Marking
Composition Year
A description of the piece
"""

# Reference the PDF by its Cloud Storage URI
file_part = Part.from_uri(sheet_music_pdf_uri, mime_type="application/pdf")

# Send the prompt followed by the document to Gemini
response = model.generate_content(
    contents=[sheet_music_extraction_prompt, file_part],
)

# Render the response text as Markdown
display(Markdown(response.text))

## **Twenty-Four Italian Songs and Arias of the Seventeenth and Eighteenth Centuries**

**1. Per la gloria d'adorarvi**
* **Title:** Per la gloria d'adorarvi (For the love my heart doth prize) 
* **Composer:** Giovanni Battista Bononcini (1670-1750)
* **Tempo Marking:** Andante, d=80
* **Composition Year:** c. 1715
* **Description:** An aria from the opera "Griselda," the piece expresses the speaker's longing and heartbreak, contrasting the idea of love with the pain it brings.

**2. Amarilli, mia bella**
* **Title:** Amarilli, mia bella (Amarilli, my fair one)
* **Composer:** Giulio Caccini (1545-1618)
* **Tempo Marking:** Moderato affettuoso, d=66
* **Composition Year:** c. 1600
* **Description:** This piece is a madrigal that expresses a passionate and heartfelt love for the beloved Amarilli, the melody is characterized by smooth and flowing lines, often accompanied by a gentle harmonic progression. 

**3. Alma del core**
* **Title:** Alma del core (Fairest adored) 
* **Composer:** Antonio Caldara (1670-1736)
* **Tempo Marking:** Tempo di Minuetto
* **Composition Year:** c. 1700 
* **Description:** An aria, the piece describes the speaker's deep admiration for the beloved, showcasing a lyrical melody and a clear harmonic structure.

**4. Come raggio di sol**
* **Title:** Come raggio di sol (As on the swelling wave) 
* **Composer:** Antonio Caldara (1670-1736)
* **Tempo Marking:** Sostenuto, d=56
* **Composition Year:** c. 1700
* **Description:** A flowing aria in which the speaker compares the beloved's beauty to the gentle, uplifting movement of waves. 

**5. Sebben, crudele**
* **Title:** Sebben, crudele (Th' not deserving)
* **Composer:** Antonio Caldara (1670-1736)
* **Tempo Marking:** Allegretto grazioso, d=54
* **Composition Year:** c. 1700
* **Description:** A canzonetta that expresses a sorrowful plea to a cruel love, with a more dramatic and passionate melody.

**6. Vittoria, mio core!**
* **Title:** Vittoria, mio core! (Victorious my heart is!)
* **Composer:** Giacomo Carissimi (1605-1674)
* **Tempo Marking:** Allegro con brio, d=168
* **Composition Year:** c. 1650
* **Description:** A cantata that celebrates the triumph of love and the joy of the heart, featuring a powerful and energetic melody.

**7. Danza, danza, fanciulla gentile**
* **Title:** Danza, danza, fanciulla gentile (Dance, O dance, maiden gay)
* **Composer:** Francesco Durante (1684-1755)
* **Tempo Marking:** Allegro con spirito, d=138
* **Composition Year:** c. 1720
* **Description:** A playful arietta that captures the joy and spirit of dancing with its lively melody.

**8. Vergin, tutto amor**
* **Title:** Vergin, tutto amor (Virgin, fount of love)
* **Composer:** Francesco Durante (1684-1755)
* **Tempo Marking:** Largo religioso, d=40
* **Composition Year:** c. 1730
* **Description:** This aria expresses a heartfelt prayer to the Virgin Mary, with a solemn and devotional melody.

**9. Caro mio ben**
* **Title:** Caro mio ben (Thou, all my bliss)
* **Composer:** Giuseppe  Giordani (Giordano) (1744-1798)
* **Tempo Marking:** Larghetto, d=60
* **Composition Year:** c. 1780
* **Description:** A light and charming arietta, showcasing the speaker's overwhelming love for their beloved.

**10. O del mio dolce ardor**
* **Title:** O del mio dolce ardor (O thou belov'd)
* **Composer:** Christoph Willibald von Gluck (1714-1787)
* **Tempo Marking:** Moderato, d=48
* **Composition Year:** c. 1760
* **Description:** An aria that expresses a longing and adoration for the beloved, featuring a simple but deeply moving melody.

**11. Che fiero costume**
* **Title:** Che fiero costume (How void of compassion)
* **Composer:** Giovanni Legrenzi (1626-1690)
* **Tempo Marking:** Allegretto con moto, d=58
* **Composition Year:** c. 1680
* **Description:** An arietta that laments the cruelty of fate, featuring a dramatic and contrasting melody.

**12. Pur dicesti, o bocca bella**
* **Title:** Pur dicesti, o bocca bella (Mouth so charmful)
* **Composer:** Antonio Lotti (1667-1740)
* **Tempo Marking:** Allegretto grazioso, d=69
* **Composition Year:** c. 1710
* **Description:** A charming arietta that marvels at the beauty of the beloved's voice, featuring a sweet and flowing melody.

**13. Il mio bel foco**
* **Title:** Il mio bel foco (My joyful ardor)
* **Composer:** Benedetto Marcello (1686-1739)
* **Tempo Marking:** Recitativo ed Aria
* **Composition Year:** c. 1720
* **Description:** This piece features a recitative followed by an aria, the music expresses the speaker's unwavering devotion and love, with both a spoken recitative section and a lyrical aria.

**14. Lasciatemi morire!**
* **Title:** Lasciatemi morire! (No longer let me languish)
* **Composer:** Claudio Monteverdi (1567-1643)
* **Tempo Marking:** Lento, d=48
* **Composition Year:** c. 1610
* **Description:** An aria from the opera "Ariana," the music reflects the speaker's sorrow and despair, with a slow and somber melody.

**15. Nel cor più non mi sento**
* **Title:** Nel cor più non mi sento (Why feels my heart so dormant)
* **Composer:** Giovanni Paisiello (1740-1816)
* **Tempo Marking:** Andantino, d=58
* **Composition Year:** c. 1770
* **Description:** An arietta that describes a state of melancholy and detachment, showcasing a melancholic melody.

**16. Se tu m'ami, se sospiri**
* **Title:** Se tu m'ami, se sospiri (If thou lovest me)
* **Composer:** Giovanni Battista Pergolesi (1710-1736)
* **Tempo Marking:** Andantino, d=58
* **Composition Year:** c. 1730
* **Description:** This piece is attributed to Pergolesi but was actually composed by Lorenzo Vincenzo Ciampi, a charming arietta that expresses the speaker's longing and desire to be loved.

**17. Gia il sole dal Gange**
* **Title:** Gia il sole dal Gange (O'er Ganges now launches)
* **Composer:** Alessandro Scarlatti (1659-1725)
* **Tempo Marking:** Allegro giusto, d=138
* **Composition Year:** c. 1710
* **Description:** A canzonetta, the music paints a vivid picture of a beautiful sunrise, featuring a vibrant and flowing melody.

**18. Le Violette**
* **Title:** Le Violette (The Violets)
* **Composer:** Alessandro Scarlatti (1659-1725)
* **Tempo Marking:** Allegretto 
* **Composition Year:** c. 1710
* **Description:** A canzone that celebrates the beauty of violets, showcasing a gentle and lyrical melody.

**19. O cessate di piagarmi**
* **Title:** O cessate di piagarmi (O no longer seek to pain me)
* **Composer:** Alessandro Scarlatti (1659-1725)
* **Tempo Marking:** Andante con moto, d=80
* **Composition Year:** c. 1710
* **Description:** A powerful arietta that pleads for mercy, featuring a dramatic and passionate melody.

**20. Se Florindo è fedele**
* **Title:** Se Florindo è fedele (Should Florindo be faithful) 
* **Composer:** Alessandro Scarlatti (1659-1725)
* **Tempo Marking:** Allegretto grazioso, moderato assai, d=132
* **Composition Year:** c. 1710
* **Description:** A charming arietta that expresses the speaker's hopes for a faithful love, featuring a light and playful melody.

**21. Pietà, Signore!**
* **Title:** Pietà, Signore! (O Lord, have mercy)
* **Composer:** Alessandro Stradella (1639-1682)
* **Tempo Marking:** Andantino
* **Composition Year:** c. 1670
* **Description:** A heartfelt plea for mercy, showcasing a dramatic and expressive melody.

**22. Tu lo sai**
* **Title:** Tu lo sai (Ask thy heart)
* **Composer:** Giuseppe Torelli (1658-1709)
* **Tempo Marking:** Andantino
* **Composition Year:** c. 1690
* **Description:** This arietta features a gentle and reflective melody, expressing a yearning for love and understanding.

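You can see that Gemini extracted all of the requested fields for each piece it listed. Note that the sample output above contains 22 entries even though the book holds 24 songs, and generated values such as the approximate composition years were filled in by the model, so review the results before relying on them.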

### Song Identification with Audio

Now, let's try something more challenging: identifying a song being performed based on the sheet music. We have an audio clip of Holt Skinner performing one of the songs in the book, and we will ask Gemini to identify it based on the sheet music PDF.

In [15]:
# Question for the model; the PDF and the audio clip are attached to the same request
song_identification_prompt = "Based on the sheet music PDF, what song is in the audio clip? Explain how you made the decision."

# Audio recording to identify (MP3 in Cloud Storage)
audio_part = Part.from_uri(
    "gs://github-repo/use-cases/sheet-music/24ItalianClip.mp3",
    mime_type="audio/mpeg",
)

# Sheet music PDF the recording is compared against
pdf_part = Part.from_uri(sheet_music_pdf_uri, mime_type="application/pdf")

# Send the document, the audio clip, and the question (in that order) to Gemini
response = model.generate_content(
    contents=[pdf_part, audio_part, song_identification_prompt],
)

# Render the response text as Markdown
display(Markdown(response.text))

The song is "Sebben, crudele" by Antonio Caldara. The sheet music begins with the title "Sebben, crudele" and then continues with the beginning lines of the song. 


### Edit PDF Metadata

Next, we'll use the output from Gemini to edit the metadata of a PDF containing one song, which can make it easier to organize this file in sheet music applications.

We'll adjust the prompt slightly and set the [`response_mime_type`](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/gemini#:~:text=in%20the%20list.-,responseMimeType,-(Preview)) to get the response in JSON format.

In [10]:
# Single-song PDF whose metadata is edited in the cells below
sheet_music_pdf_uri = "gs://github-repo/use-cases/sheet-music/SebbenCrudele.pdf"

# The keys in the template ("/Title", "/Author", "/Subject") are the metadata
# field names that are later passed to PyPDF2 unchanged. The template is kept
# as valid JSON (no trailing comma) so the model is shown a well-formed example.
sheet_music_extraction_prompt = """The following document is a piece of sheet music. Your task is to output structured metadata about the piece of music in the document. Correct any mistakes that are in the document and fill in missing information when not present in the document.

Output the data in the following JSON format:

{
    "/Title": "Title of the piece",
    "/Author": "Composer(s) of the piece",
    "/Subject": "Music Genre(s) in a comma separated list"
}

"""

# Load file directly from Google Cloud Storage
file_part = Part.from_uri(
    uri=sheet_music_pdf_uri,
    mime_type="application/pdf",
)

# Per-request config: ask for a JSON response instead of free-form text
generation_config = GenerationConfig(
    temperature=1, response_mime_type="application/json"
)

# Send to Gemini
response = model.generate_content(
    [sheet_music_extraction_prompt, file_part], generation_config=generation_config
)

# Display results
display(Markdown(response.text))

# Parse the response and fail here, with the raw text in the message, if it is
# not a flat JSON object of strings: the metadata writer used below only
# accepts string values.
new_metadata = json.loads(response.text)
if not isinstance(new_metadata, dict) or not all(
    isinstance(value, str) for value in new_metadata.values()
):
    raise ValueError(
        f"Expected a JSON object with string values, got: {response.text!r}"
    )

{"/Title": "Sebben, crudele", "/Author": "Antonio Caldara", "/Subject": "Canzonetta, Aria"}


Next, we'll download the PDF from the GCS Bucket and edit the metadata using the [`PyPDF2`](https://pypdf2.readthedocs.io/en/3.x/) library.

In [None]:
# Copy the PDF at sheet_music_pdf_uri from Cloud Storage into the current directory
! gcloud storage cp {sheet_music_pdf_uri} .

In [16]:
def edit_pdf_metadata(file_path: str, new_metadata: dict) -> None:
    """Edits metadata of a PDF file in place.

    The pages of the PDF are copied into a new document, the given metadata
    fields are added to it, and the result overwrites the file at
    ``file_path``. Only the pages and the supplied metadata are written to
    the new document.

    The new document is serialized into memory first and the file is only
    reopened for writing after the reader has been closed, so the source is
    never truncated while it is still being read.

    Args:
        file_path (str): Path to the PDF file. The file is overwritten.
        new_metadata (dict): Dictionary containing the new metadata fields and values.
            Example: {'/Author': 'John Doe', '/Title': 'My Report'}
    """
    output_buffer = io.BytesIO()

    with open(file_path, "rb") as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        pdf_writer = PyPDF2.PdfWriter()

        for page in pdf_reader.pages:
            pdf_writer.add_page(page)

        pdf_writer.add_metadata(new_metadata)

        # Serialize while the source is still open for reading, but into
        # memory rather than onto the file that is being read.
        pdf_writer.write(output_buffer)

    # The reader is closed and the full output exists; replacing the file is safe now.
    with open(file_path, "wb") as out_file:
        out_file.write(output_buffer.getvalue())


# Apply the fields parsed from Gemini's JSON response to the downloaded copy of the PDF
edit_pdf_metadata("SebbenCrudele.pdf", new_metadata)