In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file in it.
for root, _, files in os.walk('/kaggle/input'):
    for fname in files:
        print(os.path.join(root, fname))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/30000-spotify-songs/readme.md
/kaggle/input/30000-spotify-songs/spotify_songs.csv


In [2]:
!pip install -q pennylane pandas

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.1/57.1 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.8/4.8 MB[0m [31m43.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m930.8/930.8 kB[0m [31m39.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.6/2.6 MB[0m [31m60.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m53.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m167.9/167.9 kB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.6/8.6 MB[0m [31m77.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [3]:
import pennylane as qml
from pennylane import numpy as pnp

In [4]:
# Load the song table from the Kaggle input directory into a DataFrame.
spotify_df = pd.read_csv(
    filepath_or_buffer="/kaggle/input/30000-spotify-songs/spotify_songs.csv"
)

In [5]:
# Normalise alternative column names to the ones the rest of the notebook reads
# ("track_name" and "track_artist"), then drop rows that lack any of the four
# audio features used for scoring. Rebinding instead of mutating in place keeps
# the cell a single idempotent expression.
spotify_df = (
    spotify_df
    .rename(columns={'name': 'track_name', 'artists': 'track_artist'})
    .dropna(subset=["valence", "energy", "acousticness", "danceability"])
)

In [6]:
def score_from_prompt(prompt: str):
    """Turn a free-text prompt into target audio-feature values.

    Matching is a case-insensitive substring test on a handful of keywords:

    * "happy" or "nostalgic" -> valence 0.7, otherwise 0.3
    * "soft"                 -> energy 0.5, otherwise 0.8
    * "acoustic" or "soft"   -> acousticness 0.8, otherwise 0.2
    * danceability is always 0.5

    Args:
        prompt: The user's request text.

    Returns:
        dict mapping 'valence', 'energy', 'acousticness' and 'danceability'
        (in that order) to the target value for each feature.
    """
    text = prompt.lower()

    mentions_soft = 'soft' in text
    wants_upbeat = any(word in text for word in ('happy', 'nostalgic'))
    wants_acoustic = mentions_soft or 'acoustic' in text

    # Keys are inserted in a fixed order; callers iterate over them.
    targets = {}
    targets['valence'] = 0.7 if wants_upbeat else 0.3
    targets['energy'] = 0.5 if mentions_soft else 0.8
    targets['acousticness'] = 0.8 if wants_acoustic else 0.2
    targets['danceability'] = 0.5
    return targets

In [7]:
def score_dataset(df, prompt):
    """Rank songs by how closely their audio features match a prompt.

    For every feature returned by ``score_from_prompt(prompt)`` a
    ``<feature>_diff`` column holding the absolute distance to the target is
    added, and ``total_diff`` holds the row-wise sum of those distances.

    Args:
        df: DataFrame with one column per feature named by score_from_prompt.
        prompt: Free-text request forwarded to score_from_prompt.

    Returns:
        A copy of ``df`` with the extra columns, sorted ascending by
        ``total_diff``. The input frame is not modified.
    """
    wanted = score_from_prompt(prompt)
    scored = df.copy()

    diff_columns = []
    for feature, goal in wanted.items():
        column = f'{feature}_diff'
        scored[column] = (scored[feature] - goal).abs()
        diff_columns.append(column)

    scored['total_diff'] = scored[diff_columns].sum(axis=1)
    return scored.sort_values(by="total_diff")

In [8]:
def make_oracle(target_index, wires):
    """Queue a Grover phase oracle that marks one computational basis state.

    The oracle multiplies the amplitude of ``|target_index>`` by -1 and leaves
    every other basis state untouched. ``target_index`` is written in binary
    with ``wires[0]`` as the most significant bit.

    Args:
        target_index: Integer in ``[0, 2**len(wires))`` naming the marked state.
        wires: Sequence of wire labels the oracle acts on.

    Raises:
        ValueError: If ``wires`` is empty or ``target_index`` does not fit in
            ``len(wires)`` bits.
    """
    n_wires = len(wires)
    if n_wires == 0:
        raise ValueError("make_oracle needs at least one wire")
    if not 0 <= target_index < 2 ** n_wires:
        raise ValueError(
            f"target_index {target_index} does not fit in {n_wires} qubit(s)"
        )

    bin_str = format(target_index, f"0{n_wires}b")
    zero_wires = [wires[i] for i, bit in enumerate(bin_str) if bit == '0']

    # Map the target state onto |1...1> so an all-ones controlled gate hits it.
    for wire in zero_wires:
        qml.PauliX(wires=wire)

    if n_wires == 1:
        # With a single wire there is nothing to control on: a plain Z flips |1>.
        qml.PauliZ(wires=wires[0])
    else:
        # A bare MultiControlledX only swaps two basis states (a bit flip), which
        # leaves a uniform superposition unchanged. Conjugating its target (the
        # last wire) with Hadamards turns it into a multi-controlled Z, i.e. the
        # phase flip Grover's algorithm requires.
        qml.Hadamard(wires=wires[-1])
        qml.MultiControlledX(wires=wires)
        qml.Hadamard(wires=wires[-1])

    # Undo the basis change.
    for wire in zero_wires:
        qml.PauliX(wires=wire)

In [9]:
def make_diffuser(wires):
    """Queue the Grover diffusion operator (inversion about the mean).

    Implements ``I - 2|s><s|`` (the textbook diffuser up to a global phase),
    where ``|s>`` is the uniform superposition over ``wires``: rotate ``|s>``
    onto ``|1...1>`` with Hadamard + X on every wire, flip the phase of that
    single state, then rotate back.

    Args:
        wires: Sequence of wire labels the diffuser acts on.

    Raises:
        ValueError: If ``wires`` is empty.
    """
    if len(wires) == 0:
        raise ValueError("make_diffuser needs at least one wire")

    for wire in wires:
        qml.Hadamard(wires=wire)
        qml.PauliX(wires=wire)

    if len(wires) == 1:
        # Nothing to control on: a plain Z is the phase flip of |1>.
        qml.PauliZ(wires=wires[0])
    else:
        # The reflection must be a phase flip. MultiControlledX alone is a bit
        # flip, so its target (the last wire) is wrapped in Hadamards to make
        # it a multi-controlled Z.
        qml.Hadamard(wires=wires[-1])
        qml.MultiControlledX(wires=wires)
        qml.Hadamard(wires=wires[-1])

    for wire in wires:
        qml.PauliX(wires=wire)
        qml.Hadamard(wires=wire)

In [10]:
def grover_search(n_qubits, target_index, n_iterations=None):
    """Build a QNode that runs Grover's search for one marked basis state.

    The circuit puts ``n_qubits`` wires into a uniform superposition, applies
    ``make_oracle`` followed by ``make_diffuser`` ``n_iterations`` times, and
    returns the measurement probabilities over all wires.

    Args:
        n_qubits: Number of wires; the search space has ``2**n_qubits`` states.
        target_index: Index of the marked state, forwarded to ``make_oracle``.
        n_iterations: Number of oracle+diffuser rounds. ``None`` (default)
            picks ``floor(pi/4 * sqrt(2**n_qubits))``, the usual choice for a
            single marked item, with a minimum of one round.

    Returns:
        A zero-argument QNode on a ``default.qubit`` device; calling it
        returns the result of ``qml.probs`` over the ``n_qubits`` wires.

    Raises:
        ValueError: If ``n_iterations`` is negative.
    """
    import math

    if n_iterations is None:
        # A single round is only optimal for 2 qubits; the success probability
        # of one round shrinks as the search space grows.
        n_iterations = max(1, math.floor(math.pi / 4 * math.sqrt(2 ** n_qubits)))
    n_iterations = int(n_iterations)
    if n_iterations < 0:
        raise ValueError("n_iterations must be non-negative")

    dev = qml.device("default.qubit", wires=n_qubits)

    @qml.qnode(dev)
    def circuit():
        wires = list(range(n_qubits))
        # Uniform superposition over every candidate index.
        for wire in wires:
            qml.Hadamard(wires=wire)
        for _ in range(n_iterations):
            make_oracle(target_index, wires)
            make_diffuser(wires)
        return qml.probs(wires=wires)

    return circuit

In [11]:
def run_grover_search(df, n_candidates=8):
    """Pick the best-scoring row among the first rows of ``df`` via Grover search.

    The position of the smallest ``total_diff`` among the first
    ``n_candidates`` rows is computed classically and used as the marked index
    of the circuit built by ``grover_search``; the row at the most probable
    measured index is returned. Because the target is already known before the
    circuit runs, the circuit demonstrates the algorithm rather than replacing
    the classical search.

    Args:
        df: DataFrame with a ``total_diff`` column.
        n_candidates: How many leading rows to search (default 8, i.e. 3 qubits).

    Returns:
        The selected row of ``df`` as a Series.

    Raises:
        ValueError: If ``n_candidates`` is less than 1 or ``df`` has no rows.
    """
    if n_candidates < 1:
        raise ValueError("n_candidates must be at least 1")

    candidates = df.head(n_candidates)
    if candidates.empty:
        raise ValueError("run_grover_search needs at least one scored row")

    # Positional argmin: resetting the index makes the label returned by
    # idxmin() the row position, which stays a plain integer even when the
    # original index contains duplicate labels.
    target_index = int(candidates["total_diff"].reset_index(drop=True).idxmin())

    # Enough qubits to address every candidate. At least two are used because
    # a single Grover round on one qubit does not amplify the marked state.
    n_qubits = max(2, (n_candidates - 1).bit_length())

    circuit = grover_search(n_qubits, target_index)
    probs = circuit()
    selected = int(pnp.argmax(probs))
    return candidates.iloc[selected]

In [12]:
class SongSearchPipeline:
    """Recommend one track from a song table for a free-text prompt."""

    def __init__(self, data):
        """Store the song table that every ``run`` call scores."""
        self.data = data

    def run(self, prompt, use_quantum=True):
        """Score the stored songs against ``prompt`` and return the chosen one.

        Args:
            prompt: Free-text request forwarded to ``score_dataset``.
            use_quantum: When true, the row is chosen by ``run_grover_search``;
                otherwise the first row of the scored table is taken.

        Returns:
            dict with keys "Track", "Artist" and "Score", read from the chosen
            row's ``track_name``, ``track_artist`` and ``total_diff`` entries
            (``None`` for any entry the row does not have).
        """
        ranked = score_dataset(self.data, prompt)
        best = run_grover_search(ranked) if use_quantum else ranked.iloc[0]

        # Output label -> column of the chosen row it is read from.
        fields = (
            ("Track", "track_name"),
            ("Artist", "track_artist"),
            ("Score", "total_diff"),
        )
        return {label: best.get(column) for label, column in fields}

In [13]:
# Demo: build the pipeline over the loaded song table, run one prompt through
# the Grover-based selection path and show the resulting record.
prompt = "play energy"
pipeline = SongSearchPipeline(data=spotify_df)
result = pipeline.run(prompt=prompt, use_quantum=True)
print(result)

{'Track': 'How You Love Me', 'Artist': '3LAU', 'Score': 0.04600000000000004}
