In [1]:
import pandas as pd
import ollama

In [2]:
file_path = "genreLyrics.csv"

In [3]:
# Read CSV file witη separation
df = pd.read_csv(file_path, sep="\t", encoding="utf-8")

# Clean column names
df.columns = df.columns.str.strip().str.replace("\t", "")

# Display dataset
print(df.head())
print("\nColumn Names:", df.columns)

   Unnamed: 0       genre                                             lyrics
0       76301        Rock  Hey, if you were right I'd chase away\nAll the...
1      293332        Rock  There's something about the way we fit\nThere'...
2       70683  Electronic  One drop in the ocean\nCould be that magic pot...
3      209590        Rock  I'm so tired of being here\nSuppressed by all ...
4      116010     Hip-Hop  Yeah, what, Vast Aire,; Shell Shock..\nIt's th...

Column Names: Index(['Unnamed: 0', 'genre', 'lyrics'], dtype='object')


In [4]:
# Verify dataset structure
print("\nFirst 15 Rows:\n", df.head(15))


First 15 Rows:
     Unnamed: 0       genre                                             lyrics
0        76301        Rock  Hey, if you were right I'd chase away\nAll the...
1       293332        Rock  There's something about the way we fit\nThere'...
2        70683  Electronic  One drop in the ocean\nCould be that magic pot...
3       209590        Rock  I'm so tired of being here\nSuppressed by all ...
4       116010     Hip-Hop  Yeah, what, Vast Aire,; Shell Shock..\nIt's th...
5       186881        Rock  He said he'd be here at seven\nThe clock just ...
6       294994        Rock  Wir waren mehr als Freunde\nwir warn wie Brder...
7       182846        Rock  bokura ha sonna ni mo ooku no koto nado\nnozon...
8       315968         Pop  Love is free love is love\nThe world united\nI...
9       242844     Hip-Hop  Yeah, it's time to get crunked up in this bitc...
10      148419        Rock  Outside is a light\nWhich says you've got to f...
11      343109     Hip-Hop  Yes sir is Bobby Ra

In [5]:
# Extract unique genres
if 'genre' in df.columns:
    genres = df['genre'].unique().tolist()
    print("\nGenres found:", genres)
else:
    print("Error: 'genre' column not found.")


Genres found: ['Rock', 'Electronic', 'Hip-Hop', 'Pop', 'Other', 'R&B', 'Country', 'Jazz', 'Metal', 'Folk', 'Indie']


In [6]:
#1) ZERO SHOT PROMPTING

import pandas as pd
import ollama

# Read the CSV
df = pd.read_csv("genreLyrics.csv", sep="\t", engine="python", on_bad_lines="skip", encoding="utf-8")

# Check column names again
print("Column Names:", df.columns)

df.columns = df.columns.str.strip()

if 'genre' not in df.columns or 'lyrics' not in df.columns:
    raise KeyError("Columns 'genre' and 'lyrics' not found! Check dataset structure.")

# make a list of all unique genres, without none
genres = [genre for genre in df['genre'].dropna().unique().tolist()]
print("\nGenres found:", genres)

#Select a 40 sample lyrics for efficiency
sample_lyrics = df['lyrics'].dropna().tolist()[:40]

#Zero-Shot Prompt Function
def get_zs_prompt(lyrics):
    return f"Classify the genre of the following song lyrics:\n\"{lyrics}\"\nAvailable genres: {', '.join(genres)}.\nReply with only the genre name."

#Classify 40 lyrics// Zero-Shot prompting
responses = []
for i, sample_lyric in enumerate(sample_lyrics):
    zs_prompt = get_zs_prompt(sample_lyric)
    
    # Ollama 
    zs_response = ollama.chat(model="llama3", messages=[{"role": "user", "content": zs_prompt}])
    
    #Results
    responses.append((sample_lyric, zs_response['message']['content']))

#Printing the  results for the 40 lyrics
for i, (lyric, genre) in enumerate(responses):
    print(f"\nLyrics Sample {i+1}:\n", lyric)
    print("\nZero-Shot Predicted Genre:", genre)

Column Names: Index(['Unnamed: 0', 'genre', 'lyrics'], dtype='object')

Genres found: ['Rock', 'Electronic', 'Hip-Hop', 'Pop', 'Other', 'R&B', 'Country', 'Jazz', 'Metal', 'Folk', 'Indie']

Lyrics Sample 1:
 Hey, if you were right I'd chase away
All the reason for my old desire to change
But the right words, don't improvise the ideals
My body sails into a passage waiting in vain
It might just be so uninviting, is it comical
From until now I've gone to something else
I'll never forget how I once saw myself
If not for the way I'd changed, if not for the way
Say, every hour is always the same
Gazing endlessly to the usual clouds I'd always collect
As the town slips with the hatfull by the sunset
And paralyze the aspirations of the day
The rightous become the silenced
The fallen ones, they'll never find that road that's fallen them
I'll never forget how I once saw myself
If not for the way I'd changed, if not for the way
It's not quite the same
I'll never forget how I once saw myself
If not

In [7]:
# 2) FEW-SHOT PROMPTING

import pandas as pd
import ollama

# Read CSV 
df = pd.read_csv("genreLyrics.csv", delimiter=r'\s*\t\s*', engine='python', on_bad_lines='skip', encoding="utf-8")

# Strip whitespace from column names
df.columns = df.columns.str.strip()

# Unique genres, without none
genres = df['genre'].dropna().unique().tolist()
print("\nGenres found:", genres)

# Selecting the first 40 sample lyric
sample_lyrics = df['lyrics'].dropna().tolist()[:40]

# Few-Shot Prompt Function
def get_fs_prompt(lyrics):
    # Sample 5 examples
    few_shot_examples = df.dropna(subset=['lyrics', 'genre']).sample(n=5, random_state=1)
    
    
    examples = "\n".join(f"Example {i+1}:\nLyrics: {row['lyrics']}\nGenre: {row['genre']}\n" 
                          for i, (_, row) in enumerate(few_shot_examples.iterrows()))

    # Construct the prompt
    return (
        "You are a highly skilled music expert specializing in genre classification. "
        "Your task is to analyze song lyrics and determine their most appropriate genre.\n\n"
        "Here are some reference examples:\n\n"
        f"{examples}\n"
        "Now, analyze the following lyrics and classify them into one of the known genres:\n\n"
        f"Lyrics:\n{lyrics}\n\n"
        "Please respond with only the genre name."
    )

# Classify the 40 lyrics // Ollama Few-Shot prompting
responses = []
for i, sample_lyric in enumerate(sample_lyrics):
    fs_prompt = get_fs_prompt(sample_lyric)
    
    # Ollama 
    fs_response = ollama.chat(model="llama3", messages=[{"role": "user", "content": fs_prompt}])

    # Respond saved
    predicted_genre = fs_response.get('message', {}).get('content', 'Unknown')
    responses.append((sample_lyric, predicted_genre))

# Print the results for the 40 songs' lyrics
for i, (lyric, genre) in enumerate(responses, 1):
    print(f"\nLyrics Sample {i}:\n{lyric}")
    print("\nFew-Shot Predicted Genre:", genre)


Genres found: ['Rock', 'Electronic', 'Hip-Hop', 'Pop', 'Other', 'R&B', 'Country', 'Jazz', 'Metal', 'Folk', 'Indie', '-Boy style']

Lyrics Sample 1:
"Hey, if you were right I'd chase away

Few-Shot Predicted Genre: Rock

Lyrics Sample 2:
"There's something about the way we fit

Few-Shot Predicted Genre: Rock

Lyrics Sample 3:
"One drop in the ocean

Few-Shot Predicted Genre: Rock

Lyrics Sample 4:
"I'm so tired of being here

Few-Shot Predicted Genre: Rock

Lyrics Sample 5:
"Yeah, what, Vast Aire,; Shell Shock..

Few-Shot Predicted Genre: Hip-Hop/Rap

Lyrics Sample 6:
"He said he'd be here at seven

Few-Shot Predicted Genre: Rock

Lyrics Sample 7:
"Wir waren mehr als Freunde

Few-Shot Predicted Genre: Rock

Lyrics Sample 8:
"bokura ha sonna ni mo ooku no koto nado

Few-Shot Predicted Genre: Rock

Lyrics Sample 9:
"Love is free love is love

Few-Shot Predicted Genre: Pop

Lyrics Sample 10:
"Yeah, it's time to get crunked up in this bitch

Few-Shot Predicted Genre: Hip-Hop/Rap

Lyrics Sa

In [13]:
#3) Performance of both strategies of the model using Precision, Recall and F1 Score.

import pandas as pd
from sklearn.metrics import classification_report

# True labels 
true_labels = ['Rock', 'Electronic', 'Hip-Hop', 'Pop', 'Other', 'R&B', 'Country', 'Jazz',
               'Metal', 'Folk', 'Indie', 'J-Pop', 'Rap', 'Classical', 'Non'] #THE ACTUAL GENRES OF THE CLASSIFICATION

# Zero-Shot Predictions
zs_predictions = [
    'Indie', 'Rock', 'Pop', 'Rock', 'Hip-Hop', 'Rock', 'Pop', 'J-Pop', 'Pop', 'Hip-Hop', 
    'Pop', 'Hip-Hop', 'Rock', 'Folk', 'Rock', 'Pop', 'Rock', 'Country', 'Hip-Hop', 'Pop', 
    'Folk', 'Rock', 'Pop', 'Rock', 'Rock', 'Metal', 'Country', 'Rock', 'Country', 'Pop', 
    'Rock', 'Pop', 'Pop', 'Hip-Hop', 'Rock', 'Pop', 'Rock', 'Hip-Hop', 'Pop', 'Country'
]

# Few-Shot Predictions
fs_predictions = [
    'Rock', 'Rock', 'Rock', 'Rock', 'Hip-Hop/Rap', 'Rock', 'Rock', 'Rock', 'Pop', 'Hip-Hop/Rap', 
    'Rock', 'Hip-Hop/Rap', 'Rock', 'Rock', 'Rock', 'Rock', 'Rock', 'Rock', 'Hip-Hop/Rap', 'Rock', 
    'Classical', 'Rock', 'Rock', 'Rock', 'Metal', 'Rock', 'Rock', 'Rock', 'Rock', 'Rock', 'Pop', 
    'Rock', 'Hip-Hop/Rap', 'Rock', 'Rock', 'Rock', 'Non', 'Rock', 'Rock'
]

# The valid genres
genres = ['Rock', 'Electronic', 'Hip-Hop', 'Pop', 'Other', 'R&B', 'Country', 'Jazz', 'Metal', 'Folk', 'Indie'] #THE VALID ONES WE HAVE

# Clean the predictions
zs_clean = [p if p in genres else 'unknown' for p in zs_predictions]
fs_clean = [p if p in genres else 'unknown' for p in fs_predictions]

# Print the lengths of the lists
print("Length of true_labels:", len(true_labels))
print("Length of zs_clean:", len(zs_clean))
print("Length of fs_clean:", len(fs_clean))

Length of true_labels: 15
Length of zs_clean: 40
Length of fs_clean: 39


In [14]:
# Trim zs_clean and fs_clean to the first 15 predictions
zs_clean = zs_clean[:15]
fs_clean = fs_clean[:15]

# Check lengths to ensure they match
print(f"Length of true_labels: {len(true_labels)}")
print(f"Length of zs_clean: {len(zs_clean)}")
print(f"Length of fs_clean: {len(fs_clean)}")

# Ensure the lengths are the same for classification report
assert len(true_labels) == len(zs_clean) == len(fs_clean), "Mismatch in label lengths!"

# Continue with the classification report
print("\nZero-Shot Classification Report")
from sklearn.metrics import classification_report
print(classification_report(true_labels, zs_clean, zero_division=0))

print("\nFew-Shot Classification Report")
print(classification_report(true_labels, fs_clean, zero_division=0))

Length of true_labels: 15
Length of zs_clean: 15
Length of fs_clean: 15

Zero-Shot Classification Report
              precision    recall  f1-score   support

   Classical       0.00      0.00      0.00       1.0
     Country       0.00      0.00      0.00       1.0
  Electronic       0.00      0.00      0.00       1.0
        Folk       0.00      0.00      0.00       1.0
     Hip-Hop       0.00      0.00      0.00       1.0
       Indie       0.00      0.00      0.00       1.0
       J-Pop       0.00      0.00      0.00       1.0
        Jazz       0.00      0.00      0.00       1.0
       Metal       0.00      0.00      0.00       1.0
         Non       0.00      0.00      0.00       1.0
       Other       0.00      0.00      0.00       1.0
         Pop       0.00      0.00      0.00       1.0
         R&B       0.00      0.00      0.00       1.0
         Rap       0.00      0.00      0.00       1.0
        Rock       0.00      0.00      0.00       1.0
     unknown       0.00      0