# Filtering

In [68]:
import pandas as pd
import os

songs_df = pd.read_csv(os.path.join("data", "songs_v1.csv"))
print("Length before filtering: ", len(songs_df))

Length before filtering:  10854


1. Drop duplicates:

In [69]:
songs_df = songs_df.drop_duplicates(subset=["title", "artist"])
print("Length after dropping duplicates: ", len(songs_df))

Length after dropping duplicates:  10804


2. Filter non ukr artists and dublicates of the same artist:

In [70]:
ARTISTS_BLACKLIST = [
    "Poshlaya Molly", "Poshlaja Molli", "–°–º–µ—Ç–∞–Ω–∞ band", "Corn Wave", "–ü–æ—Ç–∞–ø –ò –ù–∞—Å—Ç—è –ö–∞–º–µ–Ω—Å–∫–∏—Ö", 
    "–°–≤–µ—Ç–ª–∞–Ω–∞ –õ–æ–±–æ–¥–∞", "Valentin Strykalo", "–í–µ—Ä–∫–∞ –°–µ—Ä–¥—é—á–∫–∞", "L—ék", "kavabanga Depo kolibri", 
    "Quest Pistols Show", "–ö–∞–∫–∞—è –†–∞–∑–Ω–∏—Ü–∞", "Green Grey", "jockii druce", "–ú–∏—à–∞ –ú–∞—Ä–≤–∏–Ω", 
    "–û–ª—è –ü–æ–ª—è–∫–æ–≤–∞", "–ú–∏—Ö–∞–∏–ª –ë—É–±–ª–∏–∫", "–ê–ª—ë–Ω–∞ –í–∏–Ω–Ω–∏—Ü–∫–∞—è", "–ù–∞—Ç–∞–ª—å—è –ú–æ–≥–∏–ª–µ–≤—Å–∫–∞—è",
    "–¢–∞–∏—Å–∏—è –ü–æ–≤–∞–ª–∏–π", "–°–µ—Ä–≥—ñ–π –ñ–∞–¥–∞–Ω –¢–∞ –°–æ–±–∞–∫–∏ –í –ö–æ—Å–º–æ—Å—ñ", "Poshlaya Molly",
    "Valentin Strykalo", "INGRET", "Svitlana Nianio", "Vasily Richter",
    "MARUV & Boosin", "–°—É—Å—ñ–¥–∏ –°—Ç–µ—Ä–ø–ª—è—Ç—å", "The Feels", "Grandma's smuzi", 
    "–¢–∞–±—É–ª–∞ –†–∞—Å–∞", "–û–ª–µ–≥ –ö–µ–Ω–∑–æ–≤", "Potap & Nastya", "–í–∏—Ç–∞–ª–∏–π –ß–∏—Ä–≤–∞", "TELLY GRAVE",
    "–ê–Ω–∞—Å—Ç–∞—Å–∏—è –ü—Ä–∏—Ö–æ–¥—å–∫–æ", "glichery", "AShamaluevMusic", "daKooka", "4Wheel", "KLIM", "CLONNEX", 
    "bris", "Pencil Legs", "Mykola Dmytrovych Leontovych", "Odyn v kanoe", 
    "–°–≤—è—Ç–æ—Å–ª–∞–≤ –í–∞–∫–∞—Ä—á—É–∫", "DJ Jedy", "–ú–∞—Ä—ñ—è –ß–∞–π–∫–æ–≤—Å—å–∫–∞", "The Ukrainians", "Los Colorados", 
    "–¢—ñ–Ω–∞ –ö–∞—Ä–æ–ª—å", "–Ü—Ä–∏–Ω–∞ –ë—ñ–ª–∏–∫", "national radio"
    ]

songs_df = songs_df[~songs_df["artist"].isin(ARTISTS_BLACKLIST)]
print("Length after removing blacklisted artists: ", len(songs_df))

Length after removing blacklisted artists:  5872


3. Remove songs that have brackets in the title (probably remixes of the original songs) or are too long:

In [71]:
songs_df = songs_df[~songs_df["title"].str.contains("\(") | ~songs_df["title"].str.contains("\)")]
songs_df = songs_df[~songs_df["title"].str.contains("\[") | ~songs_df["title"].str.contains("\]")]
songs_df = songs_df[~songs_df["title"].str.contains("\|")]
songs_df = songs_df[~songs_df["title"].str.contains("\.")]
songs_df = songs_df[songs_df["title"].str.len() < 30]

print("Length after filtering based on song title: ", len(songs_df))

Length after filtering based on song title:  4417


4. Filter non ukr artists based on the songs' tags:

Note: don't include russian here as a considerable amount of ukr songs have a missleading russian tag

In [72]:
TAGS_BLACKLIST = ["finnish", "japanese"]
songs_df = songs_df[~songs_df["title_tags"].str.contains("|".join(TAGS_BLACKLIST))]
print("Length after filtering based on title tags: ", len(songs_df))

Length after filtering based on title tags:  4341


Save the filtered songs to a new *.csv file:

In [None]:
songs_df.to_csv(os.path.join("data", "songs_filtered_v1.csv"), index=False)

# Data summary after filtering

In [93]:
import pandas as pd
import os

filtered_songs_df = pd.read_csv(os.path.join("data", "songs_filtered_v1.csv"))

In [97]:
print("Number of ukr artists: ", len(filtered_songs_df["artist"].unique()))

Number of ukr artists:  62


In [98]:
songs_per_artist = filtered_songs_df["artist"].value_counts()

print(f"Total number of songs: {len(filtered_songs_df)}\n")
print(f"Number of songs per artist:\n{songs_per_artist.describe()}")

Total number of songs: 4341

Number of songs per artist:
count     62.000000
mean      70.016129
std       21.063909
min       18.000000
25%       53.000000
50%       75.500000
75%       87.750000
max      100.000000
Name: artist, dtype: float64


Save the current blacklist version to a *.txt file":

In [None]:
with open(os.path.join("configs", "artists_blacklist_v1.txt"), "w", encoding="utf-8") as f:
    f.write("\n".join(ARTISTS_BLACKLIST))

# Concat with YT fetched results

In [None]:
import pandas as pd
import os

filtered_songs_df = pd.read_csv(os.path.join("data", "songs_filtered_v1.csv"))
len(filtered_songs_df)

4341

In [None]:
audios_5_df = pd.read_csv(os.path.join("metadata", "yt_songs_5_2_pages_filtered.csv"))
audios_10_df = pd.read_csv(os.path.join("metadata", "yt_songs_10_2_pages_filtered.csv"))
audios_more_df = pd.read_csv(os.path.join("metadata", "yt_songs_more_sooongs_filtered.csv"))

audios_df = pd.concat([audios_5_df, audios_10_df, audios_more_df], ignore_index=True)

print("Total: ", len(audios_df))
print("No yt_url: ", len(audios_df[audios_df["yt_url"].isnull()]))

Total:  4068
No yt_url:  602


In [89]:
merged_df = pd.merge(filtered_songs_df, audios_df, on=["title", "artist"], how="left")

print("Columns: ", merged_df.columns.values)
print("Total: ", len(merged_df))
print("No yt_url: ", len(merged_df[merged_df["yt_url"].isnull()]))

Columns:  ['title' 'artist' 'title_listeners_x' 'title_scrobbles_x' 'title_tags_x'
 'title_duration_x' 'title_listeners_y' 'title_scrobbles_y' 'title_tags_y'
 'title_duration_y' 'yt_title' 'yt_url' 'yt_duration' 'yt_views'
 'audio_path']
Total:  4341
No yt_url:  886


In [None]:
merged_df[["title", "artist", "audio_path", "yt_title", "yt_url", "yt_duration", "yt_views"]]\
    .to_csv(os.path.join("data", "yt_songs_filtered_v1.csv"), index=False)

Updated summary after fetching the missing songs:

In [3]:
import pandas as pd
import os

filtered_songs_df = pd.read_csv(os.path.join("data", "yt_songs_filtered_v1.csv"))

print("Columns: ", filtered_songs_df.columns.values)
print("Total: ", len(filtered_songs_df))
print("No yt_url: ", len(filtered_songs_df[filtered_songs_df["audio_path"].isnull()]))

Columns:  ['title' 'artist' 'audio_path' 'yt_title' 'yt_url' 'yt_duration'
 'yt_views']
Total:  4341
No yt_url:  60


# Exploring non unique audio_path

In [38]:
import pandas as pd
import os

filtered_songs_df = pd.read_csv(os.path.join("data", "yt_songs_filtered_v1.csv"))

len(filtered_songs_df["audio_path"].unique())

3227

In [8]:
non_unique_paths = filtered_songs_df["audio_path"].value_counts()[filtered_songs_df["audio_path"].value_counts() > 1].index.tolist()
non_unique_paths

['audio\\–ù—É–º–µ—Ä 482 - –î–æ–±—Ä–∏–π —Ä–∞–Ω–æ–∫ –£–∫—Ä–∞—ó–Ω–æ - (–û—Ñ—ñ—Ü—ñ–π–Ω–∏–π –∫–ª—ñ–ø- 2015).mp3',
 'audio\\–Ü–≥–æ—Ä –ö–∞–π–¥–∞—à - —Ü—å–æ–≥–æ –≤–∞—Ä—Ç—É—î –∫–æ—Ö–∞–Ω–Ω—è (official mood video).mp3',
 'audio\\–î–∏–º–Ω–∞ –°—É–ºi—à - –ó–ª–∞–º–∞–Ωi.mp3',
 'audio\\–°–µ—Ä—Ü–µ–≤–∏–π –ù–∞–ø–∞–¥ - –°—É–±–∫—É–ª—å—Ç—É—Ä–∞.mp3',
 'audio\\–ù—É–º–µ—Ä 482 - –í–∞–∂–ª–∏–≤–∞ | Official Video.mp3',
 'audio\\–ö—Ä–∏—Ö—ñ—Ç–∫–∞ –¶–∞—Ö–µ—Å - –ù–∞ –ü–µ—Ä—à–æ–º—É –ú—ñ—Å—Ü—ñ.mp3',
 "audio\\–á—ó –¥—É—à—ñ –∑—ñ–≤'—è–ª—ñ –∫–≤—ñ—Ç–∏.mp3",
 'audio\\‚óæ–ü–ª–∞—á –Ñ—Ä–µ–º—ñ—ó ‚óæ –í–æ–Ω–∞ ‚óæ.mp3',
 'audio\\–õ–µ–Ω—Ç–∞ –ó–∞ –õ–µ–Ω—Ç–æ—é.mp3',
 'audio\\–õ—ñ–Ω—ñ—è –ú–∞–Ω–Ω–µ—Ä–≥–µ–π–º–∞ ‚Äì –î–µ —Ç–≤–æ—è –ª—ñ–Ω—ñ—è?.mp3',
 'audio\\–•–æ–ª–æ–¥–Ω–µ –°–æ–Ω—Ü–µ - –¢—ñ–Ω—å –ö–æ—Ö–∞–Ω–Ω—è (remastered HQ).mp3',
 'audio\\–û–¥–∏–Ω –≤ –∫–∞–Ω–æ–µ - –ü–æ–¥–æ–±–∞—î—Ç—å—Å—è, —è–∫ —Ç–∏ —ñ–¥–µ—à.mp3',
 'audio\\–í–æ–ø–ª—ñ –í—ñ–¥–æ–ø–ª—è—Å–æ–≤–∞ - –í–ï–°–ù–ê.mp3',
 'audio\\–í–æ–ø–ª—ñ –í—ñ–¥–æ–ø–ª—è—Å–æ–≤–∞ - –ë—É–ª–∏ –Ω–∞ —Å–µ–ª—ñ.m

It seems like we have to cases:
1. It is literaly the same song but with slightly different title name in the LastFM platform

In [9]:
filtered_songs_df[filtered_songs_df["audio_path"] == 'audio\\–ê–ª–æ-–∞–ª–µ.mp3']

Unnamed: 0,title,artist,audio_path,yt_title,yt_url,yt_duration,yt_views
752,–ê–ª–æ-–∞–ª–µ,–ö—Ä–∏—Ö—ñ—Ç–∫–∞ –¶–∞—Ö–µ—Å,audio\–ê–ª–æ-–∞–ª–µ.mp3,–ê–ª–æ-–∞–ª–µ,https://youtube.com/watch?v=xH6cxIXtGWI,222.0,172200.0
805,–ê–ª–æ –∞–ª–µ,–ö—Ä–∏—Ö—ñ—Ç–∫–∞ –¶–∞—Ö–µ—Å,audio\–ê–ª–æ-–∞–ª–µ.mp3,–ê–ª–æ-–∞–ª–µ,https://youtube.com/watch?v=xH6cxIXtGWI,222.0,172201.0
812,"–ê–ª–ª–æ, –∞–ª–µ",–ö—Ä–∏—Ö—ñ—Ç–∫–∞ –¶–∞—Ö–µ—Å,audio\–ê–ª–æ-–∞–ª–µ.mp3,–ê–ª–æ-–∞–ª–µ,https://youtube.com/watch?v=xH6cxIXtGWI,222.0,172201.0
813,"–¢–≤–æ—ó –∞–ª–æ, –º–æ—ó –∞–ª–µ",–ö—Ä–∏—Ö—ñ—Ç–∫–∞ –¶–∞—Ö–µ—Å,audio\–ê–ª–æ-–∞–ª–µ.mp3,–ê–ª–æ-–∞–ª–µ,https://youtube.com/watch?v=xH6cxIXtGWI,222.0,172201.0
818,"–ê–ª–æ, –∞–ª–µ",–ö—Ä–∏—Ö—ñ—Ç–∫–∞ –¶–∞—Ö–µ—Å,audio\–ê–ª–æ-–∞–ª–µ.mp3,–ê–ª–æ-–∞–ª–µ,https://youtube.com/watch?v=xH6cxIXtGWI,222.0,172201.0


2. Those are different songs, but the script failed to search the relevant YT audio (in some cases it doesn't even exist on YT)

In [36]:
filtered_songs_df[filtered_songs_df["audio_path"] == "audio\–ö—Ä–∏—Ö—ñ—Ç–∫–∞ –¶–∞—Ö–µ—Å - –ù–∞ –ü–µ—Ä—à–æ–º—É –ú—ñ—Å—Ü—ñ.mp3"]

Unnamed: 0,title,artist,audio_path,yt_title,yt_url,yt_duration,yt_views
750,–ù–∞ –ø–µ—Ä—à–æ–º—É –º—ñ—Å—Ü—ñ,–ö—Ä–∏—Ö—ñ—Ç–∫–∞ –¶–∞—Ö–µ—Å,audio\–ö—Ä–∏—Ö—ñ—Ç–∫–∞ –¶–∞—Ö–µ—Å - –ù–∞ –ü–µ—Ä—à–æ–º—É –ú—ñ—Å—Ü—ñ.mp3,–ö—Ä–∏—Ö—ñ—Ç–∫–∞ –¶–∞—Ö–µ—Å - –ù–∞ –ü–µ—Ä—à–æ–º—É –ú—ñ—Å—Ü—ñ,https://youtube.com/watch?v=RT6BXmAArYc,212.0,102050.0
771,–¢–∏ –Ω–∞ –ø–µ—Ä—à–æ–º—É –º—ñ—Å—Ü—ñ,–ö—Ä–∏—Ö—ñ—Ç–∫–∞ –¶–∞—Ö–µ—Å,audio\–ö—Ä–∏—Ö—ñ—Ç–∫–∞ –¶–∞—Ö–µ—Å - –ù–∞ –ü–µ—Ä—à–æ–º—É –ú—ñ—Å—Ü—ñ.mp3,–ö—Ä–∏—Ö—ñ—Ç–∫–∞ –¶–∞—Ö–µ—Å - –ù–∞ –ü–µ—Ä—à–æ–º—É –ú—ñ—Å—Ü—ñ,https://youtube.com/watch?v=RT6BXmAArYc,212.0,102050.0
777,–ù–∞ –ø–µ—Ä—à–æ–º—É –º—ñ—Å—Ü—ñ,–ö—Ä–∏—Ö—ñ—Ç–∫–∞ –¶–∞—Ö–µ—Å,audio\–ö—Ä–∏—Ö—ñ—Ç–∫–∞ –¶–∞—Ö–µ—Å - –ù–∞ –ü–µ—Ä—à–æ–º—É –ú—ñ—Å—Ü—ñ.mp3,–ö—Ä–∏—Ö—ñ—Ç–∫–∞ –¶–∞—Ö–µ—Å - –ù–∞ –ü–µ—Ä—à–æ–º—É –ú—ñ—Å—Ü—ñ,https://youtube.com/watch?v=RT6BXmAArYc,212.0,102050.0
782,–ù–∞ –ø–µ—Ä—à–æ–º—É –ºi—Å—Üi,–ö—Ä–∏—Ö—ñ—Ç–∫–∞ –¶–∞—Ö–µ—Å,audio\–ö—Ä–∏—Ö—ñ—Ç–∫–∞ –¶–∞—Ö–µ—Å - –ù–∞ –ü–µ—Ä—à–æ–º—É –ú—ñ—Å—Ü—ñ.mp3,–ö—Ä–∏—Ö—ñ—Ç–∫–∞ –¶–∞—Ö–µ—Å - –ù–∞ –ü–µ—Ä—à–æ–º—É –ú—ñ—Å—Ü—ñ,https://youtube.com/watch?v=RT6BXmAArYc,212.0,102050.0
800,Ty na pershomu misci,–ö—Ä–∏—Ö—ñ—Ç–∫–∞ –¶–∞—Ö–µ—Å,audio\–ö—Ä–∏—Ö—ñ—Ç–∫–∞ –¶–∞—Ö–µ—Å - –ù–∞ –ü–µ—Ä—à–æ–º—É –ú—ñ—Å—Ü—ñ.mp3,–ö—Ä–∏—Ö—ñ—Ç–∫–∞ –¶–∞—Ö–µ—Å - –ù–∞ –ü–µ—Ä—à–æ–º—É –ú—ñ—Å—Ü—ñ,https://youtube.com/watch?v=RT6BXmAArYc,212.0,102051.0
801,–ù–∞ –ø–µ—Ä—à–æ–º—É –º—ñ—Å—Ç—ñ,–ö—Ä–∏—Ö—ñ—Ç–∫–∞ –¶–∞—Ö–µ—Å,audio\–ö—Ä–∏—Ö—ñ—Ç–∫–∞ –¶–∞—Ö–µ—Å - –ù–∞ –ü–µ—Ä—à–æ–º—É –ú—ñ—Å—Ü—ñ.mp3,–ö—Ä–∏—Ö—ñ—Ç–∫–∞ –¶–∞—Ö–µ—Å - –ù–∞ –ü–µ—Ä—à–æ–º—É –ú—ñ—Å—Ü—ñ,https://youtube.com/watch?v=RT6BXmAArYc,212.0,102051.0
814,–ù–∞ –ø–µ—Ä—à–æ–º—É –º—ñ—Å—Ç—ñ,–ö—Ä–∏—Ö—ñ—Ç–∫–∞ –¶–∞—Ö–µ—Å,audio\–ö—Ä–∏—Ö—ñ—Ç–∫–∞ –¶–∞—Ö–µ—Å - –ù–∞ –ü–µ—Ä—à–æ–º—É –ú—ñ—Å—Ü—ñ.mp3,–ö—Ä–∏—Ö—ñ—Ç–∫–∞ –¶–∞—Ö–µ—Å - –ù–∞ –ü–µ—Ä—à–æ–º—É –ú—ñ—Å—Ü—ñ,https://youtube.com/watch?v=RT6BXmAArYc,212.0,102051.0


We need to perform two further postprocessing steps:
1. Normalize artist and titles and filter duplicates
2. Prepare a column which will indicate the need for re-downloading the audio (or setting None if it doesn't exist on YT)

In [19]:
non_unique_paths = filtered_songs_df.groupby("title").filter(lambda x: x["artist"].nunique() > 1)["audio_path"].value_counts()
non_unique_paths = non_unique_paths[non_unique_paths > 1].index.tolist()
non_unique_paths

['audio\\–í–æ–ø–ª—ñ –í—ñ–¥–æ–ø–ª—è—Å–æ–≤–∞ - –ë—É–ª–∏ –Ω–∞ —Å–µ–ª—ñ.mp3',
 'audio\\–í–æ–ø–ª—ñ –í—ñ–¥–æ–ø–ª—è—Å–æ–≤–∞ - –í–ï–°–ù–ê.mp3',
 'audio\\–á—Ö–∞–ª–∏ –∫–æ–∑–∞–∫–∏.mp3',
 'audio\\–¢–∞–º, –ü—ñ–¥ –õ—å–≤—ñ–≤—Å—å–∫–∏–º –ó–∞–º–∫–æ–º.mp3',
 'audio\\Intro.mp3',
 'audio\\–ü–∞—á–∫–∞ —Ü–∏–≥–∞—Ä–æ–∫.mp3',
 'audio\\–ñ–∞–¥–∞–Ω —ñ –°–æ–±–∞–∫–∏ feat. Gogol Bordello ‚Äì 5-–∞ –∞–≤–µ–Ω—é (–û—Ñ—ñ—Ü—ñ–π–Ω–µ –≤—ñ–¥–µ–æ).mp3',
 'audio\\–í–æ–ø–ª—ñ –í—ñ–¥–æ–ø–ª—è—Å–æ–≤–∞ - –°–æ–Ω—è—á–Ω—ñ –¥–Ω—ñ [Official Video].mp3',
 'audio\\–í–æ–ø–ª—ñ –í—ñ–¥–æ–ø–ª—è—Å–æ–≤–∞ - –ó–æ—Ä—è–Ω–∞ –æ—Å—ñ–Ω—å.mp3',
 'audio\\–ö–µ–¥—å –ú–∏ –ü—Ä–∏—à–ª–∞ –ö–∞—Ä—Ç–∞.mp3',
 'audio\\–í–æ–ø–ª—ñ –í—ñ–¥–æ–ø–ª—è—Å–æ–≤–∞ - –¢–∞–Ω—Üi.mp3',
 'audio\\–Ø –ø—ñ–¥—ñ–π–¥—É.mp3',
 'audio\\–¢–∞—Ä—Ç–∞–∫ feat. –ö–∞—Ç—è Chilly ‚Äî –ü–æ–Ω–∞–¥ –•–º–∞—Ä–∞–º–∏....mp3',
 'audio\\–¢—Ä–∞–≤–∞.mp3',
 'audio\\–ì–ª–∏–±–∏–Ω–∞.mp3',
 'audio\\–ü–ª–∞—á –Ñ—Ä–µ–º—ñ—ó - –¢–∏ –≤—Ç—Ä–µ—Ç—î —Ü—å–æ–≥–æ –ª—ñ—Ç–∞ –∑–∞—Ü–≤—ñ—Ç–µ—à.mp3',
 'audio\\–ë—É–¥–µ –ù–∞–º –ó –¢–æ–±–æ—é –©

In [33]:
antitila_df = filtered_songs_df[filtered_songs_df["audio_path"] == 'audio\\–Ø –ø—ñ–¥—ñ–π–¥—É.mp3']
antitila_df

Unnamed: 0,title,artist,audio_path,yt_title,yt_url,yt_duration,yt_views
627,–Ø –ø—ñ–¥—ñ–π–¥—É,–í–æ–ø–ª—ñ –í—ñ–¥–æ–ø–ª—è—Å–æ–≤–∞,audio\–Ø –ø—ñ–¥—ñ–π–¥—É.mp3,–Ø –ø—ñ–¥—ñ–π–¥—É,https://youtube.com/watch?v=eBTkMoyZgp0,137.0,31225.0
643,–Ø –øi–¥i–π–¥—É,–í–æ–ø–ª—ñ –í—ñ–¥–æ–ø–ª—è—Å–æ–≤–∞,audio\–Ø –ø—ñ–¥—ñ–π–¥—É.mp3,–Ø –ø—ñ–¥—ñ–π–¥—É,https://youtube.com/watch?v=eBTkMoyZgp0,137.0,31225.0
1380,–Ø –øi–¥i–π–¥—É,–í–æ–ø–ªi –íi–¥–æ–ø–ª—è—Å–æ–≤–∞,audio\–Ø –ø—ñ–¥—ñ–π–¥—É.mp3,–Ø –ø—ñ–¥—ñ–π–¥—É,https://youtube.com/watch?v=eBTkMoyZgp0,137.0,31226.0
1385,–Ø –ø—ñ–¥—ñ–π–¥—É,–í–æ–ø–ªi –íi–¥–æ–ø–ª—è—Å–æ–≤–∞,audio\–Ø –ø—ñ–¥—ñ–π–¥—É.mp3,–Ø –ø—ñ–¥—ñ–π–¥—É,https://youtube.com/watch?v=eBTkMoyZgp0,137.0,31226.0


In [34]:
antitila_df["artist"].value_counts()

–í–æ–ø–ª—ñ –í—ñ–¥–æ–ø–ª—è—Å–æ–≤–∞    2
–í–æ–ø–ªi –íi–¥–æ–ø–ª—è—Å–æ–≤–∞    2
Name: artist, dtype: int64

In [35]:
antitila_df["title"].value_counts()

–Ø –ø—ñ–¥—ñ–π–¥—É    2
–Ø –øi–¥i–π–¥—É    2
Name: title, dtype: int64