In [1]:
from pathlib import Path
import os, re
from time import perf_counter
from tqdm import tqdm
from mutagen import File
import mutagen.id3._util
from mutagen.mp3 import MP3
from mutagen.id3 import TPE1, TALB, TIT2, TRCK, TPE2, TDRC, TPOS, COMM, ID3
import pandas as pd

In [17]:
class Song:
    """One mp3 file together with the metadata parsed from its tags.

    Attributes:
        path: Location of the mp3 file on disk.
        title: Song title with singer/version markers removed.
        artist: Original artist of the covered song.
        date: Date string; either "DD/MM/YY" or "YYYY-MM-DD".
        match: Stored as given; not used by the methods of this class.
        verified: Stored as given; not used by the methods of this class.
        c_singer: List of cover singers (one entry for a solo, more for a duet).
        version: Version suffix such as ".v2"; empty/None means ".v1".
        comment: Free-text comment.
        track_number: Track number string, optionally "track/total".
    """

    def __init__(self, path, title="", artist="", date="", match=None,
                 verified=False, c_singer=None, version=None, comment=None,
                 t_n=None):
        self.path = path
        self.title = title
        self.artist = artist
        self.date = date
        self.match = match
        self.verified = verified
        # Build a fresh list per instance: a mutable default argument would be
        # shared by every Song created without an explicit c_singer.
        self.c_singer = c_singer if c_singer is not None else []
        self.version = version
        self.comment = comment
        self.track_number = t_n

    def get_year(self):
        """Return the four-digit year of ``self.date`` or "" if it cannot be read.

        Accepts both "DD/MM/YY" (year is the last part, prefixed with "20")
        and "YYYY-MM-DD" style dates (the four-digit part is returned as is).
        """
        date = self.date
        if not date:
            return ""
        parts = date.replace("-", "/").split("/")
        if len(parts) != 3:
            return ""
        for part in parts:
            if len(part) == 4 and part.isdigit():
                return part
        return "20" + parts[-1]

    def get_track_actual_number(self):
        """Return the track number without the "/total" suffix ("" if unset)."""
        tn = self.track_number
        if not tn:
            return ""
        return tn.split("/")[0]

    @staticmethod
    def replace_special_filenames(text, substitute):
        """Replace every character that is invalid in a file name with ``substitute``."""
        invalid_chars = ["*", "\\", "/", ":", "?", "|", "<", ">", "\""]
        for x in invalid_chars:
            text = text.replace(x, substitute)
        return text

    def __str__(self):
        """Return the target file name: "NNN. artist - title (singerversion).mp3"."""
        track = self.get_track_actual_number().zfill(3)
        # Anything other than exactly one singer is labelled as a duet.
        singer = self.c_singer[0] if len(self.c_singer) == 1 else "Duet"
        version = self.version if self.version else ".v1"
        return f"{track}. {self.artist} - {self.title} ({singer}{version}).mp3"

    def __repr__(self):
        """Return the title, falling back to the path when the title is empty."""
        return f"{self.title or self.path}"

In [32]:
def replace_special_filenames(text, substitute):
    """Return ``text`` with each file-name-invalid character swapped for ``substitute``.

    The characters are replaced one after another, in the order listed below.
    """
    forbidden = ("*", "\\", "/", ":", "?", "|", "<", ">", "\"")
    cleaned = text
    for symbol in forbidden:
        cleaned = cleaned.replace(symbol, substitute)
    return cleaned

# Sanity check: every invalid character is replaced one-for-one, so the
# length of the string must not change.
test_str = "**\\//:?|<>\""
new_str = replace_special_filenames(test_str, '_')
for shown in (len(test_str), new_str, len(new_str)):
    print(shown)

11
___________
11


In [18]:
# Root folder that is searched recursively for mp3 files.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
Mega_folder = r"C:\Users\Nyss\Documents\Evil & Neuro Songs"

def get_all_mp3(directory):
    """
    Collect every mp3 file below ``directory`` (searched recursively).

    Only entries matching ``*.mp3`` that are regular files are kept; each one
    is wrapped in a Song holding the file path as a string.
    """
    songs = []
    for entry in Path(directory).rglob('*.mp3'):
        if entry.is_file():
            songs.append(Song(str(entry)))
    return songs

# One Song per mp3 file found under Mega_folder (only the path is filled in at this point).
Mega_files = get_all_mp3(Mega_folder)

In [19]:
# Number of mp3 files discovered under Mega_folder.
print(len(Mega_files))

1145


In [20]:
def clean_date(date):
    """Normalise a raw date string to use "/" as the only separator.

    Brackets and parentheses are dropped; "-", " " and "." become "/".
    ``None`` yields an empty string.
    """
    if date is None:
        return ""
    cleaned = date
    for bracket in ('(', ')', '[', ']'):
        cleaned = cleaned.replace(bracket, "")
    for separator in ('-', ' ', '.'):
        cleaned = cleaned.replace(separator, "/")
    return cleaned

In [21]:
def MMDDYY_to_DDMMYY(date):
    """Swap month and day of a "M/D/Y" string and zero-pad both to two digits.

    A string that does not consist of exactly three "/"-separated parts is
    returned unchanged.
    """
    pieces = date.split('/')
    if len(pieces) != 3:
        return date
    month, day, year = pieces
    day = "0" + day if len(day) == 1 else day
    month = "0" + month if len(month) == 1 else month
    return "/".join((day, month, year))

In [22]:
def DDMMYY_to_YYYYMMDD(date):
    """Convert a "D/M/Y" string to "YYYY-MM-DD".

    Day and month are zero-padded to two digits and a two-digit year gets the
    prefix "20". A string that does not consist of exactly three
    "/"-separated parts is returned unchanged.
    """
    pieces = date.split('/')
    if len(pieces) != 3:
        return date
    day, month, year = pieces
    day = "0" + day if len(day) == 1 else day
    month = "0" + month if len(month) == 1 else month
    year = "20" + year if len(year) == 2 else year
    return "-".join((year, month, day))

In [23]:
# Optionally skips a leading "Duet (...)" group, then captures (greedily) the
# text between the following "(" and the last ")".
artist_pattern= r"(?:Duet\s?\(.*?\)\s?)?\((.*)\)"
# Captures a parenthesised group of three two-digit numbers separated by a
# single whitespace character, e.g. "(23 07 25)"; applied to file names.
date_pattern = r"(\(\d{2}\s\d{2}\s\d{2}\))"
# Anchored at the start: "Duet (A & B)" or "Duet (A & B & C)"; each name is a
# run of non-whitespace characters and the third name is optional.
c_singer_pattern = r"^Duet\s?\((\S+)(?:\s&\s(\S+))(?:\s&\s(\S+))?\)"
# Trailing run of non-digit characters up to the end of the string (may be empty).
comment_from_comment_pattern = r'[\D]*$'
# "v" followed by 1, 2 or 3, with an optional leading dot and an optional
# ".1"/".2"/".3" minor part, e.g. ".v1.2" or "v3".
version_pattern = r"(\.?v[123](?:\.[123])?)"
# "(feat. NAME)" where the dot is optional and NAME is a single word.
feat_pattern = r"(\(feat\.?\s\w+\))"

In [24]:
# Title approach:
# just remove everything that is not part of the title

In [25]:
def title_filter(title):
    """Strip everything from a raw title tag that is not the song title.

    Removes the version marker, a "(feat. X)" group, the words "Evil",
    "Neuro" and "Duet", and any "()" left behind; then collapses double
    spaces once and trims the ends.
    """
    for pattern in (version_pattern, feat_pattern):
        title = re.sub(pattern, "", title)
    for token in ("Evil", "Neuro", "Duet", "()"):
        title = title.replace(token, "")
    return title.replace("  ", " ").strip()

In [26]:
def test(pattern, text):
    match = re.search(pattern, text)
    if match is None:
        print("Match failed!")
    else:
        print(match.group(1))

In [27]:
# Title test: singer, version and "feat." markers must all disappear.
title1 = "Take On Me (Neuro.v1.2)"
title3 = "Bring Me To Life (Evil) (Duet) (feat. Cerber)"
for raw_title in (title1, title3):
    print(title_filter(raw_title))

Take On Me
Bring Me To Life


In [28]:
# Version test: show the title without its version, then the captured versions.
title1 = "Take On Me (Neuro.v1.2)"
title2 = "Digital Girl (Neuro.v3)"
print(re.sub(version_pattern, "", title1))
for versioned_title in (title1, title2):
    test(version_pattern, versioned_title)

Take On Me (Neuro)
.v1.2
.v3


In [29]:
# Artist test: the leading "Duet (...)" group is skipped and the text of the
# following parentheses is printed.
artist1 = "Duet (neweru & evilyn) (kessoku band)"
test(artist_pattern, artist1)

kessoku band


In [30]:
# Songs grouped by date: get_song_data appends each Song to the list stored
# under its cleaned date string.
Mega_dict = {}

def get_tag_value(tags, tag):
    """Return the ``text`` attribute of the frame stored under ``tag``.

    Yields ``None`` when the tag is absent or the frame has no ``text``.
    """
    frame = tags.get(tag)
    return None if frame is None else getattr(frame, 'text', None)


def get_content_from_tags(all_tags, tag_list):
    """Return the first text entry of the first usable tag in ``tag_list``.

    Args:
        all_tags: Mapping-like tag container accepted by get_tag_value.
        tag_list: Tag keys to try, in order of preference.

    Returns:
        The first text item of the first tag that has one, as a string,
        or "" when none of the tags provides any text.
    """
    for tag in tag_list:
        content_value = get_tag_value(all_tags, tag)
        # Skip missing frames (None) as well as frames whose text is empty;
        # indexing an empty text list would raise IndexError.
        if content_value:
            return str(content_value[0])

    return ""


def get_comment_safe(path):
    """Read the tags of the mp3 at ``path`` and split them into song fields.

    Returns:
        A tuple ``(comment, c_singer, artist, title, version, track_number)``:
        the raw comment text, the list of cover singers, the original artist,
        the filtered title, the version suffix (always starting with "." when
        present, otherwise "") and the raw track number text.
    """
    audio = MP3(path, ID3=ID3)

    track_number = get_content_from_tags(audio, ['TRCK'])

    raw_title = get_content_from_tags(audio, ['TIT2'])
    title = title_filter(raw_title)

    # The version lives inside the title tag; normalise it to a leading dot.
    version_match = re.search(version_pattern, raw_title)
    version = version_match.group(1) if version_match else ""
    if version and not version.startswith('.'):
        version = '.' + version

    comment = get_content_from_tags(audio, ['COMM::eng', 'COMM::XXX'])

    performer_text = get_content_from_tags(audio, ['TPE1']).strip()

    # A solo starts with the singer's name; anything else is tried as a duet.
    for solo in ("Neuro", "Evil"):
        if performer_text.startswith(solo):
            c_singer = [solo]
            break
    else:
        duet_match = re.search(c_singer_pattern, performer_text)
        if duet_match is None:
            c_singer = []
        else:
            c_singer = [name for name in duet_match.groups() if name is not None]

    artist_match = re.search(artist_pattern, performer_text)
    artist = "" if artist_match is None else artist_match.group(1)

    return comment, c_singer, artist, title, version, track_number


def get_song_data(song, dictionary):
    """Fill ``song`` from its mp3 tags and register it under its date.

    The comment tag is split into a leading date part and a trailing
    non-digit comment. If the tag holds no date, the file name is searched
    for a "(NN NN NN)" group instead.
    NOTE(review): dates from the tag are run through MMDDYY_to_DDMMYY while
    dates from the file name are kept in their order — presumably tags are
    month-first and file names day-first; confirm against the data.

    Args:
        song: Song whose ``path`` points at the mp3 file; updated in place.
        dictionary: Mapping of date string -> list of songs that receives the
            song. ``None`` falls back to the module-level ``Mega_dict``.
    """
    path = song.path
    file = os.path.basename(path)

    comment_meta, c_singer, artist, title, version, track_number = get_comment_safe(path)

    # The pattern also matches the empty string, so search() always succeeds.
    comment = re.search(comment_from_comment_pattern, comment_meta).group(0).strip()

    # Whatever precedes the trailing non-digit text is treated as the date.
    date = re.sub(comment_from_comment_pattern, "", comment_meta)

    meta = True
    if not date:
        date = re.search(date_pattern, file)
        if date is not None:
            date = date.group(1)
            meta = False
            print(date)

    sanitized_date = clean_date(date)
    if meta:
        sanitized_date = MMDDYY_to_DDMMYY(sanitized_date)

    song.title = title.strip()
    song.date = DDMMYY_to_YYYYMMDD(sanitized_date.strip())
    song.c_singer = c_singer
    song.artist = artist.strip()
    song.version = version.strip()
    song.track_number = track_number.strip()
    song.comment = comment.strip()

    # Honour the mapping passed by the caller instead of always writing to the
    # global one.
    target = Mega_dict if dictionary is None else dictionary
    target.setdefault(sanitized_date, []).append(song)

# Parse the tags of every discovered file and group the songs in Mega_dict.
# NOTE(review): the progress output recorded below shows this guard passes when
# the cell is run in the notebook.
if __name__ == "__main__":
    for song in tqdm(Mega_files):
        get_song_data(song, Mega_dict)

100%|████████████████████████████████████████████████████████████████| 1145/1145 [00:53<00:00, 21.55it/s]


In [34]:
count = 0
largest = 0
s = None
# Write the sanitized target file name of every song, one per line.
# (The exploratory checks that used count/largest/s were commented out, so
# the two values printed at the end stay at their initial state.)
with open("song_artist.txt", 'w', encoding='utf-8') as handle:
    for songs_of_day in Mega_dict.values():
        for song in songs_of_day:
            handle.write(replace_special_filenames(str(song), '_')+'\n')
print(largest)
print(s)

0
None


In [None]:
# Number of songs recorded for each date key.
for key, songs_of_day in Mega_dict.items():
    print(f"{key} - {len(songs_of_day)}")


In [None]:
# Count solo songs per singer and duets.
# c_singer is a list of names, so it has to be compared against lists (or by
# length); comparing it to a plain string never matches.
neuro_songs = 0
evil_songs = 0
duet_songs = 0
for key in Mega_dict:
    for song in Mega_dict[key]:
        singers = song.c_singer
        if len(singers) > 1:
            duet_songs += 1
        elif singers == ["Evil"]:
            evil_songs += 1
        elif singers == ["Neuro"]:
            neuro_songs += 1
print(neuro_songs)
print(evil_songs)
print(duet_songs)

In [None]:
def new_artist(song):
    """Build the artist tag text for ``song`` without modifying the song.

    Args:
        song: Object with ``c_singer`` (list of cover singers) and ``artist``.

    Returns:
        "<singer> - <artist>" for a single singer,
        "Duet (<A> & <B> ...) - <artist>" for several singers, with "Neuro"
        (or otherwise "Evil") listed first when present, or "" when no
        singer is known.
    """
    # Work on a copy: reordering the song's own list would change what
    # str(song) reports afterwards.
    singers = list(song.c_singer)

    if not singers:
        print("No cover singer found; artist tag left empty.")
        return ""

    if len(singers) == 1:
        return f"{singers[0]} - {song.artist}"

    for lead in ("Neuro", "Evil"):
        if lead in singers:
            singers.remove(lead)
            singers.insert(0, lead)
            break

    joined = " & ".join(singers)
    return f"Duet ({joined}) - {song.artist}"

album_pattern = r"DISC (\d)"

def new_album(path: str, group: int):
    """Look for "DISC <digit>" in ``path`` and return the requested regex group.

    Group 0 is the whole match ("DISC 1"), group 1 only the digit. An empty
    string is returned when ``path`` contains no such marker.
    """
    found = re.search(album_pattern, path)
    return "" if found is None else found.group(group)

In [None]:
# Key of the comment frame that update_and_rename_mp3 deletes before writing
# the new comment frames.
# NOTE(review): get_comment_safe reads 'COMM::XXX' (upper case) while this key
# is lower case — confirm which spelling the files actually carry.
REMOVE_COMM_KEY = "COMM::xxx"

def update_and_rename_mp3(song, new_dir):
    """Rewrite the ID3 tags of ``song`` and move the file below ``new_dir``.

    The file keeps the name of its parent folder as sub-directory and gets
    the file name produced by ``str(song)``, with characters that are invalid
    in file names replaced by "_".

    Args:
        song: Song with parsed metadata; ``song.path`` is updated to the new
            location after a successful move.
        new_dir: Root directory that receives "<parent folder>/<file name>".

    Returns:
        True when the file was tagged and moved, False when the song has no
        date (the file is left untouched in that case).
    """
    # Validate first so an incomplete song never leads to half-done work.
    if not song.date:
        print("ERROR DATE!")
        return False

    old_path = song.path
    parent = os.path.basename(os.path.dirname(old_path))
    old_filename = os.path.basename(old_path)

    # Determine the target name up front and make it safe for the file
    # system: titles may contain characters such as "?" or "/".
    new_filename = replace_special_filenames(str(song), '_')
    new_path = os.path.join(new_dir, parent, new_filename)

    audio = MP3(old_path, ID3=ID3)

    if audio.tags is None:
        audio.add_tags()

    if REMOVE_COMM_KEY in audio.tags:
        del audio.tags[REMOVE_COMM_KEY]

    audio.tags.add(TPE1(encoding=3, text=[new_artist(song)]))
    # Group 0 is the full "DISC n" text (album), group 1 only the digit (disc number).
    audio.tags.add(TALB(encoding=3, text=[new_album(parent, 0)]))
    audio.tags.add(TIT2(encoding=3, text=[song.title]))
    audio.tags.add(TRCK(encoding=3, text=[song.track_number]))
    audio.tags.add(TPE2(encoding=3, text=["QueenPb + vedal987"]))
    audio.tags.add(TDRC(encoding=3, text=[song.get_year()]))
    audio.tags.add(TPOS(encoding=3, text=[new_album(parent, 1)]))

    # The same "<date> <comment>" text is stored under two descriptions.
    comment_text = f"{song.date} {song.comment}"
    audio.tags.add(COMM(encoding=2, lang='eng', desc='', text=[comment_text]))
    audio.tags.add(COMM(encoding=2, lang='eng', desc='ID3v1 Comment', text=[comment_text]))

    # NOTE(review): whitespace-only comment under language 'ved' — purpose is
    # not visible here; kept as is.
    payload = "     "
    audio.tags.add(COMM(encoding=3, lang='ved', desc='', text=[payload]))

    audio.save()

    os.makedirs(os.path.dirname(new_path), exist_ok=True)
    os.rename(old_path, new_path)
    # Keep the object pointing at the file's current location.
    song.path = new_path

    print(f"✅ Processed: {old_filename} -> {new_filename}")
    return True
        

# NOTE(review): placeholder path; nothing in the visible notebook reads target_song.
target_song = '/Users/yourname/Music/Untagged_MP3s' 

In [None]:
# Test for update_and_rename_mp3: process the song whose file name starts with
# track 075 on 23/07/25, writing into a separate test folder.
s = None
for song in Mega_dict.get('23/07/25', []):
    print((song))
    if str(song).startswith('075'):
        s = song
        break
print(type(s))
# Only run the update when a matching song was actually found; passing None
# would fail inside update_and_rename_mp3.
if s is None:
    print("No song starting with '075' found for 23/07/25; nothing to update.")
else:
    update_and_rename_mp3(s, r"C:\Users\Nyss\Documents\Test\Evil & Neuro Songs")

In [None]:
def create_spreadsheet_data(mega_dict):
    """
    Flatten the date -> songs mapping into one record (dict) per song.

    Each record carries the song's title, track number, artist, date, cover
    singer list, version and comment under spreadsheet column names.
    """
    return [
        {
            "Title": song.title,
            "Track Number": song.track_number,
            "Artist": song.artist,
            "Date": song.date,
            "Cover Singer": song.c_singer,
            "Version": song.version,
            "Comment": song.comment,
        }
        for song_list in mega_dict.values()
        for song in song_list
    ]

data_records = create_spreadsheet_data(Mega_dict)

df = pd.DataFrame(data_records)

# "Cover Singer" holds Python lists, which the Excel writers cannot store in
# a cell; turn each list into comma-separated text before exporting.
if "Cover Singer" in df.columns:
    df = df.assign(**{
        "Cover Singer": df["Cover Singer"].map(
            lambda singers: ", ".join(singers) if isinstance(singers, (list, tuple)) else singers
        )
    })

df.to_excel('archive_standart.xlsx', index=False)