In [205]:
# Version string of this notebook (presumably the value written to the log banner — TODO confirm;
# no code visible in this chunk reads it).
ver = "v.2.024.8 [Local]"

import requests
import os
import pandas as pd
import csv
import time
import json
import datetime
from math import nan

In [206]:
# Locations of the data files inside the local checkout of the site repository.
userDataFolder = '/Users/viktorgribov/GitHub/mushroomoff.github.io/'
dbFolder = 'Databases/'
releasesDB = f'{userDataFolder}{dbFolder}AMR_releases_DB.csv'
artistIDDB = f'{userDataFolder}{dbFolder}AMR_artisitIDs.csv'
logFile = f'{userDataFolder}status.log'

# Column order of the releases CSV; csv.DictWriter uses it when rows are appended.
fieldNames = [
    'mainArtist',
    'mainId',
    'artistName',
    'artistId',
    'primaryGenreName',
    'collectionId',
    'collectionName',
    'collectionCensoredName',
    'artworkUrl100',
    'collectionExplicitness',
    'trackCount',
    'copyright',
    'country',
    'releaseDate',
    'releaseYear',
    'dateUpdate',
    'artworkUrlD',
    'downloadedCover',
    'downloadedRelease',
    'updReason',
]

# One shared HTTP session so every iTunes request carries the same headers.
ses = requests.Session()
ses.headers.update({
    'Referer': 'https://itunes.apple.com',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:45.0) Gecko/20100101 Firefox/45.0',
})

# Инициализация функций===================================================

# Процедура Замены символов для Markdown v2
def ReplaceSymbols(rsTxt):
    """Backslash-escape Markdown V2 special characters in ``rsTxt``.

    Every occurrence of a character from the escape set below is prefixed
    with one backslash; all other characters are left untouched.

    Returns the escaped string.
    """
    escapeSet = """'_*[]",()~`>#+-=|{}.!"""
    # Translation table: code point -> the same character preceded by a backslash.
    escapeTable = {ord(symbol): '\\' + symbol for symbol in escapeSet}
    return rsTxt.translate(escapeTable)

In [207]:
# Процедура Поиска релизов исполнителя в базе iTunes  
def FindReleases(artistID, country, artistPrintName):
    """Look up an artist's albums in iTunes and append unseen ones to the releases CSV.

    Queries the iTunes lookup endpoint for ``artistID`` with the given
    ``country`` code, keeps the results that carry a ``collectionName`` and
    appends one row to ``releasesDB`` for every result that is either

    * a release whose ``collectionId`` is not in the CSV yet ('New release'), or
    * a known release whose artwork URL (compared from character 40 onwards)
      is not in the CSV yet ('New cover').

    Problems (HTTP error, empty answer, no usable data) and the number of new
    records are reported with ``print``.

    Parameters
    ----------
    artistID : int or str
        iTunes artist id; also written to the ``mainId`` column.
    country : str
        Value of the ``country`` query parameter (e.g. 'us', 'jp', 'ru').
    artistPrintName : str
        Name written to the ``mainArtist`` column.

    Returns
    -------
    None
    """
    # Fields taken from every lookup result.
    albumFields = ['artistName', 'artistId', 'primaryGenreName', 'collectionId',
                   'collectionName', 'collectionCensoredName', 'artworkUrl100',
                   'collectionExplicitness', 'trackCount', 'copyright', 'country',
                   'releaseDate']
    allDataFrame = pd.DataFrame()
    dfExport = pd.DataFrame()
    check_ers = 0  # becomes 1 as soon as a problem message has been printed

    url = ('https://itunes.apple.com/lookup?id=' + str(artistID)
           + '&country=' + country + '&entity=album&limit=200')
    try:
        # The timeout keeps one stalled connection from hanging the whole run.
        request = ses.get(url, timeout=30)
    except requests.RequestException as reqError:
        # Network-level failure: report it the same way as an HTTP error.
        print('\n', end='')
        print(' ' + country + ' - ERROR (' + type(reqError).__name__ + ') |', sep=' ', end='', flush=True)
        check_ers = 1
    else:
        if request.status_code == 200:
            dJSON = json.loads(request.text)
            if dJSON['resultCount'] > 1:
                # reindex() rather than [[...]]: a field absent from the
                # response becomes an all-NaN column instead of a KeyError.
                allDataFrame = pd.DataFrame(dJSON['results']).reindex(columns=albumFields)
            else:
                print('\n', end='')
                print(' ' + country + ' - EMPTY |', sep=' ', end='', flush=True)
                check_ers = 1
        else:
            print('\n', end='')
            print(' ' + country + ' - ERROR (' + str(request.status_code) + ') |', sep=' ', end='', flush=True)
            check_ers = 1
    time.sleep(1)  # throttle requests to avoid being blocked

    if len(allDataFrame) > 0:
        allDataFrame = allDataFrame.drop_duplicates(subset='artworkUrl100', keep='first', ignore_index=True)
        dfExport = allDataFrame.loc[allDataFrame['collectionName'].notna()]
    else:
        if check_ers == 0:
            print('\n', end='')
        print(' Bad ID: ' + str(artistID), sep=' ', end='', flush=True)
        check_ers = 1

    if check_ers == 1:
        print('')

    if len(dfExport) == 0:
        return

    # Snapshot of the CSV taken before appending; all "is it new?" checks use it.
    pdiTunesDB = pd.read_csv(releasesDB, sep=";")
    dateUpdate = datetime.datetime.now().strftime('%Y-%m-%d')
    newRelCounter = 0
    newCovCounter = 0
    knownCoverTails = None  # built lazily, only needed for already-known releases

    # `with` guarantees the handle is closed even if a row raises;
    # utf-8 matches the encoding pd.read_csv assumes for the same file.
    with open(releasesDB, 'a+', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, delimiter=';', fieldnames=fieldNames)

        for _, row in dfExport.iterrows():
            # Read the values from the row itself: the iterrows() label is not
            # a position, so it must not be used with .iloc.
            collectionId = row['collectionId']
            artworkUrl100 = row['artworkUrl100']
            releaseDate = row['releaseDate']

            if not (pdiTunesDB['collectionId'] == collectionId).any():
                updReason = 'New release'
                newRelCounter += 1
            else:
                # Compare only the URL part from character 40 on, e.g.
                # https://is2-ssl.mzstatic.com/image/thumb/Music/v4/b2/cc/64/<uuid>/source/100x100bb.jpg
                #                                         ^ index 40
                if knownCoverTails is None:
                    knownCoverTails = pdiTunesDB['artworkUrl100'].str[40:]
                if (knownCoverTails == artworkUrl100[40:]).any():
                    continue  # release and cover already recorded
                updReason = 'New cover'
                newCovCounter += 1

            writer.writerow({
                'mainArtist': artistPrintName,
                'mainId': artistID,
                'artistName': row['artistName'],
                'artistId': row['artistId'],
                'primaryGenreName': row['primaryGenreName'],
                'collectionId': collectionId,
                'collectionName': row['collectionName'],
                'collectionCensoredName': row['collectionCensoredName'],
                'artworkUrl100': artworkUrl100,
                'collectionExplicitness': row['collectionExplicitness'],
                'trackCount': row['trackCount'],
                'copyright': row['copyright'],
                # country as reported in the lookup result, not the request parameter
                'country': row['country'],
                'releaseDate': releaseDate[:10],
                'releaseYear': releaseDate[:4],
                'dateUpdate': dateUpdate,
                'artworkUrlD': artworkUrl100.replace('100x100bb', '100000x100000-999'),
                'downloadedCover': '',
                'downloadedRelease': '',
                'updReason': updReason,
            })

    if (newRelCounter + newCovCounter) > 0:
        print('\n^ ' + str(newRelCounter + newCovCounter) + ' new records: ' + str(newRelCounter) + ' releases, ' + str(newCovCounter) + ' covers')

# Инициализация функций===================================================

In [208]:
# Функция поиска ошибок в логе

def findErrors(logFile):
    """Collect the artist/country pairs logged with ``ERROR (503)``.

    The log is scanned from the top. ``bgn`` is the index of the last
    '[V] Done!' line seen and ``end`` the index of the first version banner
    ('[Apple Music Releases LookApp] - v.') found at an index greater than
    zero; scanning stops there. Every line strictly between the two that
    splits on ' - ' into exactly five fields, the last one being
    'ERROR (503)', contributes ``[field 2, field 3]`` to the result.

    Parameters
    ----------
    logFile : str
        Path of the log file to read.

    Returns
    -------
    list of [str, str]
        One entry per matching line, in file order. Empty when no banner is
        found at an index greater than zero. The number of entries is also
        printed.
    """
    doneMarker = '[Apple Music Releases LookApp] - [V] Done!'
    startMarker = '[Apple Music Releases LookApp] - v.'

    with open(logFile, 'r') as lf:
        lfText = lf.readlines()

    # Locate the window by position. enumerate() gives the real index of each
    # line; list.index() would return the first identical line instead and
    # costs a full scan per line.
    bgn = 0
    end = 0
    for pos, line in enumerate(lfText):
        if doneMarker in line:
            bgn = pos
        if startMarker in line:
            end = pos
            if end > 0:
                break

    # Gather the failed artist/country pairs between the two markers.
    erList = []
    for line in lfText[bgn + 1:end]:
        lineSplit = line.split(' - ')
        if len(lineSplit) == 5 and lineSplit[4] == 'ERROR (503)\n':
            erList.append([lineSplit[2], lineSplit[3]])
    print('Найдено ошибок:', str(len(erList)))
    return erList

In [213]:
# Retry every artist/country pair that the log reports as failed with a 503.
artList = findErrors(logFile)
artistIDlist = pd.read_csv(artistIDDB, sep=';')

for failedArtist, failedCountry in artList:
    # Every mainId stored under this artist name is retried.
    artID = artistIDlist.loc[artistIDlist['mainArtist'] == failedArtist, 'mainId']
    if artID.empty:
        # The artist name from the log has no row in the id table.
        print('Error!', failedArtist)
        continue
    for mainId in artID:
        print(mainId, failedCountry, failedArtist)
        FindReleases(mainId, failedCountry, failedArtist)

355823121 us Лешак
352344842 jp Карандаш

 jp - ERROR (503) | Bad ID: 352344842
569705781 jp Грай
1372953342 jp Баста
1372953342 ru Баста
77315264 jp Wig Wam

 jp - ERROR (503) | Bad ID: 77315264
189950439 ru Veil of Maya
1082238720 jp Syncatto
1082238720 ru Syncatto
994032832 ru Saint Asonia
1674741887 ru Ryujin
272130237 jp Powerwolf
1484547434 us Paralydium

 us - ERROR (503) | Bad ID: 1484547434
3445763 jp Papa Roach
1484238276 ru Ottto
271815617 ru Orden Ogan
628452620 jp Nita Strauss
5280361 jp Nickelback
539605 jp Motorhead
1216150815 ru Master Boot Records

 ru - ERROR (503) | Bad ID: 1216150815
267420258 jp Malevolence

 jp - ERROR (503) | Bad ID: 267420258
530875466 ru Leander Rising
1450170610 jp Keygen Church
73592787 us Katatonia
1883437 jp Josh Turner
963872688 jp Jidenna
1558093934 ru Glass Beams
1457925016 jp Future Palace
151910203 us Fergie
17508882 us Farmer Boys
1344210544 ru Equipoise
111051 us Eminem
430613575 jp Edge of Paradise
1031397873 ru Dua Lipa
95701594 us

In [None]:
# на всякий случай
# NOTE(review): scratch cell — it relies on `artID` left in the kernel by the
# retry loop of the previous cell and raises IndexError when that Series is empty.
curRow = artID.index[0]  # index label of the first matching row
artID.reset_index(drop=True, inplace=True)  # renumber from 0 so that artID[0] below resolves
curArt= artID[0]  # first mainId value