In [6]:
import requests
import json
import pandas as pd
from yandex_music import Track, Client

In [7]:
def get_data_from_file(file):
    """Load the track ids stored under the ``'trackIds'`` key of a JSON file.

    Entries may be integers, numeric strings, or strings of the form
    ``"<id>:<suffix>"``; only the part before the first colon is kept
    (presumably the suffix is an album id -- confirm against the export format).

    Args:
        file: Path to a JSON file whose top-level object has a ``'trackIds'`` list.

    Returns:
        A new list of track ids as ``int``, in file order.

    Raises:
        KeyError: if the JSON object has no ``'trackIds'`` key.
        ValueError: if an entry cannot be converted to ``int``.
    """
    # JSON is UTF-8 by specification; do not depend on the platform's locale encoding.
    with open(file, "r", encoding="utf-8") as f:
        raw_ids = json.load(f)['trackIds']
    return [
        int(raw_id.split(':')[0] if isinstance(raw_id, str) else raw_id)
        for raw_id in raw_ids
    ]
        

In [8]:
def link_two_dbs(a, b):
    """Join two sequences of ids, collapsing the region where they overlap.

    Scans ``a`` from left to right for the first position where the rest of
    ``a`` and the start of ``b`` agree element by element until one of them
    runs out:

    * if ``a`` runs out first (or both at once), the unmatched remainder of
      ``b`` is appended to ``a``;
    * if ``b`` runs out first, ``b`` is entirely contained in ``a`` and ``a``
      itself is returned unchanged.

    When no such position exists the result is plain concatenation.
    """
    size_a, size_b = len(a), len(b)
    for start in range(size_a):
        tail_len = size_a - start
        span = min(tail_len, size_b)
        # Any mismatch inside the comparable span rules this start position out.
        if any(a[start + offset] != b[offset] for offset in range(span)):
            continue
        if tail_len <= size_b:
            # The tail of `a` is a prefix of `b`: keep only what `b` adds.
            return a + b[tail_len:]
        # `b` fits entirely inside `a`: nothing new to add.
        return a
    return a + b

In [9]:
def link_data(data):
    """Merge several id lists into one, removing overlaps between neighbours.

    Each list is reversed before merging (presumably the files store the most
    recent entry first -- confirm against the export format), then the lists
    are folded left to right with ``link_two_dbs``.

    Args:
        data: Non-empty sequence of id lists, in the order they should be merged.

    Returns:
        The merged list of ids.
    """
    merged = list(reversed(data[0]))
    for chunk in data[1:]:
        merged = link_two_dbs(merged, list(reversed(chunk)))
    return merged

In [22]:
def link_files(files):
    """Read every file in ``files``, merge their ids and report the totals.

    Args:
        files: Iterable of paths accepted by ``get_data_from_file``, in merge order.

    Returns:
        The merged id list produced by ``link_data``. A summary with the raw
        line count, the number of dropped duplicates and the final length is
        printed as a side effect.
    """
    per_file = [get_data_from_file(path) for path in files]
    raw_total = sum(len(chunk) for chunk in per_file)
    linked = link_data(per_file)
    report = "Recieved {} lines of raw data, cutted {} duplicates\nTotal {} lines ready for analysis"
    print(report.format(raw_total, raw_total - len(linked), len(linked)))
    return linked
    

In [23]:
# Data files ORDERED BY TIME (oldest snapshot first, per the dates in the file names).
# The order matters: link_files passes them to link_data in this sequence, and
# link_data merges each list into the result accumulated from the previous ones.
CACHED_DATA = [
    "sample_data/data14.07.2021.json",
    "sample_data/data22.09.2021.json",
    "sample_data/data31.01.2022.json",
    "sample_data/data13.07.2022.json",
    "sample_data/data25.07.2022.json"    
]

In [24]:
# Merge all cached snapshots into one list of track ids; link_files also prints a summary.
data = link_files(CACHED_DATA)

Recieved 18992 lines of raw data, cutted 8816 duplicates
Total 10176 lines ready for analysis


In [9]:
len(data)

10176

In [16]:
def form_dataframe(data, client=None):
    """Count occurrences of each track id and attach track metadata.

    Args:
        data: Iterable of track ids (one entry per occurrence).
        client: Optional object exposing ``tracks(ids)`` that returns one
            track per id, in the same order. Defaults to a fresh ``Client()``,
            which matches the previous behaviour; passing one in allows
            reusing an already configured client.

    Returns:
        A DataFrame sorted by ``count`` (descending) with the columns
        ``index`` (row position before sorting), ``id``, ``count``, ``data``
        (the raw track object), ``name`` and ``artist`` (artist names joined
        with ``", "``, or ``"Unknown"`` when the track lists no artists).
    """
    if client is None:
        client = Client()

    df = pd.DataFrame({'id': data, 'count': 1})
    df = df.groupby(by='id').count().reset_index()

    # NOTE(review): assumes the client returns exactly one track per requested
    # id, in request order; a length mismatch makes the assignment below raise.
    tracks = client.tracks(df['id'].tolist())
    df['data'] = tracks

    # Build whole columns at once instead of chained `df[col][i] = ...` writes:
    # those are silently lost under pandas Copy-on-Write and previously required
    # disabling the chained-assignment warning globally.
    df['name'] = [track['title'] for track in tracks]
    artists = []
    for track in tracks:
        names = [artist['name'] for artist in track['artists']]
        artists.append(", ".join(names) if names else "Unknown")
    df['artist'] = artists

    df = df.sort_values(['count'], ascending=False)
    # Keep the pre-sort position as an 'index' column, as before.
    df = df.reset_index()

    return df
   
    

In [17]:
# Count occurrences per track id and attach title/artist metadata.
df = form_dataframe(data)

In [18]:
df

Unnamed: 0,index,id,count,data,name,artist
0,1357,62556361,299,"{'id': '62556361', 'title': 'Марафоны', 'avail...",Марафоны,"Три дня дождя, Роки"
1,1806,80328592,160,"{'id': '80328592', 'title': 'Фьюче симпл', 'av...",Фьюче симпл,Narkomfin
2,1310,61209859,135,"{'id': '61209859', 'title': 'Красота', 'availa...",Красота,Три дня дождя
3,1764,78786629,113,"{'id': '78786629', 'title': 'Обсудим за столом...",Обсудим за столом,plagueinside
4,1315,61563837,113,"{'id': '61563837', 'title': 'Конфетти', 'avail...",Конфетти,Port Avenue
...,...,...,...,...,...,...
2186,871,39980101,1,"{'id': '39980101', 'title': 'Взрывая тишину', ...",Взрывая тишину,Коrsика
2187,868,39915054,1,"{'id': '39915054', 'title': 'Eleanor Rigby', '...",Eleanor Rigby,The Beatles
2188,865,39843915,1,"{'id': '39843915', 'title': 'before I close my...",before I close my eyes,XXXTentacion
2189,864,39843905,1,"{'id': '39843905', 'title': 'NUMB', 'available...",NUMB,XXXTentacion


In [19]:
def print_to_file(df, file="result.txt"):
    """Write one ``"<count> <artist> - <name>"`` line per row of ``df``.

    Args:
        df: DataFrame with ``count``, ``artist`` and ``name`` columns; rows are
            written in their current order.
        file: Destination path; the file is overwritten.
    """
    # Explicit UTF-8: titles and artist names contain non-ASCII text, which the
    # platform default encoding cannot always represent.
    with open(file, 'w', encoding='utf-8') as out:
        # Iterate positionally so the function works for any index labels.
        for count, artist, name in zip(df['count'], df['artist'], df['name']):
            out.write("{0:<5} {1} - {2}\n".format(count, artist, name))

In [20]:
# Write the ranking to the default output file ("result.txt").
print_to_file(df)

In [21]:
df.head(60)

Unnamed: 0,index,id,count,data,name,artist
0,1357,62556361,299,"{'id': '62556361', 'title': 'Марафоны', 'avail...",Марафоны,"Три дня дождя, Роки"
1,1806,80328592,160,"{'id': '80328592', 'title': 'Фьюче симпл', 'av...",Фьюче симпл,Narkomfin
2,1310,61209859,135,"{'id': '61209859', 'title': 'Красота', 'availa...",Красота,Три дня дождя
3,1764,78786629,113,"{'id': '78786629', 'title': 'Обсудим за столом...",Обсудим за столом,plagueinside
4,1315,61563837,113,"{'id': '61563837', 'title': 'Конфетти', 'avail...",Конфетти,Port Avenue
5,1869,83950827,111,"{'id': '83950827', 'title': 'Перезаряжай', 'av...",Перезаряжай,Три дня дождя
6,1625,73062356,104,"{'id': '73062356', 'title': 'Одиночество', 'av...",Одиночество,QWAZAR
7,2038,91677671,101,"{'id': '91677671', 'title': 'Cuba Libre', 'ava...",Cuba Libre,Markul
8,952,42940850,84,"{'id': '42940850', 'title': 'Худший друг', 'av...",Худший друг,Markul
9,2013,90805155,82,"{'id': '90805155', 'title': 'В глаза', 'availa...",В глаза,Port Avenue
