In [36]:
import psycopg2
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import Markdown as MD
from IPython.display import display
from numba import jit, njit, vectorize

In [2]:
# Create Connection to postgresql
# psql -h localhost -p 25432 -U musicbrainz musicbrainz_db

conn = psycopg2.connect(
    host="localhost",
    database="musicbrainz_db",
    user="musicbrainz",
    port=5432)

cursor = conn.cursor()

cursor.execute("select * from recording limit 1;")
cursor.fetchone()[0]
print("connection successful")

connection successful


## Loading Recording Tables

In [3]:
%%time
# Loading recording table SQL tables

MB_recording = pd.read_sql('SELECT gid FROM recording', con = conn, columns = ['rec-gid'])
# MB_recording.set_index('gid', inplace = True)
MB_recording.head()



CPU times: user 6.87 s, sys: 2.56 s, total: 9.43 s
Wall time: 18.3 s


Unnamed: 0,gid
0,0f42ab32-22cd-4dcf-927b-a8d9a183d68b
1,4dce8f93-45ee-4573-8558-8cd321256233
2,48fabe3f-0fbd-4145-a917-83d164d6386f
3,b30b9943-9100-4d84-9ad2-69859ea88fbb
4,b55f1db3-c6d2-4645-b908-03e1017a99c2


In [4]:
%%time
# Loading redirect table SQL table

# MB_redirects = pd.read_sql('select r.gid, rgr.gid from recording r join recording_gid_redirect rgr on rgr.new_id = r.id', con = conn)
# MB_redirects.columns = ['old', 'new']

MB_redirects = pd.read_sql('select r.gid AS new, rgr.gid AS old from recording r join recording_gid_redirect rgr on rgr.new_id = r.id', con=conn)
MB_redirects.head()

# select t.gid AS new, tgr.gid AS old from track t join track_gid_redirect tgr on tgr.new_id = t.id



CPU times: user 1.26 s, sys: 209 ms, total: 1.47 s
Wall time: 7.08 s


Unnamed: 0,new,old
0,597aff67-7326-4609-8ded-4ea45d4beed6,2d752b5a-ae16-4187-b18e-12cfb4113f4c
1,ddda2877-0fbd-495e-a19b-6e9f4e97d711,8edeb408-437d-4e97-93f8-2fb982927fb0
2,6db0efa0-46e0-4f27-94ce-6803513787be,de4ed46b-a78b-4bd4-94f5-82f5829931e0
3,8dda3e70-02f3-4032-871c-b16ba184a9f6,89d8fd1e-7372-42cf-8ab0-6d4d46576a86
4,00b0148f-5db3-42f9-b266-45fa2fcebb56,2ea53fe1-43ef-4e17-9db1-60156d3dc8ce


## Loading track tables

In [5]:
%%time
# Loading track table

MB_track = pd.read_sql('SELECT gid FROM track', con = conn)
MB_track.head()



CPU times: user 9.55 s, sys: 3.06 s, total: 12.6 s
Wall time: 25.2 s


Unnamed: 0,gid
0,9b02977e-a03b-4a6b-a9a9-06e722bdcd7a
1,43da7544-6283-3159-84f9-537fe823a1a7
2,0b6b6283-a5a8-4560-9fa8-f68a430d86ea
3,fa124f9a-d8ea-36a3-bed3-c817fdbe13e2
4,e56c6d3c-09cf-33a0-81c5-ceade77c35dc


In [6]:
%%time

MB_track_redirects = pd.read_sql('select t.gid AS new, tgr.gid AS old from track t join track_gid_redirect tgr on tgr.new_id = t.id', con=conn)
MB_track_redirects.head()



CPU times: user 232 ms, sys: 33.1 ms, total: 265 ms
Wall time: 929 ms


Unnamed: 0,new,old
0,5c2e31e6-8ff0-3f80-8e52-d7d6ba642aaf,d8abbf14-5945-3639-a0c9-3e9c70b1c0a4
1,742af864-28da-3fc3-9d85-ee509d83d1ce,446b27ef-92fd-3ea1-ad0a-fe1055196406
2,ec67694e-fefb-3374-9f4f-18d12233d055,403b9e19-a135-3acc-ae69-58fb0d735036
3,2a17c2f4-0e7e-39ea-acad-b88c2a503631,b61dde50-de25-3802-b706-dd0d490879ae
4,33caee67-b948-361e-bdc5-77fe0d842a88,13bee970-0129-320d-94c8-dfbc7748c692


In [7]:
%%time
# Loading canonical recording mbid table

MB_canonical = pd.read_sql('SELECT recording_mbid as old, canonical_recording_mbid as new FROM mapping.canonical_recording_redirect', con = conn)
# MB_canonical.columns = ['old', 'new']
# MB_canonical.set_index('recording-gid', inplace=True)
MB_canonical.head()



CPU times: user 2.2 s, sys: 833 ms, total: 3.03 s
Wall time: 5.92 s


Unnamed: 0,old,new
0,6ac02452-ee12-4f86-b389-bd20ba2fefcf,3e8eebfd-7613-4b3d-acbe-41709be76618
1,b4c26989-1b9e-4d50-8cde-56d6472e4bc3,3e8eebfd-7613-4b3d-acbe-41709be76618
2,601e1cf3-ad6c-4e38-9128-ba4d0d4b010f,b1050d12-b8af-409c-9cff-22759d93e240
3,35c4d840-e51f-4c07-9418-af9335b29642,f4680747-bf28-417a-ab33-af00577d8ac2
4,9ba7a9b9-a21c-4b12-8771-4c108b08b3e2,13b3875a-c89a-4be5-a6e4-0ca9164bc41d


## Loading artist tables

In [8]:
%%time
# Loading artist-mbid table for artist-conflation.

MB_artist = pd.read_sql('SELECT gid FROM artist', con = conn)
MB_artist.head()



CPU times: user 553 ms, sys: 62.4 ms, total: 615 ms
Wall time: 1.33 s


Unnamed: 0,gid
0,fadeb38c-833f-40bc-9d8c-a6383b38b1be
1,49add228-eac5-4de8-836c-d75cde7369c3
2,c112a400-af49-4665-8bba-741531d962a1
3,ca3f3ee1-c4a7-4bac-a16a-0b888a396c6b
4,7b4a548e-a01a-49b7-82e7-b49efeb9732c


In [9]:
%%time

MB_artist_redirects = pd.read_sql('select a.gid AS new, agr.gid AS old from artist a join artist_gid_redirect agr on agr.new_id = a.id', con=conn)
MB_artist_redirects.head()



CPU times: user 34.7 ms, sys: 10.2 ms, total: 44.9 ms
Wall time: 845 ms


Unnamed: 0,new,old
0,ad8260b2-2767-4e9b-9ece-7977fbcedadf,224efe8c-c070-4e8a-9402-d41d52f5b4f1
1,c8f70fe2-a3fb-48cd-af02-ce5df49a7875,549de92e-a454-47f3-a26f-900985cf4431
2,21074eb7-3849-4d0e-bd4d-5447ff175c45,aca98fdf-f1b5-4831-8d58-ab2541c63863
3,21074eb7-3849-4d0e-bd4d-5447ff175c45,d442c9b0-2052-4fda-ad7b-e254d70cd61e
4,21074eb7-3849-4d0e-bd4d-5447ff175c45,33da8f6e-4c9c-4a54-903a-d10788ceea77


# Loading Data

In [34]:
%%time
# Reads a list of file paths and reads + compiles data into a single pd.DataFrame
def read_files(file_path_repo):
    
    # init new empty main dataframe
    df = pd.DataFrame(columns = ['timestamp', 'artist-MBID', 'release-MBID', 'recording-MBID'])
    
    # Open a file with MLHD file paths to process
    with open(file_path_repo, 'r') as f:
        file_paths = f.readlines()
        file_paths= [item.strip() for item in file_paths]
    
    # Read files and compile into single df
    for pth in file_paths:
        temp = pd.read_csv(pth, sep='\t', names=['timestamp', 'artist-MBID', 'release-MBID', 'recording-MBID'])
        temp = temp[-temp['recording-MBID'].isna()]

        df = pd.concat([df, temp])
    
    return df

df = read_files('random_file_paths.txt')
print(df.shape)
df.head()

(3661057, 4)
CPU times: user 10.1 s, sys: 1.96 s, total: 12 s
Wall time: 12 s


Unnamed: 0,timestamp,artist-MBID,release-MBID,recording-MBID
0,1108412731,f4a31f0a-51dd-4fa7-986d-3095c40c5ed9,426c5c82-4472-4f7d-b1d9-9f928d338340,1deb956c-5439-4fbb-b026-5adb4330a934
1,1108422818,db999c3f-f243-4a5f-88d6-0c25243b6661,6079df6b-2c00-4fd6-b015-0e303eedf4fd,14e9eb4e-155d-46ff-9a83-a8d5e1936c81
2,1108423325,ce58d854-7430-4231-aa44-97f0144b3372,e427c52c-60f4-3df4-9493-2df0734d85aa,eced9a9b-cd59-40f8-a580-f27094bd8a89
3,1108428897,ce58d854-7430-4231-aa44-97f0144b3372,e427c52c-60f4-3df4-9493-2df0734d85aa,eced9a9b-cd59-40f8-a580-f27094bd8a89
4,1108429140,ce58d854-7430-4231-aa44-97f0144b3372,e427c52c-60f4-3df4-9493-2df0734d85aa,eced9a9b-cd59-40f8-a580-f27094bd8a89


In [11]:
null_count_artist = df['artist-MBID'].isnull().value_counts()
null_count_rec = df['recording-MBID'].isnull().value_counts()
null_count_rel = df['release-MBID'].isnull().value_counts()

def get_null_stats(val_count, attr_name):
    print("Number of NOT-null rows in {} = {}".format(attr_name, (val_count[0]/val_count.sum())*100))

get_null_stats(null_count_artist, 'artist-MBID')
get_null_stats(null_count_rec, 'recording-MBID')
get_null_stats(null_count_rel, 'release-MBID')

Number of NOT-null rows in artist-MBID = 98.87100911021052
Number of NOT-null rows in recording-MBID = 100.0
Number of NOT-null rows in release-MBID = 75.44897552810568


## Architechture:

1. Take a chunk of MBIDs (Test optimal chunk sizes too. Current optimal > 253k rows)
2. "Squish" series
    - i.e. Only take unique values from the series.
3. Pass squished series thorugh the following:
    - Get rec-mbids, and check if they exist in the track table.
    - Get rec-mbids, and check if they exist in the track_gid_redirect table.
    - Get rec-mbids, and check if they exist in the recording table.
    - Get rec-mbids that don't exist in recording, and pass it through MB_redirect
    - Get artist-mbids, and check if they exist in the artist table.
    - Get artist-mbids, and check if they exist in the artist_gid_redirect table.

4. "Unsquish" the series.
    - i.e. Take processed output for squished values, and apply them to unsquished values.
    - This process ensures processing only on unique values.
    The output for this processing is then applied to duplicate values as well.

In [12]:
# '''Squish function: 
# 1. Takes in input series with index number and recording-MBID.
# 2. Makes a mapping table with recording-MBIDs as the index, 
# and a series of row-indices with that MBID as the values.'''

# # def squish(input_series):

# '''
# 1. take inp_series
# 2. Generate empty mapping_df where:
#     - index = MBID
#     - value = series of indices from inp_series
# 2. start traversing
# 3. if new ID: 
#     - Add ID to mapping_df.index
#     - Set the value in mapping_df as a list of newly updated indices for inp_series
# 4. if not new ID:
#     - Add ID to list of indices for inp_series.
#     - Update this ID in mapping_df.index
# '''

# # inp = df['recording-MBID'].reset_index(drop=True)
# # inp_unique = inp.unique()

# # mapping_df = pd.DataFrame(index=inp_unique)
# # mapping_df

# # Tackle this later. Focus on basic unoptimized code first!

In [49]:
# A generic function for queries a series into another series.
    # Returns a series of boolean values corresponding to series_of_mbids 
    # (bool specifies if value exists in recording table or not.)

def query_in(series_to_query, series_to_query_in):
    # Queries all mbids in the recording table. Returns mbids that are present in recording table.
    mbids_in_series = series_to_query_in[series_to_query_in.isin(series_to_query)]
    
    # Makes a boolean map for all mbids in series_of_mbids
    bool_map = series_to_query.isin(mbids_in_series)
    
    return bool_map

## Testing Track MBIDs

In [50]:
%%time

# Just to prove that absolutely none of the of the 381k unique mbids are track-mbids disguised as rec-mbid.
"""
Input: all unique recording-MBIDs from the input dataset.
Output: Boolmap of all recording-MBIDs that belong to the MB "track" table.
"""

in_for_track = pd.Series(df['recording-MBID'].unique())
out_for_track = query_in(in_for_track, MB_track)

out_for_track.value_counts()

CPU times: user 2.61 s, sys: 276 ms, total: 2.89 s
Wall time: 2.89 s


False    381145
dtype: int64

In [None]:
in_for_track = pd.Series(df['recording-MBID'].unique())
out_for_track = query_in(in_for_track, MB_track)

out_for_track.value_counts()

In [15]:
%%time

"""
Input: all unique recording-MBIDs from the input dataset.
Output: Boolmap of all recording-MBIDs that belong to the MB "track_gid_redirect" table.
"""

in_for_track = pd.Series(df['recording-MBID'].unique())
out_for_track_redir = query_in(in_for_track, MB_track_redirects.old)

track_mbid_in_r_mbid = out_for_track.value_counts()
track_mbid_in_r_mbid

CPU times: user 530 ms, sys: 34.6 ms, total: 565 ms
Wall time: 562 ms


False    381145
dtype: int64

## Testing for recording-MBID

In [16]:
%%time
"""
Input: all unique recording-MBIDs from the input dataset.
Output: Boolmap of all recording-MBIDs that belong to the entity table (MB "recording" table.)
"""

in_for_rec = pd.Series(df['recording-MBID'].unique())
out_for_rec = query_in(in_for_rec, MB_recording.gid)

r_mbid_in_r_mbid = out_for_rec.value_counts()
r_mbid_in_r_mbid

CPU times: user 9 s, sys: 14.7 ms, total: 9.01 s
Wall time: 9.02 s


True     295214
False     85931
dtype: int64

## Testing for rec-MBID redirects

In [17]:
%%time

# Testing if MBIDs that DONT exist in MB recording table have any redirects
"""
Input: all unique recording-MBIDs from the input dataset that DON'T belong the MB "recording" table.
Output: Boolmap of all recording-MBIDs that have a redirect available.
"""
in_for_rec_redir = in_for_rec[-out_for_rec]

out_for_rec_redir = query_in(in_for_rec_redir, MB_redirects.old)

out_for_rec_redir.value_counts()

CPU times: user 761 ms, sys: 2.52 ms, total: 764 ms
Wall time: 759 ms


True     84783
False     1148
dtype: int64

## Checking if artist-MBIDs belong to artist ID.

In [18]:
# Checking if artist-MBIDs belong to artist ID.
"""
Input: all unique artist-MBIDs from the dataset.
Output: Boolmap of all artist-MBIDs that are present in the MB "artist" table.
"""

input_artists = pd.Series(df['artist-MBID'].dropna().unique())

output_artists = query_in(input_artists, MB_artist.gid)
output_artists.value_counts()

True     30839
False     2596
dtype: int64

In [19]:
# Checking if artist-MBIDs belong to artist ID.
"""
Input: all unique artist-MBIDs from the dataset that DON'T belong to the MB "artist" table.
Output: Boolmap of all artist-MBIDs that are NOT present in the MB "artist" table and have a redirect available.
"""

input_artists_redirect = input_artists[-output_artists]

output_artists_redirect = query_in(input_artists_redirect, MB_artist_redirects.old)
output_artists_redirect.value_counts()

True     2453
False     143
dtype: int64

# ANALYSIS REPORTS

In [20]:
report_track = """
## Track-MBIDs

### Number of rec-MBIDs in track:
    - Total Input: {}
    - False: {}

### Number of rec-MBIDs in track_gid_redirect:
    - Total Input: {}
    - False: {}
""".format(len(in_for_track),
            out_for_track.value_counts()[0],
            len(in_for_track),
            out_for_track_redir.value_counts()[0])


display(MD(report_track))


## Track-MBIDs

### Number of rec-MBIDs in track:
    - Total Input: 381145
    - False: 381145

### Number of rec-MBIDs in track_gid_redirect:
    - Total Input: 381145
    - False: 381145


In [21]:
report_rec = """
## recording-MBIDs
### Number of rec-MBIDs in recording-MBID:
    - Total Input: {} rows
    - (True, False) = {}, {}
    - i.e. {}% rec-MBIDs belong to the recording-MBID table. 
    (and {}% rec-MBIDs DONT belong to the recording-MBID table.)


### Number of UNKNOWN rec-MBIDs in redirected-MBID:
    - Note: "UNKNOWN rec-MBIDs" = rec-MBIDs from the MLHD that were NOT found in the MB recording table.
    - Total Input: {} rows
    - (True, False): {}, {}
    - i.e. {}% of UNKNOWN rec-MBIDs don't have any corresponding redirects.
    (and {}% of UNKNOWN rec-MBIDs can be redirected to a valid rec-MBID)

i.e. *_{}% of ALL UNIQUE rec-MBIDs are unknown. (Don't belong to the recording table OR have a valid redirect.)_*

### Number of rec-MBIDs in canonical-MBID table:
    - _pending_
""".format(len(in_for_rec),
            out_for_rec.value_counts()[1],
            out_for_rec.value_counts()[0],
            str((out_for_rec.value_counts()[1]/len(in_for_rec))*100)[:5],
            str((out_for_rec.value_counts()[0]/len(in_for_rec))*100)[:5],

            len(in_for_rec_redir),
            out_for_rec_redir.value_counts()[1],
            out_for_rec_redir.value_counts()[0],
            str((out_for_rec_redir.value_counts()[0]/len(in_for_rec_redir))*100)[:5],
            str((out_for_rec_redir.value_counts()[1]/len(in_for_rec_redir))*100)[:5],
        
            str((out_for_rec_redir.value_counts()[0]/len(in_for_rec))*100)[:5]
            )

display(MD(report_rec))


## recording-MBIDs
### Number of rec-MBIDs in recording-MBID:
    - Total Input: 381145 rows
    - (True, False) = 295214, 85931
    - i.e. 77.45% rec-MBIDs belong to the recording-MBID table. 
    (and 22.54% rec-MBIDs DONT belong to the recording-MBID table.)


### Number of UNKNOWN rec-MBIDs in redirected-MBID:
    - Note: "UNKNOWN rec-MBIDs" = rec-MBIDs from the MLHD that were NOT found in the MB recording table.
    - Total Input: 85931 rows
    - (True, False): 84783, 1148
    - i.e. 1.335% of UNKNOWN rec-MBIDs don't have any corresponding redirects.
    (and 98.66% of UNKNOWN rec-MBIDs can be redirected to a valid rec-MBID)

i.e. *_0.301% of ALL UNIQUE rec-MBIDs are unknown. (Don't belong to the recording table OR have a valid redirect.)_*

### Number of rec-MBIDs in canonical-MBID table:
    - _pending_


In [22]:
report_artist = """
### Number of artist-MBIDs in MB artist table:
    - Total Input: {}
    - True, False = {}, {}
    - i.e. {}% artist-MBIDs exist in the MB artist table.

### Number of UNKNOWN artist-MBIDs in MB artist_gid_redirect table:
    - Total Input: {}
    - True, False = {}, {}
    - i.e. {}% artist-MBIDs exist in the MB artist table.

i.e. {}% of ALL UNIQUE artist-MBIDs are completely unknown. (Don't belong to artist table OR artist_gid_redirect table.)
""".format(len(input_artists),
            output_artists.value_counts()[0],
            output_artists.value_counts()[1],
            str((output_artists.value_counts()[1]/len(input_artists))*100)[:5],
            
            len(input_artists_redirect),
            output_artists_redirect.value_counts()[0],
            output_artists_redirect.value_counts()[1],
            str((output_artists_redirect.value_counts()[1]/len(input_artists_redirect))*100)[:5],

            str((output_artists_redirect.value_counts()[0]/len(input_artists_redirect))*100)[:5]
            )

display(MD(report_artist))


### Number of artist-MBIDs in MB artist table:
    - Total Input: 33435
    - True, False = 2596, 30839
    - i.e. 92.23% artist-MBIDs exist in the MB artist table.

### Number of UNKNOWN artist-MBIDs in MB artist_gid_redirect table:
    - Total Input: 2596
    - True, False = 143, 2453
    - i.e. 94.49% artist-MBIDs exist in the MB artist table.

i.e. 5.508% of ALL UNIQUE artist-MBIDs are completely unknown. (Don't belong to artist table OR artist_gid_redirect table.)


# Making a list of unknown MBIDs for testing!

In [23]:
# Listing unknown rec-MBIDs with timestamp

unk_rec_mbid = in_for_rec_redir[-out_for_rec_redir]

main_df_rec_timestamp = df[['timestamp', 'recording-MBID']].drop_duplicates('recording-MBID')
main_df_rec_timestamp.timestamp = pd.to_datetime(main_df_rec_timestamp.timestamp, unit='s', utc=True)

unk_rec_mbid_timestamp = main_df_rec_timestamp[main_df_rec_timestamp['recording-MBID'].isin(unk_rec_mbid)]
unk_rec_mbid_timestamp.sort_values(by = 'timestamp', inplace=True)

unk_rec_mbid_timestamp.to_csv('unk_ids/unk_rec_mbids_timestamp.txt', index=False)


unk_rec_mbid_timestamp

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  unk_rec_mbid_timestamp.sort_values(by = 'timestamp', inplace=True)


Unnamed: 0,timestamp,recording-MBID
18,2005-02-14 01:24:47+00:00,f052641b-c2b9-48f4-8d6b-61a4de888ab4
525,2005-02-22 22:07:08+00:00,c8138848-7ddf-4c03-b6cb-c0d7c1665c65
937,2005-02-28 01:18:07+00:00,94079821-b54f-4dde-9789-9bbb513ee5c6
1059,2005-03-02 06:46:52+00:00,3139d467-a5ff-45ae-8546-1688951ed025
993,2005-03-14 20:49:52+00:00,7c543dbf-1451-4975-aec3-add4c1018e7d
...,...,...
40729,2013-08-02 09:11:49+00:00,5d53b863-0061-440f-819f-227478845a5a
17910,2013-08-04 11:10:14+00:00,9ebd01b2-39a4-4d55-a637-32e84c6e042d
1389,2013-08-10 16:30:43+00:00,2f7bd8f9-a524-4d4a-9956-d8aa61bace0f
18945,2013-08-23 14:25:26+00:00,79508a19-fe15-468f-9e6d-3bad397692db


In [25]:
%%time
# Listing unknown rec-MBIDs with timestamp

unk_artist_mbid = input_artists_redirect[-output_artists_redirect]

main_df_artist_timestamp = df[['timestamp', 'artist-MBID']].drop_duplicates('artist-MBID')
main_df_artist_timestamp.timestamp = pd.to_datetime(main_df_artist_timestamp.timestamp, unit='s', utc=True)

unk_artist_mbid_timestamp = main_df_artist_timestamp[main_df_artist_timestamp['artist-MBID'].isin(unk_artist_mbid)]
unk_artist_mbid_timestamp.sort_values(by = 'timestamp', inplace=True)
unk_artist_mbid_timestamp.to_csv('unk_ids/unk_artist_mbids_timestamp.txt', index=False)

unk_artist_mbid_timestamp

CPU times: user 397 ms, sys: 22.4 ms, total: 419 ms
Wall time: 457 ms


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,timestamp,artist-MBID
5758,2005-05-30 15:20:47+00:00,02e786a4-613b-48de-b496-9205ce38298b
7462,2005-06-24 01:06:12+00:00,f3d0e228-57d5-49d4-a1e3-094e5c27caf2
13320,2005-10-12 17:25:31+00:00,3819dcbe-06a9-45bd-bd93-89316f7e1b15
17866,2006-03-15 17:43:58+00:00,e1df836f-74da-4a61-a6f8-fd2cbf4e594f
3599,2006-04-21 19:01:45+00:00,d2374cc1-a6f5-42af-bd1d-ecbd3355edce
...,...,...
53667,2013-02-05 16:13:08+00:00,b3b07880-f760-4430-beac-e715185d467f
31085,2013-04-02 20:12:19+00:00,9bafefe7-65af-4194-a16d-958884d7da69
46514,2013-04-11 23:13:11+00:00,08c31b75-d99a-47ce-bf3f-43cc122832fb
15868,2013-05-28 14:50:33+00:00,56eebce4-e56c-4b2f-b7fa-0ddc7816d0a8
