In [1]:
import pandas as pd
import spotipy
import time
from spotipy.oauth2 import SpotifyClientCredentials
from spotipy import SpotifyException
from urllib.request import urlretrieve
import os
from ratelimit import limits, sleep_and_retry
import traceback
import requests



In [2]:
# Build the client-credentials auth manager and the shared Spotify client `sp`
# used by the scraping cells below.
# The id and secret are blank in this copy of the notebook; supply real values
# at run time (ideally from the environment rather than committing them).
client_credentials_manager = SpotifyClientCredentials(
    client_id='',
    client_secret='',
)

sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)



In [3]:
# Load the band list, drop rows that repeat a band_name, and write the result
# back to the same file. The shape is printed before and after so the number
# of removed rows is visible.
df = pd.read_csv('bands_by_genre.csv')
print(df.shape)
df = df.drop_duplicates(subset='band_name')
print(df.shape)
# index=False: the file is read back by this same cell, so writing the index
# would add one more unnamed column to the CSV on every run.
df.to_csv('bands_by_genre.csv', index=False)

(6333, 8)
(6333, 8)


In [6]:
# HTTPS session with a single pooled connection and up to 3 transport retries.
# NOTE(review): `session` is not passed to `sp` or to urlretrieve anywhere in
# the visible cells, so it does not appear to influence any request - confirm.
# NOTE(review): verify that requests actually honours a `timeout` attribute set
# on the Session object; timeouts are normally given per request.
session = requests.Session()
session.mount(
    'https://',
    requests.adapters.HTTPAdapter(
        pool_connections=1,
        pool_maxsize=1,
        max_retries=3,
        pool_block=True,
    ),
)
session.timeout = 60

def download_image(url, directory, filename):
    """Fetch `url` into `directory` under a filesystem-safe form of `filename`.

    The directory is created when missing. Surrounding whitespace is stripped
    from `filename`, and every character that is not alphanumeric, a space,
    '_', '-' or '.' is replaced by '_'.

    Returns the path of the saved file, or None (after printing the error and
    its traceback) when the download raises.
    """
    allowed_punctuation = {' ', '_', '-', '.'}

    os.makedirs(directory, exist_ok=True)

    # Build the sanitized name character by character
    safe_chars = []
    for ch in filename.strip():
        keep = ch.isalnum() or ch in allowed_punctuation
        safe_chars.append(ch if keep else '_')
    filename = "".join(safe_chars)

    filepath = os.path.join(directory, filename)
    try:
        urlretrieve(url, filepath)
    except Exception as e:
        # Best effort: report the failure and let the caller carry on
        print(f"Error downloading image for {filename}: {e}")
        traceback.print_exc()
        return None
    return filepath

# Bands that already have rows in albums.csv are skipped by the scraping loop.
# NOTE(review): the loop below appends to albums2.csv, not albums.csv - confirm
# that reading the resume state from albums.csv is intended.
processed = pd.read_csv('albums.csv')
processed_bands = {name for name in processed['band_name']}  # set for O(1) membership tests

# Rate-limited single attempt. The limiter decorates this helper so that every
# attempt made by call_spotify_api, retries included, passes through it.
@sleep_and_retry
@limits(calls=160, period=60)  # Adjust rate limit as per Spotify API limits
def _rate_limited_call(func, *args, **kwargs):
    """Invoke func(*args, **kwargs) once, subject to the rate limiter."""
    return func(*args, **kwargs)


def call_spotify_api(func, *args, **kwargs):
    """Call `func(*args, **kwargs)` through the rate limiter, retrying on timeouts.

    On requests.exceptions.Timeout a message is printed, the function waits
    10 seconds and tries again, for as long as timeouts keep occurring. Any
    other exception propagates to the caller unchanged.

    The retry is a loop rather than a recursive call so that a long run of
    timeouts cannot exhaust the interpreter's recursion limit.
    """
    while True:
        try:
            return _rate_limited_call(func, *args, **kwargs)
        except requests.exceptions.Timeout:
            print("Request timed out, retrying...")
            time.sleep(10)  # Wait for 10 seconds before retrying

# Band table; the loop below reads its band_id, band_name and genre columns.
df = pd.read_csv('bands_by_genre_id.csv')

# Slice the band_id column into consecutive batches of `step` ids so progress
# can be written to disk after each batch.
start, end, step = 0, len(df['band_id']), 100
chunklist = [df['band_id'][i: i + step] for i in range(start, end, step)]

# Column-oriented accumulator for the album rows collected by the loop below.
dct = {
    column: []
    for column in ('genre', 'band_name', 'album_name', 'album_release_date', 'album_id')
}

# Walk the bands chunk by chunk: fetch each unprocessed band's releases, keep
# the ones whose album_type is 'album', record them in `dct`, download the
# first listed cover image, and append the chunk's rows to albums2.csv.
for index, chunk in enumerate(chunklist):
    for artist_id in chunk:
        try:
            # One lookup per band; the first matching row supplies name and genre
            band_rows = df.loc[df['band_id'] == artist_id]
            band_name = band_rows['band_name'].values[0]
            if band_name in processed_bands:
                continue
            genre = band_rows['genre'].values[0]

            albums = call_spotify_api(sp.artist_albums, artist_id)
            for album in albums['items'] or []:
                if album['album_type'] != 'album':
                    continue

                dct['genre'].append(genre)
                dct['band_name'].append(band_name)
                dct['album_name'].append(album['name'])
                dct['album_release_date'].append(album['release_date'])
                dct['album_id'].append(album['id'])

                if album['images']:
                    url = album['images'][0]['url']
                    filename = f"{band_name}_{album['name']}.jpg"
                    sanitized_genre = "".join(c for c in genre if c.isalnum() or c in {' ', '_', '-'}).strip()
                    directory = os.path.join("data", sanitized_genre)
                    download_image(url, directory, filename)
                time.sleep(0.5)  # Add a short delay between album downloads

        # requests' ConnectionError is not a subclass of the builtin ConnectionError,
        # so it has to be listed explicitly for network failures to be handled here.
        except (SpotifyException, ConnectionError, requests.exceptions.ConnectionError) as e:
            print(f"Error occurred at index {index}: {e}")
            time.sleep(10)
            continue
    print(band_name)

    # Append this chunk's rows to albums2.csv (header only when the file is new)
    albumdf = pd.DataFrame(dct)
    albumdf.to_csv('albums2.csv', mode='a', index=False, header=not os.path.exists('albums2.csv'))
    print(f'progress saved chunk {index}/{len(chunklist)}')

    # Reset the accumulator: the file is opened in append mode, so rows left in
    # `dct` would be written again with every later chunk.
    for key in dct:
        dct[key] = []

# No extra save after the loop: every chunk has already been appended above.

In [5]:
# HTTPS session with a single pooled connection and up to 3 transport retries.
# NOTE(review): `session` is not handed to `sp` in the visible cells, so the
# audio_features call below does not appear to use it - confirm.
session = requests.Session()
session.mount(
    'https://',
    requests.adapters.HTTPAdapter(
        pool_connections=1,
        pool_maxsize=1,
        max_retries=3,
        pool_block=True,
    ),
)
session.timeout = 60
# Manual probe: request audio features for a hard-coded list of track ids in a
# single sp.audio_features call and print the raw response.
# The call goes straight to `sp` (not through call_spotify_api), so there is no
# rate limiting or retry handling here; the output recorded for this cell is a
# SpotifyException with http status 429.
test = sp.audio_features(['3uSATa7xWXVJrMhJJSnGGk', '5iG1GycXpheJ5xMW0gNvKv', '2hBn08QphNtdnhwDNtnZMg', '6HeYgpgqEo8SGis4dHwRbu', '1ALXFmNr7lrTug2LAE989q', '6eXWc7irALyENtbAD2TTOT', '6eiFVKWhv9EYL2RdrVFtyC', '0aHwh2rqyJNN0eZ4sGZLEt', '1oCQLSGSiHozdyx7lOqkIt', '2ZPnedMaS4W1de05Xz18hF', '3L82LuRzu60EPSK1yf5Pxr', '589aAz8258JEsMwpJglalU', '4qAakbD2wRBjnKxlU8EywU', '2bvRtBoXOc1eeMQ9xQRuif', '0kNrZfKZMvqxK36pUeB4wX', '5icLTeOSXTvkLIi08P2k4g', '5OxoeZOWZTMj06cPftX1Iz', '74ipwWk1JxGtqZjKNq57rm', '27oBnGKPph4BtZKKjd9b2j', '1nwQ9wWcGjAQt4u65AKkbk', '4HcGbZ4n4j1rObwTmix9Ca', '5pEBCke4Ft8N5v8jww1eUn', '5jMMdrSIAGueAxfeI1AG5K', '5PNXZX1QUeae7WjsCfycwc', '3LGrbnTyEszZ8JlWLFnznz', '0YSoqT8eVDJFIui9PR4aGk', '0zKAEqyCG1nudg3bFRrZvt', '4yE0IGEKGGd1w2rWlUywK8', '57ou3Gno3QiWTuyOqMC0aS', '12yBSkAxdEFXFkclZ8wc8S', '5cpoALsvCDLvrpuuVZb6gH', '78OYMU05t1YJUHyOmGM2su', '4nvFgshg9sO9YVnlKgQysN', '0F0lB96kMC1BRDkjvgMiWG', '10qHfyQqzsbBDwob8nk8Ol', '6fjer7Ed6ZUna1EMbwbDrN', '2NBvvx52QAjipBnXvAekfs', '3JjZq20Kv6UphyyirAaY9A', '1dzmb0UZZatm5VCD8fI8UF', '0dcFoXAFWxx7oSIIq2mtkW', '3m8zeHjUKHDVHbx5JmS2qx', '4IuzY8RNrCGPIchltRMWzz', '4Ixe61Sv0WlxBhDjRtEgH3', '5EA75qDylWv25sjzyM1BOa', '1CNCFVas53ePcTa6xjMhoF', '3eB1b4DL6O9TMOksOYg83l', '4VSgmiMD3DGZXVtsVFZDB1', '0xdqFv6Mvyg7qqyzU3NcpU', '31zZcvGSqPm4AFCMDFEar1', '2L0YIc1p5sFVVbDb6xSiJk', '70YTikYfDKj3mgH5VQ7EcH', '78s2i68GL34JnJgAbTSKxQ', '1QLv5VkgmXbdYMe3CC17sI', '0TAdrMcXwaah5FYMDVikh1', '11txewAaM18eDXWYXwnkH7', '1HgwkxTRigwfI5h9XPdX0d', '2EEzTxM4MMjTuDG146TK3G', '62qcKV45EjiFwEk074P9Mk', '3BVJ1nwvdmYjXJ7gBTXTGX', '13Uvqll8OQDjw3wDweIK9y', '3e7wv9TChjWrnXHrk5NyBU', '2PSXyiERCaNmRyFA06UMjq', '0qbV4e18lWrTTxlswVoLbI', '09VNOLsjKwQasQXhsS8sRA', '2VTJaQNkvxU4S2zjkVJirF', '7MtBapy0MnmBVsU3o0yrFz', '5ZTSBNqk3wDvhgN12c2ECP', '4qsVHamREe2Vpfur0jczkR', '1vy3wxf8GbW7DBSfxPDKBM', '42TNtAEG1xM0bLTVGMl6gb', '7q7uolRH6h7tWETTTUmjPI', '4CO6V2Nkr7gQCXwfuJuUDm', '5eBEBLKwazqvkfk9uvqvLh', '3J4ZZ83PrVqPgN7KY6jn4S', '2J3XzDLjbXkny8qhr33zTh', 
'5JAnXx5BBES4FEf4d4t4CC', '4hjTm9cn9ZPwfKnbyW6frn', '6xFoN6e5B9DTw6aV6yT3KF', '105mv6XqVQMEUVfPkIeAQh', '3zG3x7RA9DjR7a9JMNWv1N', '6ATqmUzk4xbq3XpVsZMPW5', '0nP2Se8dSKsdOkg6XEee60', '2lJhUIQR7CYQZD6tyl3aF0', '7ucFoRbiHn7SOVfHFHRjbE', '61MvWKeLK4tjdSuuB8y8B8', '3RlKO1La6qRZp4qWVpZCPp', '7vxLun2SVGuJII6DkbyMso', '4zfEDLbmmI7V9Yt12s0DQH', '3wHjbDGFrHN17FWg4owTkc', '4y9IRABVc2TzATl8mTkexk', '6z8LLgG81xuNzqJO5XjWPP', '4lmSiv5xEJJZsO40XtSK1J', '2UIuYNBg8r5pkoG2MjdLZT', '1WgzDxuLr7iCKLKHGcygql', '3I4tEaOcF0wtl6179DqbZb', '7c7z3Ep51f2LIANMPRH55h', '5jEfQZmmwwKymr9cfmgfsd', '4GVS3i2hzZZaL1OZigNafO', '1JOkHtzr0DBLFKpccLO5Yq', '5kfSRmjf9aSougnHL6KgLH']
)
print(test)

Max Retries reached


SpotifyException: http status: 429, code:-1 - /v1/audio-features/?ids=3uSATa7xWXVJrMhJJSnGGk,5iG1GycXpheJ5xMW0gNvKv,2hBn08QphNtdnhwDNtnZMg,6HeYgpgqEo8SGis4dHwRbu,1ALXFmNr7lrTug2LAE989q,6eXWc7irALyENtbAD2TTOT,6eiFVKWhv9EYL2RdrVFtyC,0aHwh2rqyJNN0eZ4sGZLEt,1oCQLSGSiHozdyx7lOqkIt,2ZPnedMaS4W1de05Xz18hF,3L82LuRzu60EPSK1yf5Pxr,589aAz8258JEsMwpJglalU,4qAakbD2wRBjnKxlU8EywU,2bvRtBoXOc1eeMQ9xQRuif,0kNrZfKZMvqxK36pUeB4wX,5icLTeOSXTvkLIi08P2k4g,5OxoeZOWZTMj06cPftX1Iz,74ipwWk1JxGtqZjKNq57rm,27oBnGKPph4BtZKKjd9b2j,1nwQ9wWcGjAQt4u65AKkbk,4HcGbZ4n4j1rObwTmix9Ca,5pEBCke4Ft8N5v8jww1eUn,5jMMdrSIAGueAxfeI1AG5K,5PNXZX1QUeae7WjsCfycwc,3LGrbnTyEszZ8JlWLFnznz,0YSoqT8eVDJFIui9PR4aGk,0zKAEqyCG1nudg3bFRrZvt,4yE0IGEKGGd1w2rWlUywK8,57ou3Gno3QiWTuyOqMC0aS,12yBSkAxdEFXFkclZ8wc8S,5cpoALsvCDLvrpuuVZb6gH,78OYMU05t1YJUHyOmGM2su,4nvFgshg9sO9YVnlKgQysN,0F0lB96kMC1BRDkjvgMiWG,10qHfyQqzsbBDwob8nk8Ol,6fjer7Ed6ZUna1EMbwbDrN,2NBvvx52QAjipBnXvAekfs,3JjZq20Kv6UphyyirAaY9A,1dzmb0UZZatm5VCD8fI8UF,0dcFoXAFWxx7oSIIq2mtkW,3m8zeHjUKHDVHbx5JmS2qx,4IuzY8RNrCGPIchltRMWzz,4Ixe61Sv0WlxBhDjRtEgH3,5EA75qDylWv25sjzyM1BOa,1CNCFVas53ePcTa6xjMhoF,3eB1b4DL6O9TMOksOYg83l,4VSgmiMD3DGZXVtsVFZDB1,0xdqFv6Mvyg7qqyzU3NcpU,31zZcvGSqPm4AFCMDFEar1,2L0YIc1p5sFVVbDb6xSiJk,70YTikYfDKj3mgH5VQ7EcH,78s2i68GL34JnJgAbTSKxQ,1QLv5VkgmXbdYMe3CC17sI,0TAdrMcXwaah5FYMDVikh1,11txewAaM18eDXWYXwnkH7,1HgwkxTRigwfI5h9XPdX0d,2EEzTxM4MMjTuDG146TK3G,62qcKV45EjiFwEk074P9Mk,3BVJ1nwvdmYjXJ7gBTXTGX,13Uvqll8OQDjw3wDweIK9y,3e7wv9TChjWrnXHrk5NyBU,2PSXyiERCaNmRyFA06UMjq,0qbV4e18lWrTTxlswVoLbI,09VNOLsjKwQasQXhsS8sRA,2VTJaQNkvxU4S2zjkVJirF,7MtBapy0MnmBVsU3o0yrFz,5ZTSBNqk3wDvhgN12c2ECP,4qsVHamREe2Vpfur0jczkR,1vy3wxf8GbW7DBSfxPDKBM,42TNtAEG1xM0bLTVGMl6gb,7q7uolRH6h7tWETTTUmjPI,4CO6V2Nkr7gQCXwfuJuUDm,5eBEBLKwazqvkfk9uvqvLh,3J4ZZ83PrVqPgN7KY6jn4S,2J3XzDLjbXkny8qhr33zTh,5JAnXx5BBES4FEf4d4t4CC,4hjTm9cn9ZPwfKnbyW6frn,6xFoN6e5B9DTw6aV6yT3KF,105mv6XqVQMEUVfPkIeAQh,3zG3x7RA9DjR7a9JMNWv1N,6ATqmUzk4xbq3XpVsZMPW5,0nP2Se8dSKsdOkg6XEee60,2lJhUIQR7CYQZD6tyl3aF0,7ucFoRbiHn7SOVfHFHRjb
E,61MvWKeLK4tjdSuuB8y8B8,3RlKO1La6qRZp4qWVpZCPp,7vxLun2SVGuJII6DkbyMso,4zfEDLbmmI7V9Yt12s0DQH,3wHjbDGFrHN17FWg4owTkc,4y9IRABVc2TzATl8mTkexk,6z8LLgG81xuNzqJO5XjWPP,4lmSiv5xEJJZsO40XtSK1J,2UIuYNBg8r5pkoG2MjdLZT,1WgzDxuLr7iCKLKHGcygql,3I4tEaOcF0wtl6179DqbZb,7c7z3Ep51f2LIANMPRH55h,5jEfQZmmwwKymr9cfmgfsd,4GVS3i2hzZZaL1OZigNafO,1JOkHtzr0DBLFKpccLO5Yq,5kfSRmjf9aSougnHL6KgLH:
 Max Retries, reason: too many 429 error responses

In [23]:
# HTTPS session with a single pooled connection and up to 3 transport retries.
# NOTE(review): `session` is not passed to `sp` or to urlretrieve anywhere in
# the visible cells, so it does not appear to influence any request - confirm.
# NOTE(review): verify that requests actually honours a `timeout` attribute set
# on the Session object; timeouts are normally given per request.
session = requests.Session()
session.mount(
    'https://',
    requests.adapters.HTTPAdapter(
        pool_connections=1,
        pool_maxsize=1,
        max_retries=3,
        pool_block=True,
    ),
)
session.timeout = 60

def download_preview(url, directory, filename):
    """Download a track preview to `<directory>/<filename>.mp3`.

    The directory is created when missing.

    Returns:
        (filepath, 'Y') when the download succeeds.
        None when the download raises; the failure is printed together with
        the filename it belongs to. Callers are expected to treat a falsy
        result as "not downloaded".
    """
    os.makedirs(directory, exist_ok=True)

    # Save the preview with the given filename (the caller passes the track id)
    filepath = os.path.join(directory, f"{filename}.mp3")
    try:
        urlretrieve(url, filepath)
    except Exception as e:
        # Best effort: a missing preview must not stop the scrape. The former
        # per-call counter always printed 1, so report the affected file instead.
        print(f'Preview for {filename} could not be downloaded: {e}')
        return None
    return filepath, 'Y'

def process_album(tracks, album_id):
    """Record an album's not-yet-processed tracks and download their previews.

    Args:
        tracks: mapping with an 'items' list of track dicts; each dict is read
            for its 'id', 'name' and 'preview_url' keys.
        album_id: id used to look up genre, band name and album name in the
            module-level `albums` frame.

    Side effects:
        Extends the module-level `dct` lists with one entry per new track,
        adds (album_id, track_id) pairs to `processed_tracks`, calls
        download_preview for each track that has a preview URL, and sleeps
        0.5 s at the end. Does nothing when `tracks['items']` is empty.

    Raises:
        IndexError: if `album_id` has no row in `albums`.
    """
    if not tracks['items']:
        return

    # Single lookup; the first matching row supplies the album metadata
    album_rows = albums.loc[albums['album_id'] == album_id]
    genre = album_rows['genre'].values[0]
    band_name = album_rows['band_name'].values[0]
    album_name = album_rows['album_name'].values[0]

    track_ids = []
    track_titles = []
    preview_downloaded = []
    # (position in the lists above, track_id, preview url). Keeping the position
    # with the URL ties each download to its own track even when other tracks
    # of the album have no preview.
    pending_downloads = []
    unavailable_count = 0

    for track in tracks['items']:
        track_id = track['id']
        track_title = track['name']
        if (album_id, track_id) in processed_tracks:
            continue
        track_ids.append(track_id)
        track_titles.append(track_title)
        preview_downloaded.append('N')  # Updated below if the download succeeds
        if track['preview_url']:
            pending_downloads.append((len(track_ids) - 1, track_id, track['preview_url']))
        else:
            unavailable_count += 1

    if pending_downloads:
        sanitized_genre = "".join(c for c in genre if c.isalnum() or c in {' ', '_', '-'}).strip()
        directory = os.path.join("data", sanitized_genre)
        for position, track_id, url in pending_downloads:
            result = download_preview(url, directory, track_id)
            if result:
                _, downloaded = result
                preview_downloaded[position] = downloaded

    new_rows = len(track_ids)
    dct['genre'].extend([genre] * new_rows)
    dct['band_name'].extend([band_name] * new_rows)
    dct['album_name'].extend([album_name] * new_rows)
    dct['album_id'].extend([album_id] * new_rows)
    dct['track_id'].extend(track_ids)
    dct['track_title'].extend(track_titles)
    dct['preview_downloaded'].extend(preview_downloaded)

    processed_tracks.update((album_id, track_id) for track_id in track_ids)

    if unavailable_count > 0:
        print(f'{unavailable_count} preview tracks not available for {album_name} by {band_name}')

    time.sleep(0.5)  # Add a short delay between album downloads

# Album table produced by the album scrape; its album_id column drives the loop below.
albums = pd.read_csv('albums.csv')

# Resume state: everything already written to tracks.csv is skipped.
processed = pd.read_csv('tracks.csv')
processed_albums = {album_id for album_id in processed['album_id']}  # set for O(1) lookups
processed_tracks = {
    (album_id, track_id)
    for album_id, track_id in zip(processed['album_id'], processed['track_id'])
}  # (album_id, track_id) pairs already recorded

# Rate-limited single attempt. The limiter decorates this helper so that every
# attempt made by call_spotify_api, retries included, passes through it.
@sleep_and_retry
@limits(calls=100, period=60)  # Adjust rate limit as per Spotify API limits
def _rate_limited_call(func, *args, **kwargs):
    """Invoke func(*args, **kwargs) once, subject to the rate limiter."""
    return func(*args, **kwargs)


def call_spotify_api(func, *args, **kwargs):
    """Call `func(*args, **kwargs)` through the rate limiter, retrying on timeouts.

    On requests.exceptions.Timeout a message is printed, the function waits
    10 seconds and tries again, for as long as timeouts keep occurring. Any
    other exception propagates to the caller unchanged.

    The retry is a loop rather than a recursive call so that a long run of
    timeouts cannot exhaust the interpreter's recursion limit.
    """
    while True:
        try:
            return _rate_limited_call(func, *args, **kwargs)
        except requests.exceptions.Timeout:
            print("Request timed out, retrying...")
            time.sleep(10)  # Wait for 10 seconds before retrying

# Slice the album_id column into consecutive batches of `step` ids so progress
# can be written to disk after each batch.
start, end, step = 0, len(albums['album_id']), 20
chunklist = [albums['album_id'][i: i + step] for i in range(start, end, step)]

# Column-oriented accumulator that process_album fills for the current batch.
dct = {
    column: []
    for column in (
        'genre', 'band_name', 'album_name', 'album_id',
        'track_title', 'track_id', 'preview_downloaded',
    )
}

# Walk the albums chunk by chunk: fetch the track listing of every album that is
# not in tracks.csv yet, let process_album record and download it, then append
# the chunk's rows to tracks.csv.
for index, chunk in enumerate(chunklist):
    for album_id in chunk:
        if album_id in processed_albums:
            continue
        try:
            album_tracks = call_spotify_api(sp.album_tracks, album_id)
            process_album(album_tracks, album_id)
        # requests' ConnectionError is not a subclass of the builtin ConnectionError,
        # so it has to be listed explicitly for network failures to be handled here.
        except (SpotifyException, ConnectionError, requests.exceptions.ConnectionError) as e:
            print(f"Error occurred at index {index}: {e}")
            time.sleep(10)
            continue

    # Build this chunk's frame, dropping repeated (album_id, track_id) rows
    trackdf = pd.DataFrame(dct).drop_duplicates(subset=['album_id', 'track_id'])

    # Save the DataFrame to CSV, appending to the existing file
    trackdf.to_csv('tracks.csv', mode='a', index=False, header=not os.path.exists('tracks.csv'))
    print(f'progress saved chunk {index}/{len(chunklist)}')

    # Reset the dct dictionary to avoid duplicates in the next chunk
    for key in dct:
        dct[key] = []

# No extra save after the loop: `dct` is emptied after every chunk, so a final
# write could only ever append an empty frame.
print('Album scraping completed')

progress saved chunk 0/1221
progress saved chunk 1/1221
progress saved chunk 2/1221
progress saved chunk 3/1221
progress saved chunk 4/1221
progress saved chunk 5/1221
progress saved chunk 6/1221
progress saved chunk 7/1221
progress saved chunk 8/1221
progress saved chunk 9/1221
progress saved chunk 10/1221
progress saved chunk 11/1221
progress saved chunk 12/1221
progress saved chunk 13/1221
progress saved chunk 14/1221
progress saved chunk 15/1221
progress saved chunk 16/1221
progress saved chunk 17/1221
progress saved chunk 18/1221
progress saved chunk 19/1221
progress saved chunk 20/1221
progress saved chunk 21/1221
progress saved chunk 22/1221
progress saved chunk 23/1221
progress saved chunk 24/1221
progress saved chunk 25/1221
progress saved chunk 26/1221
progress saved chunk 27/1221
progress saved chunk 28/1221
progress saved chunk 29/1221
progress saved chunk 30/1221
progress saved chunk 31/1221
progress saved chunk 32/1221
progress saved chunk 33/1221
progress saved chunk 34/

In [15]:
# Column layout of audio_features.csv. The feature-scraping cell appends rows
# without a header, so the column order here must match the order it writes.
dct = {
    'id':[],
    'acousticness': [],
    'danceability': [],
    'duration_ms': [],
    'energy': [],
    'instrumentalness':[],
    'key': [],
    'liveness': [],
    'loudness_dB': [],
    'mode': [],
    'speechiness': [],
    'tempo':[],
    'time_signature':[],
    'valence': []
}

df = pd.DataFrame(dct)
# Only create the header-only file when it does not exist yet: writing it
# unconditionally would truncate a file that already holds scraped features
# whenever this cell is run again.
if not os.path.exists('audio_features.csv'):
    df.to_csv('audio_features.csv', index=False)

In [7]:
def process_features(features):
    """Turn an audio-features response into a column-oriented dict.

    Args:
        features: list of per-track feature dicts (entries may be None), or
            None when the API call returned nothing.

    Returns:
        {} (after printing an error) when `features` is None. Otherwise a dict
        mapping each wanted feature name to a list of values, one per track,
        skipping None entries and tracks whose 'id' is already in the
        module-level `processed_features` set. Sleeps 0.5 s before returning
        in that case.
    """
    if features is None:
        print("Error: No data returned from Spotify API call")
        return {}

    desired_keys = [
        'id', 'acousticness', 'danceability', 'duration_ms', 'energy',
        'instrumentalness', 'key', 'liveness', 'loudness', 'mode',
        'speechiness', 'tempo', 'time_signature', 'valence'
    ]

    # Keep only real entries that have not been stored yet
    fresh_items = []
    for item in features:
        if item is None:
            continue
        if item['id'] in processed_features:
            continue
        fresh_items.append(item)

    result_dict = {}
    for key in desired_keys:
        result_dict[key] = [item[key] for item in fresh_items]

    time.sleep(0.5)  # Add a short delay between album downloads
    return result_dict

# Tracks whose audio features should be fetched (the track_id column is used below)
tracks = pd.read_csv('tracks.csv')

# Resume state: ids already present in audio_features.csv are skipped
processed = pd.read_csv('audio_features.csv')
processed_features = {feature_id for feature_id in processed['id']}
print(f'nr of processed songs = {len(processed_features)}')

# Rate-limited single attempt. The limiter decorates this helper so that every
# attempt made by call_spotify_api, retries included, passes through it.
@sleep_and_retry
@limits(calls=100, period=60)  # Adjust rate limit as per Spotify API limits
def _rate_limited_call(func, *args, **kwargs):
    """Invoke func(*args, **kwargs) once, subject to the rate limiter."""
    return func(*args, **kwargs)


def call_spotify_api(func, *args, **kwargs):
    """Call `func(*args, **kwargs)` through the rate limiter, retrying on timeouts.

    On requests.exceptions.Timeout a message is printed, the function waits
    10 seconds and tries again, for as long as timeouts keep occurring. Any
    other exception propagates to the caller unchanged.

    The retry is a loop rather than a recursive call so that a long run of
    timeouts cannot exhaust the interpreter's recursion limit.
    """
    while True:
        try:
            return _rate_limited_call(func, *args, **kwargs)
        except requests.exceptions.Timeout:
            print("Request timed out, retrying...")
            time.sleep(10)  # Wait for 10 seconds before retrying
    
# Fetch audio features in batches of 100 track ids and append each batch of new
# rows to audio_features.csv.
dct = {}
c=0  # not used in the visible cells; kept in case another cell reads it
# NOTE(review): the range starts at row 2000, so the first 2000 rows of
# tracks.csv are never considered here - confirm this offset is intentional.
chunklist = [tracks['track_id'][i: i + 100] for i in range(2000, len(tracks['track_id']), 100)]
for index, chunk in enumerate(chunklist):
    unprocessed_chunk = [track_id for track_id in chunk if track_id not in processed_features]
    if not unprocessed_chunk:
        continue

    try:
        features = call_spotify_api(sp.audio_features, unprocessed_chunk)
    # Same handling as the album and track loops; requests' ConnectionError is
    # listed explicitly because it is not a subclass of the builtin one.
    except (SpotifyException, ConnectionError, requests.exceptions.ConnectionError) as e:
        print(f"Error occurred at index {index}: {e}")
        time.sleep(10)
        continue

    dct = process_features(features)
    if not dct or not dct['id']:
        # Nothing usable came back (process_features returns {} when the API
        # gave no data). Building a frame from {} would make the 'id'-based
        # drop_duplicates below fail, so skip the save for this chunk.
        print(f'No new features for chunk {index+1}/{len(chunklist)}, length = {len(unprocessed_chunk)}')
        dct = {}
        continue

    # Save the DataFrame to CSV, appending to the existing file
    featuredf = pd.DataFrame(dct).drop_duplicates(subset=['id'])
    featuredf.to_csv('audio_features.csv', mode='a', index=False, header=not os.path.exists('audio_features.csv'))
    print(f'Progress saved chunk {index+1}/{len(chunklist)}, length = {len(unprocessed_chunk)}')

    # Reset the dct dictionary to avoid duplicates in the next chunk
    dct = {}


print('Feature scraping completed')

nr of processed songs = 276792
Progress saved chunk 11/2751, length = 2
Progress saved chunk 25/2751, length = 1
Progress saved chunk 30/2751, length = 1
Progress saved chunk 40/2751, length = 1
Progress saved chunk 97/2751, length = 1
Progress saved chunk 120/2751, length = 1
Progress saved chunk 127/2751, length = 1
Progress saved chunk 151/2751, length = 1
Progress saved chunk 191/2751, length = 1
Progress saved chunk 217/2751, length = 1
Progress saved chunk 235/2751, length = 1
Progress saved chunk 254/2751, length = 1
Progress saved chunk 256/2751, length = 1
Progress saved chunk 325/2751, length = 1
Progress saved chunk 355/2751, length = 1
Progress saved chunk 356/2751, length = 1
Progress saved chunk 380/2751, length = 2
Progress saved chunk 485/2751, length = 2
Progress saved chunk 502/2751, length = 1
Progress saved chunk 503/2751, length = 1
Progress saved chunk 541/2751, length = 1
Progress saved chunk 544/2751, length = 1
Progress saved chunk 603/2751, length = 1
Progress

KeyboardInterrupt: 