# Data Cleaning Pipeline

This notebook executes the data cleaning and merging process using the `data_cleaning` module.

In [28]:
import os 
import pandas as pd

import pathlib
import numpy as np

from tqdm import tqdm

In [29]:
from data_cleaning.artist_mapping import (
    get_unique_ids_from_column,
    get_all_combinations,
    get_artist_to_id,
    update_id_artists_with_mapping,
)
from data_cleaning.explicit_enrichment import (
    gemini_check_if_explicit,
    enrich_explicit_via_gemini,
)



In [30]:
from data_cleaning.process_charts import process_all_charts
from data_cleaning.merge import merge_data
from data_cleaning.clean_songs import (
    list_weekly_chart_files,
    extract_dates_from_filenames,
    summarize_weekly_date_gaps,
    create_song_dict,
    update_song_rows_with_dict,
    fill_with_proxy_dict_compat,
    fill_missing_from_dfs,
    prepare_df_for_parquet,
)

DATA_DIR = "data"

weekly_charts_path = os.path.join(DATA_DIR, "bronze", "data")
tracks_path = os.path.join(DATA_DIR, "bronze", "tracks.csv")
songs_path = os.path.join(DATA_DIR, "silver", "combined_songs.parquet")
output_path = os.path.join(DATA_DIR, "silver", "songs_with_features.parquet")

print("Starting data processing...")
process_all_charts(weekly_charts_path, songs_path)

print("Data processing complete.")

print("Starting data merging...")
merge_data(tracks_path, songs_path, output_path)
print("Data merging complete.")


if os.path.exists(output_path):
    # read parquet
    songs = pd.read_parquet(output_path)
    print("Songs loaded successfully. ({:_} rows)".format(songs.shape[0]))
else:
    raise FileNotFoundError("Error: Output path does not exist.")



ImportError: cannot import name 'prepare_df_for_parquet' from 'data_cleaning.clean_songs' (/Users/arthurmrv/Library/Mobile Documents/com~apple~CloudDocs/Documents/School/AIDAMS/S5/data/project/spotify_charts_project/data_cleaning/clean_songs.py)

In [None]:
tracks_df = pd.read_csv(tracks_path)
print(tracks_df.isna().sum())
print(tracks_df.shape)


id                   0
name                71
popularity           0
duration_ms          0
explicit             0
artists              0
id_artists           0
release_date         0
danceability         0
energy               0
key                  0
loudness             0
mode                 0
speechiness          0
acousticness         0
instrumentalness     0
liveness             0
valence              0
tempo                0
time_signature       0
dtype: int64
(586672, 20)


In [None]:
# get number of "track_id" in songs that are not in "id" column of tracks_df
print(songs[~songs["track_id"].isin(tracks_df["id"])].shape[0])

10631


In [None]:
print(songs.isna().sum())


track_id                0
artist_names            0
track_name              0
source                  0
streams                 0
week_date               0
name                10631
popularity          10631
duration_ms         10631
explicit            10631
artists             10631
id_artists          10631
release_date        10631
danceability        10631
energy              10631
key                 10631
loudness            10631
mode                10631
speechiness         10631
acousticness        10631
instrumentalness    10631
liveness            10631
valence             10631
tempo               10631
time_signature      10631
dtype: int64


## Verification
Check if no week was skipped during the webscraping.

In [None]:
# Example usage of verification helpers from data_cleaning.clean_songs
files = list_weekly_chart_files(weekly_charts_path)
dates = extract_dates_from_filenames(files)
summarize_weekly_date_gaps(dates)

First date: 2016-12-29
Last date: 2020-12-31
Total files: 210
Expected weeks: 210

Missing weeks:

Unexpected extra dates:


In [None]:
if "name" in songs.columns:
    # flag where name not NaN and track_name is NaN
    print("Rows where name is not included in track_name: {}/{:_}".format(songs[songs["name"].notna() & songs["track_name"].isna()].shape[0], songs.shape[0]))
    
    # Drop the "name" column as it is included in "track_name"
    songs.drop(columns=["name"], inplace=True)
    print("Column 'name' dropped successfully.")

Rows where name is not included in track_name: 0/41_995
Column 'name' dropped successfully.


## Clean songs

In [None]:
songs.columns, songs.shape

(Index(['track_id', 'artist_names', 'track_name', 'source', 'streams',
        'week_date', 'popularity', 'duration_ms', 'explicit', 'artists',
        'id_artists', 'release_date', 'danceability', 'energy', 'key',
        'loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness',
        'liveness', 'valence', 'tempo', 'time_signature'],
       dtype='object'),
 (41995, 24))

In [None]:
# Create a dictionary of canonical song IDs using helper from data_cleaning.clean_songs
song_dict = create_song_dict(songs)
print(song_dict[("The Weeknd", "Blinding Lights")])

Processing rows: 100%|██████████| 41995/41995 [00:01<00:00, 23690.32it/s]

['0VjIjW4GlUZAMYd2vXMi3b', 'Republic Records', Timestamp('2020-03-20 00:00:00')]





In [None]:
# Apply the update function from data_cleaning.clean_songs
songs = update_song_rows_with_dict(songs, song_dict)

Updating songs: 100%|██████████| 41995/41995 [00:09<00:00, 4626.93it/s]


Number of songs updated: 6_458/41_995


In [None]:
# Fill missing values in columns of interest using helpers from data_cleaning.clean_songs
columns_to_fill = [
    'artist_names', 'track_name', 'source', 'duration_ms', 'explicit', 
    'popularity', 'artists', 'id_artists', 'release_date', 'danceability', 'energy', 
    'key', 'loudness', 'mode', 'speechiness', 'acousticness', 
    'instrumentalness', 'liveness', 'valence', 'tempo', 'time_signature'
]

songs = fill_with_proxy_dict_compat(songs, columns_to_fill)


Number of rows filled: 11_069


In [None]:
print("There are still {:_} rows with NaN values".format(songs[songs.isna().any(axis=1)].shape[0]))

There are still 7_404 rows with NaN values


## Enrich still missing values

In [None]:
df_enrichment2_path = os.path.join(DATA_DIR, "bronze", "spotify_top_songs_audio_features.csv")
df_enrichment2 = pd.read_csv(df_enrichment2_path)
if "id" in df_enrichment2.columns:
    # Replace id with track_id
    df_enrichment2.rename(columns={"id": "track_id"}, inplace=True)
df_enrichment2.shape, df_enrichment2.columns 

((6513, 19),
 Index(['track_id', 'artist_names', 'track_name', 'source', 'key', 'mode',
        'time_signature', 'danceability', 'energy', 'speechiness',
        'acousticness', 'instrumentalness', 'liveness', 'valence', 'loudness',
        'tempo', 'duration_ms', 'weeks_on_chart', 'streams'],
       dtype='object'))

In [None]:
# List all dataframes in a kaggle_enrichment3_dir, then add them together to it is one big dataframe
kaggle_enrichment3_dir = os.path.join(DATA_DIR, "bronze", "kaggle_enrichment3")
import glob

# List all CSV files in the kaggle_enrichment3_dir
csv_files = glob.glob(os.path.join(kaggle_enrichment3_dir, "*.csv"))

# Read each CSV file into a DataFrame and collect them in a list
df_list = [pd.read_csv(f) for f in csv_files]

# Concatenate all DataFrames into a single big DataFrame
df_enrichment3 = pd.concat(df_list, ignore_index=True)

# Show shape and columns to confirm final structure
df_enrichment3.shape, df_enrichment3.columns


((247035, 17),
 Index(['artist_name', 'track_id', 'track_name', 'acousticness', 'danceability',
        'duration_ms', 'energy', 'instrumentalness', 'key', 'liveness',
        'loudness', 'mode', 'speechiness', 'tempo', 'time_signature', 'valence',
        'popularity'],
       dtype='object'))

In [None]:
set(df_enrichment2.columns).difference(set(columns_to_fill)), set(df_enrichment3.columns).difference(set(columns_to_fill))

({'streams', 'track_id', 'weeks_on_chart'}, {'artist_name', 'track_id'})

In [None]:
# Enrich missing values from external enrichment DataFrames using helper from data_cleaning.clean_songs
songs_gold = fill_missing_from_dfs(songs, columns_to_fill, "track_id", df_enrichment2, df_enrichment3)

Total missing values *before* processing DF: 125_826
Available columns: ['artist_names', 'track_name', 'source', 'duration_ms', 'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'time_signature']
Size of lookup dictionary : 6513


Enriching songs: 100%|██████████| 41995/41995 [00:14<00:00, 2983.49it/s]


Available columns: ['track_name', 'duration_ms', 'popularity', 'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'time_signature']
Size of lookup dictionary : 130989


Enriching songs: 100%|██████████| 41995/41995 [00:03<00:00, 11997.44it/s]

Total missing values *after* processing DF: 33_889





In [None]:
# save songs_gold
if "popularity" in songs_gold.columns:
    songs_gold.drop(columns=["popularity"], inplace=True)


In [None]:
# Show rows with NaN values in songs_gold
songs_gold[songs_gold.isna().any(axis=1)]

Unnamed: 0,track_id,artist_names,track_name,source,streams,week_date,duration_ms,explicit,artists,id_artists,...,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature
5,25sgk305KZfyuqVBQIahim,Ava Max,Sweet but Psycho,Atlantic Records,22400542,2019-01-17,187436.0,,,,...,C#/Db,-4.724,Major,0.0476,0.0691,0.000000,0.1660,0.628,133.002,4 beats
11,7wFybC8jBH3zE139OpCtpG,"Gesaffelstein, The Weeknd",Lost in the Fire (feat. The Weeknd),Columbia,18491016,2019-01-17,202093.0,,,,...,D,-12.159,Major,0.0359,0.0863,0.001330,0.1170,0.176,101.004,4 beats
56,0jAfdqv18goRTUxm3ilRjb,"A Boogie Wit da Hoodie, Tyga, Offset",Startender (feat. Offset and Tyga),Highbridge the Label / Atlantic Records,8247911,2019-01-17,192779.0,,,,...,F#/Gb,-4.653,Minor,0.1330,0.0235,0.000000,0.1510,0.506,191.971,4 beats
57,13hvHEstJ4sNbzdroPrPI3,"Dua Lipa, BLACKPINK",Kiss and Make Up,Warner Records,8240958,2019-01-17,190560.0,0.0,"['Dua Lipa', 'BLACKPINK']","['6M2wZ9GZgrQXHCFfjv46we', '41MozSoPIsD1dJM0CL...",...,8.0,-4.383,1.0,0.1460,0.0557,0.000000,0.1890,0.630,99.986,4.0
62,2FUNBaa5DwItJtYEBgAblU,21 Savage,monster,"Slaughter Gang, LLC/Epic Records",8027341,2019-01-17,233040.0,,,,...,A,-6.916,Minor,0.1240,0.1580,0.000228,0.1180,0.224,134.022,4 beats
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41943,7K7MUBCnzgBAvMVW2RTWNs,"Loud Luxury, Brando",Body,Armada Music,5231950,2019-06-13,163216.0,,,,...,C#/Db,-4.399,Major,0.0380,0.0476,0.000094,0.0543,0.582,121.958,4 beats
41951,6fxVffaTuwjgEk5h9QyRjy,Ed Sheeran,Photograph,Atlantic Records UK,5021070,2019-06-13,258987.0,0.0,['Ed Sheeran'],['6eUKZXaKkcviH0Ku9w2n3V'],...,4.0,-10.480,1.0,0.0476,0.6070,0.000464,0.0986,0.201,107.989,4.0
41978,5s8LepdwU0THzpd0M7nLsa,Ozuna,Te Soñé de Nuevo,Aura Music Corp.,4667591,2019-06-13,199813.0,,,,...,G,-2.867,Minor,0.1070,0.0533,0.000000,0.0936,0.773,168.040,4 beats
41986,4Sokm1cWK36H2WctWWRGf1,Ufo361,Irina Shayk,Stay High,4567010,2019-06-13,147452.0,,,,...,D,-9.504,Minor,0.0696,0.1660,0.020900,0.1700,0.201,74.869,4 beats


In [None]:
# Display the number of NaN values for each column in songs_gold
songs_gold_witouht_nan = songs_gold.dropna()

print(songs_gold.isna().sum())
print(songs_gold.shape[0] - songs_gold_witouht_nan.shape[0])
print("{}/{}".format(songs_gold_witouht_nan.shape[0], songs_gold.shape[0]))


track_id               0
artist_names           0
track_name             0
source                 0
streams                0
week_date              0
duration_ms            0
explicit            6966
artists             6966
id_artists          6966
release_date        7404
danceability           0
energy                 0
key                    0
loudness               0
mode                   0
speechiness            0
acousticness           0
instrumentalness       0
liveness               0
valence                0
tempo                  0
time_signature         0
dtype: int64
7404
34591/41995


In [None]:
# Get rows with NaN values in any column for songs_gold
songs_gold_with_nan = songs_gold[songs_gold.isna().any(axis=1)][["track_id", "week_date", "explicit", "artists", "id_artists", "release_date"]]
songs_gold_with_nan


Unnamed: 0,track_id,week_date,explicit,artists,id_artists,release_date
5,25sgk305KZfyuqVBQIahim,2019-01-17,,,,
11,7wFybC8jBH3zE139OpCtpG,2019-01-17,,,,
56,0jAfdqv18goRTUxm3ilRjb,2019-01-17,,,,
57,13hvHEstJ4sNbzdroPrPI3,2019-01-17,0.0,"['Dua Lipa', 'BLACKPINK']","['6M2wZ9GZgrQXHCFfjv46we', '41MozSoPIsD1dJM0CL...",
62,2FUNBaa5DwItJtYEBgAblU,2019-01-17,,,,
...,...,...,...,...,...,...
41943,7K7MUBCnzgBAvMVW2RTWNs,2019-06-13,,,,
41951,6fxVffaTuwjgEk5h9QyRjy,2019-06-13,0.0,['Ed Sheeran'],['6eUKZXaKkcviH0Ku9w2n3V'],
41978,5s8LepdwU0THzpd0M7nLsa,2019-06-13,,,,
41986,4Sokm1cWK36H2WctWWRGf1,2019-06-13,,,,


## Save new gold

In [None]:
# We decide to drop the rows without release date as they are the same rows as teh ones without explicit, artists, id_artists and release_date

In [None]:
print(songs_gold_witouht_nan.isna().sum())

track_id            0
artist_names        0
track_name          0
source              0
streams             0
week_date           0
duration_ms         0
explicit            0
artists             0
id_artists          0
release_date        0
danceability        0
energy              0
key                 0
loudness            0
mode                0
speechiness         0
acousticness        0
instrumentalness    0
liveness            0
valence             0
tempo               0
time_signature      0
dtype: int64


In [None]:
# Prepare dataframe for parquet (handles type conversions)
songs_gold_witouht_nan_parquet = prepare_df_for_parquet(songs_gold_witouht_nan)

songs_gold_witouht_nan_parquet.to_parquet(os.path.join(DATA_DIR, "gold", "songs_with_features.parquet"), index=False)


ArrowTypeError: ("Expected bytes, got a 'Timestamp' object", 'Conversion failed for column release_date with type object')

In [None]:
# Functions moved to data_cleaning.artist_mapping module
# Example usage:
all_id_artists = get_unique_ids_from_column(songs, 'id_artists')
print(len(all_id_artists))

# Example usage:
all_names_artists = get_unique_ids_from_column(songs, 'artists')
print(len(all_names_artists))

986
986


In [None]:
# Functions moved to data_cleaning.artist_mapping module
# (get_all_combinations, get_artist_to_id, update_id_artists_with_mapping)

In [None]:
# Generate "artists" column by splitting "artist_names" at ',' and stripping whitespace
songs_gold_2 = songs_gold.copy()
songs_gold_2['artists'] = songs_gold_2['artist_names'].apply(
    lambda x: [artist.strip() for artist in x.split(',')] if isinstance(x, str) else []
)

In [None]:
songs_gold_2 = update_id_artists_with_mapping(songs_gold_2)

print(songs_gold_2.isna().sum())

track_id               0
artist_names           0
track_name             0
source                 0
streams                0
week_date              0
duration_ms            0
explicit            6966
artists                0
id_artists             0
release_date        7404
danceability           0
energy                 0
key                    0
loudness               0
mode                   0
speechiness            0
acousticness           0
instrumentalness       0
liveness               0
valence                0
tempo                  0
time_signature         0
dtype: int64


In [None]:
# Functions moved to data_cleaning.explicit_enrichment module
# (gemini_check_if_explicit, enrich_explicit_via_gemini)

# Example use:
explicit_dict = enrich_explicit_via_gemini(
    songs_gold_2,
    previous_dict_path="data/silver/explicit_cache.json",
    save_dict_path="data/silver/explicit_cache.json"
)

Enriching explicit via Gemini:   0%|          | 0/41995 [00:00<?, ?it/s]

Calling Gemini for 1l9G7M8gNyQsgOGWZGoQsT


Enriching explicit via Gemini:   1%|▏         | 574/41995 [00:14<17:22, 39.75it/s]

Calling Gemini for 2vs5ubqg8QUbfyWZpMfvWe


Enriching explicit via Gemini:   1%|▏         | 578/41995 [00:28<41:34, 16.60it/s]

Calling Gemini for 6Yocr5JJOjupv71R5uhye5


Enriching explicit via Gemini:   1%|▏         | 580/41995 [00:49<1:30:35,  7.62it/s]

Calling Gemini for 4uHscE8VF6F6mKU3ECjIKr


Enriching explicit via Gemini:   1%|▏         | 581/41995 [01:04<2:23:18,  4.82it/s]

Calling Gemini for 6jA8JUuPCGYjFcgw0AoM5T


Enriching explicit via Gemini:   1%|▏         | 587/41995 [01:23<3:45:04,  3.07it/s]

Calling Gemini for 0sPTfDSZgE3KvcvqaKPtEx


Enriching explicit via Gemini:   1%|▏         | 588/41995 [01:36<5:11:20,  2.22it/s]

Calling Gemini for 5BQrp63SHCVf4bzCzJePne


Enriching explicit via Gemini:   1%|▏         | 589/41995 [01:50<7:14:45,  1.59it/s]

Calling Gemini for 4Nip6oiJU24LqcUEi7u19S


Enriching explicit via Gemini:   1%|▏         | 593/41995 [02:03<9:24:15,  1.22it/s]

Calling Gemini for 2EhJijFUs0WUmNClDBpYJq


Enriching explicit via Gemini:   1%|▏         | 597/41995 [02:17<12:21:04,  1.07s/it]

Calling Gemini for 6EpRaXYhGOB3fj4V2uDkMJ


Enriching explicit via Gemini:   1%|▏         | 609/41995 [02:33<13:13:47,  1.15s/it]

Calling Gemini for 79cuOz3SPQTuFrp8WgftAu


Enriching explicit via Gemini:   1%|▏         | 610/41995 [02:47<18:04:33,  1.57s/it]

Calling Gemini for 4aWmUDTfIPGksMNLV2rQP2


Enriching explicit via Gemini:   1%|▏         | 612/41995 [03:06<25:47:54,  2.24s/it]

Calling Gemini for 0qYTZCo5Bwh1nsUFGZP3zn


Enriching explicit via Gemini:   1%|▏         | 624/41995 [03:24<22:14:27,  1.94s/it]

Calling Gemini for 1sCxVKWImDZSZKvG0U9B23


Enriching explicit via Gemini:   1%|▏         | 627/41995 [03:38<26:34:00,  2.31s/it]

Calling Gemini for 167NczpNbRF7oWakJaY3Hh


Enriching explicit via Gemini:   2%|▏         | 634/41995 [03:54<25:59:13,  2.26s/it]

Calling Gemini for 3kxfsdsCpFgN412fpnW85Y


Enriching explicit via Gemini:   2%|▏         | 638/41995 [04:08<29:07:11,  2.53s/it]

Calling Gemini for 2kVHAQKVtczchKctctzbtK


Enriching explicit via Gemini:   2%|▏         | 650/41995 [04:25<23:14:43,  2.02s/it]

Calling Gemini for 22eADXu8DfOAUEDw4vU8qy


Enriching explicit via Gemini:   2%|▏         | 653/41995 [04:41<28:29:13,  2.48s/it]

Calling Gemini for 209gZgcfLq2aUuu51vOWBl


Enriching explicit via Gemini:   2%|▏         | 654/41995 [04:58<38:41:49,  3.37s/it]

Calling Gemini for 2mfUa8bLs2s5N4VaqJZ4lZ


Enriching explicit via Gemini:   2%|▏         | 656/41995 [05:12<45:24:59,  3.96s/it]

Calling Gemini for 6LOCAhFzZiavHcLJucLuDy


Enriching explicit via Gemini:   2%|▏         | 658/41995 [05:27<52:26:35,  4.57s/it]

Calling Gemini for 7LEydI5NfjY6mZfKwYIptZ


Enriching explicit via Gemini:   2%|▏         | 664/41995 [05:43<43:27:35,  3.79s/it]

Calling Gemini for 2Vdub5mY4lad7w64bFPUez


Enriching explicit via Gemini:   2%|▏         | 670/41995 [06:01<39:32:09,  3.44s/it]

Calling Gemini for 3NdDpSvN911VPGivFlV5d0


Enriching explicit via Gemini:   2%|▏         | 672/41995 [06:18<49:02:00,  4.27s/it]

Calling Gemini for 6XOYVSmNDjKUNMXooU4s4z


Enriching explicit via Gemini:   2%|▏         | 682/41995 [06:36<34:02:22,  2.97s/it]

Calling Gemini for 693iqPOQvhI7PobtR8CC8v


Enriching explicit via Gemini:   2%|▏         | 684/41995 [06:51<40:25:39,  3.52s/it]

Calling Gemini for 4okba5wu9mMLXx79DXLKi3


Enriching explicit via Gemini:   2%|▏         | 685/41995 [07:08<54:29:10,  4.75s/it]

Calling Gemini for 6qDF4wWL49CAVbgT7yuHl8


Enriching explicit via Gemini:   2%|▏         | 687/41995 [07:25<62:14:07,  5.42s/it]

Calling Gemini for 6HUnnBwYZqcED1eQztxMBN


Enriching explicit via Gemini:   2%|▏         | 689/41995 [07:38<65:34:12,  5.71s/it]

Calling Gemini for 51PIvodunv6NmX5250zxAh


Enriching explicit via Gemini:   2%|▏         | 691/41995 [07:51<67:48:48,  5.91s/it]

Calling Gemini for 7M16mAuWuFA5rC9HvkXaGx


Enriching explicit via Gemini:   2%|▏         | 692/41995 [08:14<94:42:46,  8.26s/it]

Calling Gemini for 15dnkAHcHC0vrIUlbiPiqU


Enriching explicit via Gemini:   2%|▏         | 694/41995 [08:28<92:03:43,  8.02s/it]

Calling Gemini for 6jmngk8Fwup05JZ6kgXEGX


Enriching explicit via Gemini:   2%|▏         | 695/41995 [08:40<99:02:27,  8.63s/it]

Calling Gemini for 6kig1UFggPUyZBCvXD3Wod


Enriching explicit via Gemini:   2%|▏         | 696/41995 [08:54<112:20:58,  9.79s/it]

Calling Gemini for 4pdPtRcBmOSQDlJ3Fk945m


Enriching explicit via Gemini:   2%|▏         | 700/41995 [09:08<73:52:48,  6.44s/it] 

Calling Gemini for 12o6dxWGk29uIAP3OWY0pM


Enriching explicit via Gemini:   2%|▏         | 701/41995 [09:25<91:51:58,  8.01s/it]

Calling Gemini for 0gbBzIqrECJOEPvQJIBFs5


Enriching explicit via Gemini:   2%|▏         | 706/41995 [09:40<61:04:43,  5.33s/it]

Calling Gemini for 61WbtB6ujkpNAsAf5LjF4b


Enriching explicit via Gemini:   2%|▏         | 709/41995 [09:56<62:10:26,  5.42s/it]

Calling Gemini for 3caMfJGFp53NAH2TuigdNj


Enriching explicit via Gemini:   2%|▏         | 710/41995 [10:11<75:53:55,  6.62s/it]

Calling Gemini for 4gqMQftUs22F8pOGVn5Acr


Enriching explicit via Gemini:   2%|▏         | 713/41995 [10:29<72:49:58,  6.35s/it]

Calling Gemini for 7lRsNbdOGykkMAgsqs4R1C


Enriching explicit via Gemini:   2%|▏         | 714/41995 [10:45<89:19:51,  7.79s/it]

Calling Gemini for 5NNlUMcOEOdoOIwwaWXv0k


Enriching explicit via Gemini:   2%|▏         | 720/41995 [10:57<52:18:04,  4.56s/it]

Calling Gemini for 6520aj0B4FSKGVuKNsOCOi


Enriching explicit via Gemini:   2%|▏         | 723/41995 [11:09<51:05:14,  4.46s/it]

Calling Gemini for 4pLwZjInHj3SimIyN9SnOz


Enriching explicit via Gemini:   2%|▏         | 731/41995 [11:24<35:35:32,  3.11s/it]

Calling Gemini for 6DNtNfH8hXkqOX1sjqmI7p


Enriching explicit via Gemini:   2%|▏         | 732/41995 [11:38<46:19:13,  4.04s/it]

Calling Gemini for 3E2Zh20GDCR9B1EYjfXWyv


Enriching explicit via Gemini:   2%|▏         | 735/41995 [11:55<50:33:02,  4.41s/it]

Calling Gemini for 7bJwvubZZaoGE1AGEfu8Fi


Enriching explicit via Gemini:   2%|▏         | 736/41995 [12:07<61:14:05,  5.34s/it]

Calling Gemini for 54h8xHewdRPnYzmbEHBhOO


Enriching explicit via Gemini:   2%|▏         | 737/41995 [12:21<74:33:34,  6.51s/it]

Calling Gemini for 04DwTuZ2VBdJCCC5TROn7L


Enriching explicit via Gemini:   2%|▏         | 739/41995 [12:34<74:43:17,  6.52s/it]

Calling Gemini for 7vAXN57IQ0sj049wZGIboE


Enriching explicit via Gemini:   2%|▏         | 740/41995 [12:48<88:26:47,  7.72s/it]

Calling Gemini for 17Fd6Yb7mSbinKG8LoWfFl


Enriching explicit via Gemini:   2%|▏         | 742/41995 [13:03<87:31:47,  7.64s/it]

Calling Gemini for 4Q5yMlwAfAoitqg4r9oZHN


Enriching explicit via Gemini:   2%|▏         | 749/41995 [13:15<45:34:26,  3.98s/it]

Calling Gemini for 6mORGLOz79w6VsCRLWYYuK


Enriching explicit via Gemini:   2%|▏         | 750/41995 [13:28<57:49:50,  5.05s/it]

Calling Gemini for 6Pw3Gvw4wNMZfTYZa6IQal


Enriching explicit via Gemini:   2%|▏         | 752/41995 [13:46<69:03:40,  6.03s/it]

Calling Gemini for 5i50gKdLAjjIr7UxRT5IVy


Enriching explicit via Gemini:   2%|▏         | 753/41995 [13:57<76:58:44,  6.72s/it]

Calling Gemini for 3hB5DgAiMAQ4DzYbsMq1IT


Enriching explicit via Gemini:   2%|▏         | 755/41995 [14:10<75:52:39,  6.62s/it]

Calling Gemini for 1sBhJGPUKrdRycgKxve70v


Enriching explicit via Gemini:   2%|▏         | 756/41995 [14:23<89:29:19,  7.81s/it]

Calling Gemini for 5hYTyyh2odQKphUbMqc5gN


Enriching explicit via Gemini:   2%|▏         | 761/41995 [14:36<55:24:55,  4.84s/it]

Calling Gemini for 4TZy1wLyHec06pwgFYDh1a


Enriching explicit via Gemini:   2%|▏         | 762/41995 [14:50<70:38:09,  6.17s/it]

Calling Gemini for 1wC93sZRGcTgKLN2UWRSwW


Enriching explicit via Gemini:   2%|▏         | 763/41995 [15:10<95:31:06,  8.34s/it]

Calling Gemini for 000xQL6tZNLJzIrtIgxqSl


Enriching explicit via Gemini:   2%|▏         | 767/41995 [15:21<64:57:05,  5.67s/it]

Calling Gemini for 4J2RaG4UH2HCThcqzDpeAF


Enriching explicit via Gemini:   2%|▏         | 768/41995 [15:35<79:12:43,  6.92s/it]

Calling Gemini for 5MFzQMkrl1FOOng9tq6R9r


Enriching explicit via Gemini:   2%|▏         | 770/41995 [15:52<84:28:13,  7.38s/it]

Calling Gemini for 5J1c3M4EldCfNxXwrwt8mT


Enriching explicit via Gemini:   2%|▏         | 771/41995 [16:11<108:38:11,  9.49s/it]

Calling Gemini for 69bp2EbF7Q2rqc5N3ylezZ


Enriching explicit via Gemini:   2%|▏         | 777/41995 [16:25<58:48:22,  5.14s/it] 

Calling Gemini for 3sJHkcxojoJD2Ytbo1yLLF


Enriching explicit via Gemini:   2%|▏         | 781/41995 [16:39<52:26:34,  4.58s/it]

Calling Gemini for 7LJjWqhqK594nN7qJzLVXE


Enriching explicit via Gemini:   2%|▏         | 782/41995 [16:55<67:13:35,  5.87s/it]

Calling Gemini for 4WjH9Bzt3kx7z8kl0awxh4


Enriching explicit via Gemini:   2%|▏         | 784/41995 [17:10<71:25:18,  6.24s/it]

Calling Gemini for 2aksifNn5ph8igDOkPBA02


Enriching explicit via Gemini:   2%|▏         | 786/41995 [17:28<79:19:22,  6.93s/it]

Calling Gemini for 7Fa5UNizycSms5jP3SQD3F


Enriching explicit via Gemini:   2%|▏         | 787/41995 [17:39<86:47:03,  7.58s/it]

Calling Gemini for 6rQSrBHf7HlZjtcMZ4S4bO


Enriching explicit via Gemini:   2%|▏         | 789/41995 [17:53<85:51:06,  7.50s/it]

Calling Gemini for 3BY2mafsbsoKGqS380Xnuz


Enriching explicit via Gemini:   2%|▏         | 790/41995 [18:03<89:54:24,  7.85s/it]

Calling Gemini for 0FuTx2s3YH1ppmtiM6l0zI


Enriching explicit via Gemini:   2%|▏         | 791/41995 [18:16<103:31:24,  9.04s/it]

Calling Gemini for 5OOkp4U9P9oL23maHFHL1h


Enriching explicit via Gemini:   2%|▏         | 792/41995 [18:32<119:52:46, 10.47s/it]

Calling Gemini for 6stYbAJgTszHAHZMPxWWCY


Enriching explicit via Gemini:   2%|▏         | 793/41995 [18:51<144:43:12, 12.64s/it]

Calling Gemini for 1CUVN2kn7mW5FjkqXTR2W1


Enriching explicit via Gemini:   2%|▏         | 794/41995 [19:07<153:20:46, 13.40s/it]

Calling Gemini for 2Ce5IyMlVRVvN997ZJjJJA


Enriching explicit via Gemini:   2%|▏         | 795/41995 [19:20<154:25:49, 13.49s/it]

Calling Gemini for 5Ua3GXyHwiSfpNTMjq6m2z


Enriching explicit via Gemini:   2%|▏         | 797/41995 [19:41<138:06:16, 12.07s/it]

Calling Gemini for 0ZExP2I5Ej92PsnG4G1jVG


Enriching explicit via Gemini:   2%|▏         | 798/41995 [19:57<149:34:46, 13.07s/it]

Calling Gemini for 6VHdcVbiKLYYOcMbIqBcuK


Enriching explicit via Gemini:   2%|▏         | 799/41995 [20:11<153:16:56, 13.39s/it]

Calling Gemini for 3dQDid3IUNhZy1OehIfYfE


Enriching explicit via Gemini:   2%|▏         | 800/41995 [20:30<169:14:44, 14.79s/it]

Calling Gemini for 6XXYdF6pJR1K3wKvuxmu7n


Enriching explicit via Gemini:   2%|▏         | 842/41995 [20:46<14:05:19,  1.23s/it] 

Calling Gemini for 7HMmFQsKsljwTw8bS7lu19


Enriching explicit via Gemini:   2%|▏         | 880/41995 [21:01<8:48:16,  1.30it/s] 

Calling Gemini for 5RUUkFNqH3GRKHg1xjprOb


Enriching explicit via Gemini:   2%|▏         | 903/41995 [21:16<8:25:54,  1.35it/s]

Calling Gemini for 4iiWcajF1fEUpwcUewc464


Enriching explicit via Gemini:   2%|▏         | 928/41995 [21:18<5:53:34,  1.94it/s]

Calling Gemini for 74fV8TuLZKVzSIOOGu8wwI


Enriching explicit via Gemini:   2%|▏         | 949/41995 [21:33<6:27:19,  1.77it/s]

Calling Gemini for 4yJiXq86uM56uIfIZgE440


Enriching explicit via Gemini:   2%|▏         | 950/41995 [21:47<9:24:16,  1.21it/s]

Calling Gemini for 4dZTZbmJBCCofAExHwefda


Enriching explicit via Gemini:   2%|▏         | 982/41995 [22:00<7:11:58,  1.58it/s]

Calling Gemini for 1G2CAJeP7rCwOZjlSJ0Zw3


Enriching explicit via Gemini:   2%|▏         | 984/41995 [22:16<10:34:56,  1.08it/s]

Calling Gemini for 3VFT90E0yYjdHSFoGv02us


Enriching explicit via Gemini:   2%|▏         | 986/41995 [22:30<14:11:30,  1.25s/it]

Calling Gemini for 1mF6sMUsRU9MisrdB9qWvg


Enriching explicit via Gemini:   2%|▏         | 996/41995 [22:49<16:18:33,  1.43s/it]

Calling Gemini for 25C5CowdsfXld2jJanbiex


Enriching explicit via Gemini:   2%|▏         | 1013/41995 [23:04<13:51:01,  1.22s/it]

Calling Gemini for 0XLOf9LhyazPX9Ld8jPiUq


Enriching explicit via Gemini:   2%|▏         | 1019/41995 [23:27<18:54:30,  1.66s/it]

Calling Gemini for 7aF09WaavZAmAWuUeYxlYD


Enriching explicit via Gemini:   3%|▎         | 1054/41995 [23:43<10:49:52,  1.05it/s]

Calling Gemini for 1CKd1PnqfYx0H12V9MSaIY


Enriching explicit via Gemini:   3%|▎         | 1069/41995 [23:53<9:59:53,  1.14it/s] 

Calling Gemini for 2nYwV62UABTH0NanrwVIdQ


Enriching explicit via Gemini:   3%|▎         | 1084/41995 [24:05<9:48:18,  1.16it/s]

Calling Gemini for 4xBjsLUSjcx2h7PcRikpQp


Enriching explicit via Gemini:   3%|▎         | 1090/41995 [24:22<12:43:43,  1.12s/it]

Calling Gemini for 74ODGXs4byLTRdnKGM4s6E


Enriching explicit via Gemini:   3%|▎         | 1116/41995 [24:41<10:37:09,  1.07it/s]

Calling Gemini for 0XfOi87h82i09rQjWYG0Bm


Enriching explicit via Gemini:   3%|▎         | 1126/41995 [24:58<12:25:03,  1.09s/it]

Calling Gemini for 7F1LWA9sIlTorHUo4amGqk


Enriching explicit via Gemini:   3%|▎         | 1132/41995 [25:12<14:31:36,  1.28s/it]

Calling Gemini for 00lNx0OcTJrS3MKHcB80HY


Enriching explicit via Gemini:   3%|▎         | 1137/41995 [25:27<17:21:28,  1.53s/it]

Calling Gemini for 2jYmK1cb3Uya6FFsocdpzO


Enriching explicit via Gemini:   3%|▎         | 1160/41995 [25:40<12:01:58,  1.06s/it]

Calling Gemini for 5xMzazov0IHYllrc8U2K6F


Enriching explicit via Gemini:   3%|▎         | 1175/41995 [25:55<11:43:41,  1.03s/it]

Calling Gemini for 0RYXEibxMami3QuT8kn4Iz


Enriching explicit via Gemini:   3%|▎         | 1176/41995 [26:11<16:40:46,  1.47s/it]

Calling Gemini for 53mrVsi49rLHIaKBiSvElG


Enriching explicit via Gemini:   3%|▎         | 1179/41995 [26:25<20:42:38,  1.83s/it]

Calling Gemini for 61HHDBLqF3AmSvLfHKNGd2


Enriching explicit via Gemini:   3%|▎         | 1199/41995 [26:44<15:31:36,  1.37s/it]

Calling Gemini for 2of5xn0GU0TdFneR1saRLH


Enriching explicit via Gemini:   3%|▎         | 1254/41995 [26:56<6:51:35,  1.65it/s] 

Calling Gemini for 6dVZCbi9CYtD9LjAHXRjIG


Enriching explicit via Gemini:   3%|▎         | 1335/41995 [27:27<5:20:37,  2.11it/s]

Calling Gemini for 0fg8CqpjdojMyXLNzM2PaJ


Enriching explicit via Gemini:   3%|▎         | 1376/41995 [27:40<4:50:10,  2.33it/s]

Calling Gemini for 5bZtRlMBU76vHuDOb1GM5u


Enriching explicit via Gemini:   3%|▎         | 1378/41995 [27:56<6:30:57,  1.73it/s]

Calling Gemini for 4b4KcovePX8Ke2cLIQTLM0


Enriching explicit via Gemini:   3%|▎         | 1381/41995 [28:12<8:52:15,  1.27it/s]

Calling Gemini for 7hCcLbNDCVPNqAwuoutJ31


Enriching explicit via Gemini:   3%|▎         | 1384/41995 [28:29<11:51:28,  1.05s/it]

Calling Gemini for 7yHEDfrJNd0zWOfXwydNH0


Enriching explicit via Gemini:   3%|▎         | 1389/41995 [28:47<15:12:48,  1.35s/it]

Calling Gemini for 0wfbD5rAksdXUzRvMfM3x5


Enriching explicit via Gemini:   3%|▎         | 1391/41995 [29:03<19:54:53,  1.77s/it]

Calling Gemini for 3QWjljChcOMkRDYSzF33Qr


Enriching explicit via Gemini:   3%|▎         | 1397/41995 [29:17<21:17:12,  1.89s/it]

Calling Gemini for 11EDhDAVDtGPoSar6ootYA


Enriching explicit via Gemini:   4%|▎         | 1513/41995 [29:34<4:30:07,  2.50it/s] 

Calling Gemini for 4ZJPwET9Jrgpkqi4Vo3Yg8


Enriching explicit via Gemini:   4%|▎         | 1545/41995 [29:46<4:25:25,  2.54it/s]

Calling Gemini for 42NpUvd0I2kdIuMXX4nobE


Enriching explicit via Gemini:   4%|▎         | 1567/41995 [30:04<5:23:27,  2.08it/s]

Calling Gemini for 5pvVAwQbuFoR7LkcicrKnk


Enriching explicit via Gemini:   4%|▎         | 1568/41995 [30:18<7:19:05,  1.53it/s]

Calling Gemini for 2rUwQj4SWaP2anuGDtNpYR


Enriching explicit via Gemini:   4%|▍         | 1581/41995 [30:35<8:42:30,  1.29it/s]

Calling Gemini for 0OVhQZkNe7lh0fQeH96EFW


Enriching explicit via Gemini:   4%|▍         | 1583/41995 [30:49<11:29:05,  1.02s/it]

Calling Gemini for 1FUViuNSldssMIawrOXF2i


Enriching explicit via Gemini:   4%|▍         | 1584/41995 [31:03<15:37:39,  1.39s/it]

Calling Gemini for 5ZnJsoJGkQ5K15W7Rcr5JO


Enriching explicit via Gemini:   4%|▍         | 1596/41995 [31:16<14:31:57,  1.30s/it]

Calling Gemini for 1D3ODoXHBLpdxolZRHWV1j


Enriching explicit via Gemini:   4%|▍         | 1597/41995 [31:31<20:11:38,  1.80s/it]

Calling Gemini for 4L7Y2OAPiY2I2GndZjAs5K


Enriching explicit via Gemini:   4%|▍         | 1600/41995 [31:46<25:17:44,  2.25s/it]

Calling Gemini for 0lLdorYw7lVrJydTINhWdI


Enriching explicit via Gemini:   4%|▍         | 1616/41995 [31:57<16:08:33,  1.44s/it]

Calling Gemini for 6tjituizSxwSmBB5vtgHZE


Enriching explicit via Gemini:   4%|▍         | 1660/41995 [32:10<7:31:59,  1.49it/s] 

Calling Gemini for 1Qi2wh8fFgDV7tl4Sj3f2K


Enriching explicit via Gemini:   4%|▍         | 1665/41995 [32:22<9:29:45,  1.18it/s]

Calling Gemini for 4KX8vXbouybtUptEyYxtIk


Enriching explicit via Gemini:   4%|▍         | 1697/41995 [32:36<7:23:43,  1.51it/s]

Calling Gemini for 5Ber68jZ7ytegr2UISEdb7


Enriching explicit via Gemini:   4%|▍         | 1711/41995 [32:50<8:08:57,  1.37it/s]

Calling Gemini for 5Q2P43CJra0uRAogjHyJDK


Enriching explicit via Gemini:   4%|▍         | 1717/41995 [33:08<11:12:06,  1.00s/it]

Calling Gemini for 6YJdPrH3i2POzu7hdHIRrb


Enriching explicit via Gemini:   4%|▍         | 1734/41995 [33:26<11:34:12,  1.03s/it]

Calling Gemini for 2SBr3MK1sdMs8IxdRcN6qz


Enriching explicit via Gemini:   4%|▍         | 1748/41995 [33:46<12:50:21,  1.15s/it]

Calling Gemini for 1hy6kKvsPbv7VTcllCuATC


Enriching explicit via Gemini:   4%|▍         | 1765/41995 [33:59<11:17:39,  1.01s/it]

Calling Gemini for 78pn8k7RogKo2oxl0DyX6d


Enriching explicit via Gemini:   4%|▍         | 1782/41995 [34:11<10:13:23,  1.09it/s]

Calling Gemini for 3AlpkljBS1AU7HFVPms8K6


Enriching explicit via Gemini:   4%|▍         | 1784/41995 [34:25<13:35:47,  1.22s/it]

Calling Gemini for 7uqcLJFlSYjVPLUz1vPxkY


Enriching explicit via Gemini:   4%|▍         | 1797/41995 [34:42<14:01:26,  1.26s/it]

Calling Gemini for 1tT55K6VEyO6XFDxK4lDQe


Enriching explicit via Gemini:   4%|▍         | 1799/41995 [34:57<19:52:08,  1.78s/it]

Calling Gemini for 3DoBTwfr8yi2LN08SBpFkN


Enriching explicit via Gemini:   4%|▍         | 1827/41995 [34:59<7:39:38,  1.46it/s] 

Calling Gemini for 3Wf2YGdYT8xVdNsQSoRKk9


Enriching explicit via Gemini:   4%|▍         | 1828/41995 [35:13<12:25:39,  1.11s/it]

Calling Gemini for 5lZZmityu9TAjNvSY6GLhR


Enriching explicit via Gemini:   4%|▍         | 1839/41995 [35:27<12:54:33,  1.16s/it]

Calling Gemini for 5ffntNJnJOeFv7b7DCd0Bw


Enriching explicit via Gemini:   4%|▍         | 1848/41995 [35:41<14:21:29,  1.29s/it]

Calling Gemini for 1ZemPoaj7LBj1RAQrb89EC


Enriching explicit via Gemini:   4%|▍         | 1862/41995 [35:56<13:18:21,  1.19s/it]

Calling Gemini for 7Fg342AJtNsIDdwCfX0paC


Enriching explicit via Gemini:   4%|▍         | 1863/41995 [36:15<20:52:31,  1.87s/it]

Calling Gemini for 73F87Sqh6jQWucOOvz1WFx


Enriching explicit via Gemini:   4%|▍         | 1866/41995 [36:29<25:13:39,  2.26s/it]

Calling Gemini for 6WbADFqMvR8N5u0BvtsWQE


Enriching explicit via Gemini:   4%|▍         | 1868/41995 [36:42<30:39:08,  2.75s/it]

Calling Gemini for 44KKhCyI0BGS6LvVyDqJtH


Enriching explicit via Gemini:   4%|▍         | 1885/41995 [36:55<17:50:52,  1.60s/it]

Calling Gemini for 3pWcj8hxj3fsb9TDAcMpEl


Enriching explicit via Gemini:   4%|▍         | 1887/41995 [37:09<23:00:44,  2.07s/it]

Calling Gemini for 4ofvoCdXq99vpMZO4Cfp8x


Enriching explicit via Gemini:   5%|▍         | 1901/41995 [37:23<17:31:26,  1.57s/it]

Calling Gemini for 1oUudyBYVGMIC8JXeIsFzq


Enriching explicit via Gemini:   5%|▍         | 1906/41995 [37:37<20:01:39,  1.80s/it]

Calling Gemini for 3ITvHA9zhZZdBJsOsAUegF


Enriching explicit via Gemini:   5%|▍         | 1909/41995 [37:52<24:59:25,  2.24s/it]

Calling Gemini for 3uYDO9dPLTVrgfwg7EYXSf


Enriching explicit via Gemini:   5%|▍         | 1916/41995 [38:03<22:31:21,  2.02s/it]

Calling Gemini for 2HGymMoOdB4ZYv0AAqYmgF


Enriching explicit via Gemini:   5%|▍         | 1918/41995 [38:06<13:16:14,  1.19s/it]


KeyboardInterrupt: 