In [2]:
# environment setup
import sys
sys.path.append('../../Data')

In [77]:
import data_api
import importlib

In [78]:
importlib.reload(data_api)

<module 'data_api' from '../../Data/data_api.py'>

In [52]:
# import necessary packages
import json
from data_api import fetch_high_quality_users, get_user_feature
import csv
import datetime
import math
import openai
from user_algorithm import calculate_recent_tracks_score, calculate_top_tracks_score, calculate_top_artists_score, fetch_user_tag
import pandas as pd


In [82]:
# Ad-hoc check of fetch_user_tag: ten artists, each given the same weight of 1.
fetch_user_tag(dict.fromkeys(
    [
        "Maroon 5",
        "Bruno Mars",
        "Taylor Swift",
        "Junjie Lin",
        "Jay Zhou",
        "Micheal Jackson",  # Note: this is a misspelling of "Michael" Jackson
        "B.B. King",
        "Muddy Waters",
        "Howlin' Wolf",
        "Etta James",
    ],
    1,
))

'Pop: 0.8, R&B: 0.7, Blues: 0.6, Mandopop: 0.5, Country: 0.4'

In [None]:
def get_artist_name(artist_id, conn):
    """
    Look up the name of a single artist in the Artists table
    :param artist_id: the value matched against Artists.artist_id
    :param conn: the connection to the database (must accept '?' placeholders)
    :return: artist_name of the first matching row, or None when no row matches
             (a NULL artist_name also comes back as None)
    """
    cursor = conn.cursor()
    try:
        cursor.execute("SELECT artist_name FROM Artists WHERE artist_id = ?", (artist_id,))
        result = cursor.fetchone()
    finally:
        # Release the cursor even if the query raises; this function is called
        # once per artist, so leaked cursors would otherwise pile up.
        cursor.close()
    if result:
        return result[0]
    return None

def transform_artist_feature_to_names(artist_feature, conn):
    """
    Re-key an artist feature by artist name instead of artist ID
    :param artist_feature: the artist feature in the format of {artist_id: score}
    :param conn: the connection to the database, handed to get_artist_name
    :return: the artist feature in the format of {artist_name: score}; IDs for
             which get_artist_name returns a falsy value are left out
    """
    # Lazily pair each ID with its looked-up name so lookups happen one at a
    # time, in the mapping's iteration order.
    lookups = ((get_artist_name(artist_id, conn), artist_id) for artist_id in artist_feature)
    return {
        name: artist_feature[artist_id]
        for name, artist_id in lookups
        if name
    }


In [63]:
def write_features_to_csv(conn, output_file):
    """
    Write all the features of users into a csv file

    One row is written per user returned by fetch_high_quality_users. Every
    feature cell holds the JSON encoding of the corresponding score. If anything
    fails while processing a user, the error is printed and that user still gets
    a row, with all feature cells left empty.

    :param conn: the connection to the database
    :param output_file: the output file name
    :return: None
    """
    # 'Top Tags' has to be declared here: csv.DictWriter raises ValueError when a
    # row contains a key that is not listed in fieldnames.
    fieldnames = ['user_id', 'Recent Tracks', 'Top Tracks', 'Top Artists', 'Top Tags']

    all_users = fetch_high_quality_users(conn)
    # Explicit UTF-8 so the file does not depend on the platform's default encoding.
    with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for i, user_id in enumerate(all_users):
            print("Fetching data for user " + str(i))
            try:
                features = get_user_feature(conn, user_id)

                # Get the feature information
                recent_tracks = features['Recent Tracks']
                top_tracks = features['Top Tracks']
                top_artists = features['Top Artists']
                # Tags are looked up by artist name, scores stay keyed by artist ID
                top_artists_names = transform_artist_feature_to_names(top_artists, conn)

                # Convert the feature information into feature scores
                recent_tracks_score = calculate_recent_tracks_score(recent_tracks)
                top_tracks_score = calculate_top_tracks_score(top_tracks)
                top_artists_score = calculate_top_artists_score(top_artists)
                top_tags = fetch_user_tag(top_artists_names)

                # Convert the feature scores into the JSON format
                row = {
                    'user_id': user_id,
                    'Recent Tracks': json.dumps(recent_tracks_score),  # dict{track_id: score}
                    'Top Tracks': json.dumps(top_tracks_score),
                    'Top Artists': json.dumps(top_artists_score),
                    'Top Tags': json.dumps(top_tags),
                }
            except Exception as e:
                # Best effort: one failing user must not abort the whole export.
                print(e)
                print("Error: unable to fetch data for user " + str(user_id))
                row = dict.fromkeys(fieldnames)  # every feature cell is None -> empty
                row['user_id'] = user_id

            # Written outside the try block so a row is emitted exactly once per user.
            writer.writerow(row)






In [80]:
def _parse_score_cell(cell):
    """Decode one CSV cell holding a {id: score} mapping; a missing cell gives {}."""
    if not isinstance(cell, str):
        if pd.isna(cell):
            # Empty CSV cell (pandas reads it back as NaN), e.g. the row of a
            # user whose features could not be computed.
            return {}
        raise ValueError(f"expected a JSON object string, got {cell!r}")
    text = cell.strip()
    if not text:
        return {}
    try:
        scores = json.loads(text)
    except ValueError:
        # Not valid JSON: accept a plain Python dict literal as well, which the
        # former eval()-based parser understood, without executing any code.
        import ast
        scores = ast.literal_eval(text)
    if scores is None:
        return {}
    if not isinstance(scores, dict):
        raise ValueError(f"expected a mapping of id -> score, got {scores!r}")
    return scores


def _parse_tag_cell(cell):
    """Decode one 'tag: score, tag: score' CSV cell into {tag: float}; missing gives {}."""
    if not isinstance(cell, str):
        if pd.isna(cell):
            return {}
        raise ValueError(f"expected a tag string, got {cell!r}")
    text = cell.strip()
    if text.startswith('"'):
        # The cell may hold the JSON encoding of the tag string (wrapped in
        # double quotes); unwrap it before splitting.
        try:
            decoded = json.loads(text)
        except ValueError:
            decoded = None
        if isinstance(decoded, str):
            text = decoded
    tags = {}
    for entry in text.split(","):
        entry = entry.strip()
        if not entry:
            # Empty string or stray/trailing comma.
            continue
        # Split on the LAST colon so a tag name may itself contain a colon.
        name, sep, score = entry.rpartition(":")
        if not sep:
            raise ValueError(f"malformed tag entry {entry!r}; expected 'tag: score'")
        tags[name.strip()] = float(score)
    return tags


def _write_wide_score_table(all_feature_csv, column, parse_cell, prefix, output_path):
    """Pivot one per-user mapping column into one score column per key and save it."""
    df = pd.read_csv(all_feature_csv)

    # Parse every cell exactly once instead of once per (key, row) pair.
    per_user = [parse_cell(cell) for cell in df[column]]

    # Sorted so the column order is reproducible from run to run.
    keys = sorted({key for scores in per_user for key in scores}, key=str)

    columns = {"user_id": df["user_id"]}
    for key in keys:
        # dtype=object keeps None for users without this key and leaves the
        # stored scores untouched (no int -> float coercion).
        columns[f"{prefix} {key}"] = pd.Series(
            [scores.get(key) for scores in per_user], index=df.index, dtype=object
        )

    # Build the frame in one go; inserting columns one by one fragments it.
    pd.DataFrame(columns).to_csv(output_path, index=False)


def recent_track_feature_csv(all_feature_csv, output_path="/user_features/recent_track.csv"):
    """
    Expand the 'Recent Tracks' column into one "Recent Track Score <track_id>" column per track
    :param all_feature_csv: path of the csv holding 'user_id' and 'Recent Tracks' columns
    :param output_path: where the resulting table is saved
    :return: None
    """
    _write_wide_score_table(
        all_feature_csv, 'Recent Tracks', _parse_score_cell, "Recent Track Score", output_path
    )


def top_track_feature_csv(all_feature_csv, output_path="/user_features/top_track.csv"):
    """
    Expand the 'Top Tracks' column into one "Top Track Score <track_id>" column per track
    :param all_feature_csv: path of the csv holding 'user_id' and 'Top Tracks' columns
    :param output_path: where the resulting table is saved
    :return: None
    """
    _write_wide_score_table(
        all_feature_csv, 'Top Tracks', _parse_score_cell, "Top Track Score", output_path
    )


def top_artist_feature_csv(all_feature_csv, output_path="/user_features/top_artist.csv"):
    """
    Expand the 'Top Artists' column into one "Top Artist Score <artist_id>" column per artist
    :param all_feature_csv: path of the csv holding 'user_id' and 'Top Artists' columns
    :param output_path: where the resulting table is saved
    :return: None
    """
    _write_wide_score_table(
        all_feature_csv, 'Top Artists', _parse_score_cell, "Top Artist Score", output_path
    )


def tag_feature_csv(all_feature_csv, output_path="/user_features/tags.csv"):
    """
    Expand the 'Top Tags' column into one "Top Tag Score <tag>" column per tag
    :param all_feature_csv: path of the csv holding 'user_id' and 'Top Tags' columns;
                            each tag cell looks like "Pop: 0.8, Blues: 0.6", optionally
                            JSON-quoted
    :param output_path: where the resulting table is saved
    :return: None
    """
    _write_wide_score_table(
        all_feature_csv, 'Top Tags', _parse_tag_cell, "Top Tag Score", output_path
    )

