In [7]:
import pandas as pd
from qdrant_client import QdrantClient
from qdrant_client.models import PointStruct, VectorParams, Distance

# Config
# Audio-feature columns that form each track's vector, in vector order.
VECTOR_COLS = [
    "danceability", "energy", "loudness", "speechiness",
    "acousticness", "instrumentalness", "liveness",
    "valence", "tempo"
]
COLLECTION_NAME = "music_recommendation"

# Load data
audio_df = pd.read_csv("track_features.csv")

# Min-max scale every feature column to [0, 1] before building the vectors.
# Cosine similarity on raw values lets large-magnitude columns dominate the
# direction of the vector.
# NOTE(review): presumably tempo and loudness are on far larger scales than the
# other features -- confirm against the data.
# The same scaled vectors are used for indexing and for querying, so the
# collection must be rebuilt with create_collection() after this cell changes.
feature_values = audio_df[VECTOR_COLS].astype(float)
feature_min = feature_values.min()
# A constant column has zero range; divide by 1 instead to avoid NaN/inf.
feature_range = (feature_values.max() - feature_min).replace(0, 1.0)
audio_df["vector"] = ((feature_values - feature_min) / feature_range).values.tolist()

# Load metadata (malformed CSV lines are skipped, so some tracks may be absent)
meta_df = pd.read_csv("track_metadata.csv",  on_bad_lines='skip')

# Init client
client = QdrantClient(host="qdrant", port=6333)

def create_collection(batch_size=1000):
    """(Re)create the Qdrant collection and upload every track vector.

    The collection named ``COLLECTION_NAME`` is recreated with cosine distance
    and one dimension per entry of ``VECTOR_COLS``. Each row of ``audio_df``
    becomes one point: the point id is the row's index label, the vector is
    the row's ``"vector"`` value and the payload carries its ``"track_id"``.

    Args:
        batch_size: Maximum number of points sent per upsert request. Uploading
            in chunks keeps each request small instead of sending the whole
            table at once.

    Raises:
        ValueError: If ``batch_size`` is not a positive integer.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    client.recreate_collection(
        collection_name=COLLECTION_NAME,
        vectors_config=VectorParams(
            size=len(VECTOR_COLS),
            distance=Distance.COSINE
        )
    )

    # Pull the columns out once as plain Python lists; this avoids building a
    # Series per row (iterrows) and yields native Python values for the payload.
    point_ids = audio_df.index.tolist()
    vectors = audio_df["vector"].tolist()
    track_ids = audio_df["track_id"].tolist()

    for start in range(0, len(point_ids), batch_size):
        stop = start + batch_size
        points = [
            PointStruct(id=int(pid), vector=vec, payload={"track_id": tid})
            for pid, vec, tid in zip(
                point_ids[start:stop], vectors[start:stop], track_ids[start:stop]
            )
        ]
        client.upsert(collection_name=COLLECTION_NAME, points=points)

def search_similar_tracks(track_id, k=5):
    """Return up to ``k`` track ids whose vectors are closest to ``track_id``.

    The query vector is the ``"vector"`` of the first ``audio_df`` row whose
    ``"track_id"`` matches. One extra hit is requested from Qdrant because the
    query track itself is expected among the results and is filtered out.

    Args:
        track_id: Id of the seed track.
        k: Maximum number of recommendations to return.

    Returns:
        A list of track ids (excluding ``track_id``), at most ``k`` long.
        An empty list is returned when ``track_id`` has no row in ``audio_df``.
    """
    seed_vectors = audio_df.loc[audio_df["track_id"] == track_id, "vector"]
    if seed_vectors.empty:
        # The track has metadata but no audio features: nothing to query with.
        return []

    results = client.search(
        collection_name=COLLECTION_NAME,
        query_vector=seed_vectors.iloc[0],
        limit=k + 1
    )
    return [
        r.payload["track_id"]
        for r in results
        if r.payload["track_id"] != track_id
    ][:k]

def get_track_info(track_id):
    """Return the metadata of a track as a plain ``{column: value}`` dict.

    Only the first ``meta_df`` row whose ``"track_id"`` equals ``track_id`` is
    used. Raises ``IndexError`` when no row matches.
    """
    is_requested = meta_df["track_id"] == track_id
    first_match = meta_df.loc[is_requested].iloc[0]
    return first_match.to_dict()

def search_by_name(name):
    """Return the ``meta_df`` rows whose track name contains ``name``.

    Matching is case-insensitive and literal: ``name`` is free text typed by a
    user, so it is not interpreted as a regular expression (characters such as
    ``(`` or ``+`` would otherwise raise or match unexpectedly). Rows with a
    missing track name never match.

    Args:
        name: Substring to look for in the ``"track_name"`` column.

    Returns:
        A DataFrame holding the matching rows (possibly empty).
    """
    name_matches = meta_df["track_name"].str.contains(
        name, case=False, regex=False, na=False
    )
    return meta_df[name_matches]


  meta_df = pd.read_csv("track_metadata.csv",  on_bad_lines='skip')


In [9]:
import streamlit as st

# Streamlit front end: search a track by name, show it, then list similar tracks.
st.set_page_config(page_title="Music Recommender", layout="wide")
st.title("🎧 Music Recommendation System (Qdrant)")

search_query = st.text_input("🔍 Search for a song")

if search_query:
    results = search_by_name(search_query)
    if len(results) == 0:
        st.warning("No matching tracks found.")
    else:
        # Select by track_id rather than by title: several tracks can share a
        # name, and a name lookup would always resolve to the first of them.
        candidates = results.drop_duplicates(subset="track_id")
        option_labels = {
            tid: f"{title} — {artist}" if isinstance(artist, str) else str(title)
            for tid, title, artist in zip(
                candidates["track_id"],
                candidates["track_name"],
                candidates["artist_name"],
            )
        }
        track_id = st.selectbox(
            "Select a track", list(option_labels), format_func=option_labels.get
        )
        main_track = get_track_info(track_id)

        # Display current song
        st.markdown("### ▶️ Now Playing")
        col1, col2 = st.columns([1, 3])
        with col1:
            # Missing metadata values come back as NaN, not as strings.
            main_cover = main_track.get("image_album")
            if isinstance(main_cover, str) and main_cover:
                st.image(main_cover, use_column_width=True)
        with col2:
            st.markdown(f"**🎵 {main_track['track_name']}**")
            st.markdown(f"*🎤 {main_track['artist_name']}*")
            track_url = main_track.get('external_urls_tracks')
            if not (isinstance(track_url, str) and track_url):
                track_url = '#'
            st.markdown(f"🔗 [Spotify Link]({track_url})")

        # Show recommendations
        st.markdown("### 🎶 Similar Recommendations")
        try:
            similar_ids = search_similar_tracks(track_id)
        except IndexError:
            # The selected track has no row in the audio-feature table.
            similar_ids = []

        # Keep only recommendations that also have a metadata row; lines
        # skipped while loading the metadata CSV leave some ids without one.
        rec_infos = []
        for sid in similar_ids:
            try:
                rec_infos.append(get_track_info(sid))
            except IndexError:
                continue

        if not rec_infos:
            st.info("No recommendations available for this track.")
        else:
            rec_cols = st.columns(5)
            for i, info in enumerate(rec_infos):
                with rec_cols[i % 5]:
                    rec_cover = info.get("image_album")
                    if isinstance(rec_cover, str) and rec_cover:
                        st.image(rec_cover, use_column_width=True)
                    rec_artist = info.get("artist_name")
                    st.markdown(
                        f"**{info['track_name']}**",
                        help=rec_artist if isinstance(rec_artist, str) else None,
                    )




In [8]:
# Non-null value count per metadata column; a column whose count is lower than
# track_id's contains missing values.
meta_df.count()

track_id                 303552
track_name               303552
external_urls_tracks     303551
track_artist_id          303546
artist_name              303541
followers number         303540
images_artists           303539
external_urls_artists    303540
track_album_id           303539
album_name               303539
release_date             303539
image_album              303513
external_urls_albums     303539
duration_ms              303539
explicit                 303539
genres                   303539
dtype: int64

In [1]:
import pandas as pd

# Join the audio features with the track metadata and save the result as one CSV.
features_df = pd.read_csv("track_features.csv")
meta_df = pd.read_csv("track_metadata.csv", on_bad_lines='skip', low_memory=False)
# De-duplicate on the join key. De-duplicating on track_name would drop
# distinct tracks that merely share a title.
meta_df = meta_df.drop_duplicates(subset="track_id", keep="first")

# Inner join (pandas default): only tracks present in both files are kept.
merged_df = pd.merge(features_df, meta_df, on="track_id")
merged_df.to_csv("track_full.csv", index=False)