# Recommendation System

In this notebook, I:

1. Train a simple SVD model. I will use it to generate K song recommendations.

2. Finally, I apply an XGBoost reranker that will rerank songs based on the number of plays they are likely to get.

In [1]:
!pip install "numpy<2" --upgrade
!pip install --force-reinstall --no-deps scikit-surprise

Collecting scikit-surprise
  Using cached scikit_surprise-1.1.4.tar.gz (154 kB)
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp311-cp311-linux_x86_64.whl size=2469548 sha256=55ff512c73f35d5659ca9e32050dd1a9c4100a39bd25a4f4efc69a23f6b6bfbc
  Stored in directory: /root/.cache/pip/wheels/2a/8f/6e/7e2899163e2d85d8266daab4aa1cdabec7a6c56f83c015b5af
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.4


# SVD Implementation

In [19]:
import pandas as pd
import numpy as np
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from collections import defaultdict

class MusicRecommender:
    def __init__(self):
        self.model = None
        self.trainset = None
        self.data = None

    def prepare_data(self, triplets_path='triplets_cleaned.csv'):
        df = pd.read_csv(triplets_path)

        # log minmax normalization
        df['rating'] = np.log1p(df['listen_count'])
        min_rating = df['rating'].min()
        max_rating = df['rating'].max()
        if max_rating == min_rating:
            df['rating'] = 3.0
        else:
            df['rating'] = 1 + 4 * (df['rating'] - min_rating) / (max_rating - min_rating)

        reader = Reader(rating_scale=(1, 5))
        data = Dataset.load_from_df(df[['user_id', 'song_id', 'rating']], reader)
        self.data = data
        return data, df

    def train(self, data=None):
        data = self.data
        self.trainset = data.build_full_trainset()
        self.model = SVD(n_factors=50, n_epochs=20, random_state=42)
        self.model.fit(self.trainset)

        print("SVD Model trained successfully!")
        print(f"Trained on {self.trainset.n_users} users and {self.trainset.n_items} items")

    def get_recommendations(self, user_id, n_recommendations=50):
        inner_uid = self.trainset.to_inner_uid(user_id)

        user_items = set([iid for (uid, iid) in self.trainset.ur[inner_uid]])
        all_items = set(self.trainset.all_items())
        items_to_predict = list(all_items - user_items)

        predictions = []
        for iid in items_to_predict:
            raw_iid = self.trainset.to_raw_iid(iid)
            pred = self.model.predict(user_id, raw_iid)
            predictions.append({
                'user_id': user_id,
                'song_id': pred.iid,
                'predicted_score': pred.est
            })

        predictions_df = pd.DataFrame(predictions)
        predictions_df = predictions_df.sort_values('predicted_score', ascending=False)
        return predictions_df.head(n_recommendations)

    def get_user_item_score(self, user_id, song_id):
        pred = self.model.predict(user_id, song_id)
        return pred.est

# XGBoost Reranker

In [20]:
import pandas as pd
import numpy as np
import json
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
from surprise import Dataset, Reader, SVD
import time
import pickle

class SafetyReranker:
    def __init__(self, categories=None):
        self.model = None
        self.feature_cols = None
        self.categories = categories or [
            'sexual', 'harassment', 'harassment_threatening', 'hate',
            'hate_threatening', 'illicit', 'illicit_violent', 'violence', 'violence_graphic'
        ]

    def batch_get_svd_scores(self, recommender_model, triplets_df):
        svd_model = recommender_model.model
        trainset = recommender_model.trainset

        '''
        #Instead of calling model.predict() thousands of times (once per row),
        directly implements the SVD formula in vectorized numpy operations, computing all predictions at once.
        '''

        user_raw_to_inner = trainset._raw2inner_id_users
        item_raw_to_inner = trainset._raw2inner_id_items

        inner_uids = triplets_df['user_id'].map(lambda x: user_raw_to_inner.get(x, -1))
        inner_iids = triplets_df['song_id'].map(lambda x: item_raw_to_inner.get(x, -1))

        global_mean = trainset.global_mean
        user_biases = np.append(svd_model.bu, [0])
        item_biases = np.append(svd_model.bi, [0])

        dummy_factor_row = np.zeros((1, svd_model.n_factors))
        user_factors = np.vstack([svd_model.pu, dummy_factor_row])
        item_factors = np.vstack([svd_model.qi, dummy_factor_row])

        bu = user_biases[inner_uids]
        bi = item_biases[inner_iids]
        pu = user_factors[inner_uids]
        qi = item_factors[inner_iids]

        dot_product = np.sum(pu * qi, axis=1)
        predictions = global_mean + bu + bi + dot_product

        mask = (inner_uids == -1) | (inner_iids == -1)
        predictions[mask] = global_mean

        index = pd.MultiIndex.from_frame(triplets_df[['user_id', 'song_id']])
        scores_series = pd.Series(predictions, index=index)

        return scores_series.to_dict()

    def prepare_training_data(self, triplets_path, songs_path, user_thresholds_path,
                                      recommender_model=None, svd_scores_dict=None):
        start_time = time.time()

        triplets = pd.read_csv(triplets_path)
        songs = pd.read_csv(songs_path)
        user_thresholds = pd.read_csv(user_thresholds_path)

        songs['safety_scores'] = songs['safety_scores'].apply(json.loads)

        if svd_scores_dict is None and recommender_model is not None:
          svd_scores_dict = self.batch_get_svd_scores(recommender_model, triplets)
        global_mean = recommender_model.trainset.global_mean if recommender_model else 0.0


        triplets['svd_score'] = triplets.apply(
          lambda row: svd_scores_dict.get((row.user_id, row.song_id), global_mean),
          axis=1
        )

        features_df = triplets.merge(user_thresholds, on='user_id', how='inner')

        features_df = features_df.merge(songs[['song_id', 'safety_scores']], on='song_id', how='inner')

        for cat in self.categories:
            threshold_col = f'threshold_{cat}'
            score_key = cat.replace('_', '/')

            features_df[f'song_score_{cat}'] = features_df['safety_scores'].apply(
                lambda x: x.get(score_key, 0)
            )

            features_df[f'safety_dist_{cat}'] = features_df[threshold_col] - features_df[f'song_score_{cat}']

            features_df.drop(f'song_score_{cat}', axis=1, inplace=True)

        features_df['target'] = np.log1p(features_df['listen_count'])
        self.feature_cols = ['svd_score', 'avg_plays_per_song', 'song_count'] + \
                           [f'safety_dist_{cat}' for cat in self.categories]

        print(f"Feature preparation completed in {time.time() - start_time:.1f} seconds")
        print(f"Final dataset: {len(features_df)} samples with {len(self.feature_cols)} features")

        return features_df

    def train(self, df_features, test_size=0.2):
        X = df_features[self.feature_cols].fillna(0).values
        y = df_features['target'].values

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=42
        )

        params = {
            'n_estimators': 100,
            'learning_rate': 0.1,
            'max_depth': 6,
            'subsample': 0.8,
            'colsample_bytree': 0.8,
            'random_state': 42,
            'objective': 'reg:squarederror',
            'device': 'cuda',
            'predictor': 'gpu_predictor',
        }

        print("Training XGBoost model...")
        start_time = time.time()

        self.model = xgb.XGBRegressor(**params)
        self.model.fit(
            X_train, y_train,
            eval_set=[(X_test, y_test)],
        )

        print(f"Training completed in {time.time() - start_time:.1f} seconds")

        from sklearn.metrics import r2_score, mean_squared_error
        y_pred = self.model.predict(X_test)
        r2 = r2_score(y_test, y_pred)
        rmse = np.sqrt(mean_squared_error(y_test, y_pred))

        print(f"\nModel Performance:")
        print(f"R2 Score: {r2:.4f}")
        print(f"RMSE: {rmse:.4f}")

        return self.model

    def batch_rerank_recommendations(self, user_ids, recommendations_dict,
                                   songs_df, user_thresholds_df, batch_size=100):
        all_reranked = {}

        song_safety_cache = {}
        for _, song in songs_df.iterrows():
            song_safety_cache[song['song_id']] = song['safety_scores']

        for i in range(0, len(user_ids), batch_size):
            batch_users = user_ids[i:i+batch_size]
            batch_features = []

            for user_id in batch_users:
                user_recs = recommendations_dict.get(user_id, pd.DataFrame())
                if user_recs.empty:
                    continue

                user_thresh = user_thresholds_df[user_thresholds_df['user_id'] == user_id]
                if user_thresh.empty:
                    continue

                for _, rec in user_recs.iterrows():
                    features = self._extract_features(
                        user_id, rec['song_id'], rec['predicted_score'],
                        user_thresh.iloc[0], song_safety_cache
                    )
                    batch_features.append(features)

            if batch_features:
                batch_df = pd.DataFrame(batch_features)
                X_pred = batch_df[self.feature_cols].fillna(0).values
                predictions = self.model.predict(X_pred)

                batch_df['predicted_plays'] = np.expm1(predictions)
                batch_df['safety_score'] = batch_df['svd_score'] * batch_df['predicted_plays']

                for user_id in batch_users:
                    user_preds = batch_df[batch_df['user_id'] == user_id]
                    if not user_preds.empty:
                        all_reranked[user_id] = user_preds.sort_values('safety_score', ascending=False)

        return all_reranked

    def _extract_features(self, user_id, song_id, svd_score, user_thresh, song_safety_cache):
        features = {
            'user_id': user_id,
            'song_id': song_id,
            'svd_score': svd_score,
            'avg_plays_per_song': user_thresh['avg_plays_per_song'],
            'song_count': user_thresh['song_count']
        }

        safety_scores = song_safety_cache.get(song_id, {})
        for cat in self.categories:
            threshold_key = f'threshold_{cat}'
            score_key = cat.replace('_', '/')
            features[f'safety_dist_{cat}'] = user_thresh[threshold_key] - safety_scores.get(score_key, 0)

        return features

    def rerank_recommendations(self, user_id, recommendations_df, songs_df, user_thresholds_df):
        song_safety_cache = {}
        for _, song in songs_df.iterrows():
            song_safety_cache[song['song_id']] = song['safety_scores']

        user_thresh = user_thresholds_df[user_thresholds_df['user_id'] == user_id]
        if user_thresh.empty:
            print(f"No thresholds found for user {user_id}, returning original order")
            return recommendations_df

        features_list = []
        for _, rec in recommendations_df.iterrows():
            features = self._extract_features(
                user_id, rec['song_id'], rec['predicted_score'],
                user_thresh.iloc[0], song_safety_cache
            )
            features_list.append(features)

        if not features_list:
            return recommendations_df

        features_df = pd.DataFrame(features_list)
        X_pred = features_df[self.feature_cols].fillna(0).values

        predicted_log_plays = self.model.predict(X_pred)

        features_df['predicted_plays'] = np.expm1(predicted_log_plays)
        features_df['safety_score'] = features_df['svd_score'] * features_df['predicted_plays']

        reranked = recommendations_df.merge(
            features_df[['song_id', 'predicted_plays', 'safety_score']],
            on='song_id',
            how='inner'
        )

        return reranked.sort_values('safety_score', ascending=False)

    def save_model(self, path: str):
      with open(path, 'wb') as f:
          pickle.dump({
              'model': self.model,
              'feature_cols': self.feature_cols,
              'categories':  self.categories
          }, f)
      print(f"Reranker saved to {path}")

    @classmethod
    def load_model(cls, path: str):
        with open(path, "rb") as f:
            saved = pickle.load(f)
        inst = cls(categories=saved["categories"])
        inst.model        = saved["model"]
        inst.feature_cols = saved["feature_cols"]
        return inst

# Model Training

In [21]:
print("Starting training pipeline...")
total_start = time.time()

reranker = SafetyReranker()
recommender = MusicRecommender()

data, _ = recommender.prepare_data("triplets_cleaned.csv")
recommender.train()

df_features = reranker.prepare_training_data(
    triplets_path="triplets_cleaned.csv",
    songs_path="joined_songs_cleaned.csv",
    user_thresholds_path="user_data_updated.csv",
    recommender_model=recommender
)

reranker.train(df_features)

print(f"\nTotal pipeline time: {time.time() - total_start:.1f} seconds")

reranker.save_model('safety_reranker.pkl')

Starting training pipeline...
SVD Model trained successfully!
Trained on 73949 users and 4822 items
Feature preparation completed in 19.3 seconds
Final dataset: 845301 samples with 12 features
Training XGBoost model...
[0]	validation_0-rmse:0.56960
[1]	validation_0-rmse:0.54825
[2]	validation_0-rmse:0.53358
[3]	validation_0-rmse:0.51752
[4]	validation_0-rmse:0.50391
[5]	validation_0-rmse:0.49224
[6]	validation_0-rmse:0.48233
[7]	validation_0-rmse:0.47391
[8]	validation_0-rmse:0.46678
[9]	validation_0-rmse:0.46056
[10]	validation_0-rmse:0.45531
[11]	validation_0-rmse:0.45082
[12]	validation_0-rmse:0.44702
[13]	validation_0-rmse:0.44528
[14]	validation_0-rmse:0.44193
[15]	validation_0-rmse:0.44043
[16]	validation_0-rmse:0.43792
[17]	validation_0-rmse:0.43551
[18]	validation_0-rmse:0.43322
[19]	validation_0-rmse:0.43130
[20]	validation_0-rmse:0.42968
[21]	validation_0-rmse:0.42810
[22]	validation_0-rmse:0.42691
[23]	validation_0-rmse:0.42573
[24]	validation_0-rmse:0.42456
[25]	validation_

Parameters: { "predictor" } are not used.



[44]	validation_0-rmse:0.41397
[45]	validation_0-rmse:0.41364
[46]	validation_0-rmse:0.41329
[47]	validation_0-rmse:0.41300
[48]	validation_0-rmse:0.41279
[49]	validation_0-rmse:0.41249
[50]	validation_0-rmse:0.41222
[51]	validation_0-rmse:0.41206
[52]	validation_0-rmse:0.41191
[53]	validation_0-rmse:0.41167
[54]	validation_0-rmse:0.41137
[55]	validation_0-rmse:0.41121
[56]	validation_0-rmse:0.41096
[57]	validation_0-rmse:0.41077
[58]	validation_0-rmse:0.41053
[59]	validation_0-rmse:0.41039
[60]	validation_0-rmse:0.41024
[61]	validation_0-rmse:0.41010
[62]	validation_0-rmse:0.40995
[63]	validation_0-rmse:0.40980
[64]	validation_0-rmse:0.40963
[65]	validation_0-rmse:0.40956
[66]	validation_0-rmse:0.40940
[67]	validation_0-rmse:0.40927
[68]	validation_0-rmse:0.40914
[69]	validation_0-rmse:0.40903
[70]	validation_0-rmse:0.40897
[71]	validation_0-rmse:0.40890
[72]	validation_0-rmse:0.40885
[73]	validation_0-rmse:0.40880
[74]	validation_0-rmse:0.40872
[75]	validation_0-rmse:0.40852
[76]	val

# Example Use

In [23]:
reranker = SafetyReranker.load_model("safety_reranker.pkl")

import pandas as pd, json

print("\n" + "="*50)
print("Testing single user reranking...")

user_thresholds_df = pd.read_csv("user_data_updated.csv")
user_id = user_thresholds_df["user_id"].iloc[0]
print(f"Testing with user: {user_id}")

initial_recs = recommender.get_recommendations(user_id, n_recommendations=300)

initial_recs['svd_rank'] = range(1, len(initial_recs) + 1)

songs_df = pd.read_csv("joined_songs_cleaned.csv")
songs_df["safety_scores"] = songs_df["safety_scores"].apply(json.loads)

safety_categories = ['sexual', 'harassment', 'violence', 'hate']

for cat in safety_categories:
    cat_key = cat.replace('_', '/')
    songs_df[f'{cat}_score'] = songs_df['safety_scores'].apply(lambda x: x.get(cat_key, 0))

songs_df['avg_safety_concern'] = songs_df[[f'{cat}_score' for cat in safety_categories]].mean(axis=1)

reranked_recs = reranker.rerank_recommendations(
    user_id=user_id,
    recommendations_df=initial_recs,
    songs_df=songs_df,
    user_thresholds_df=user_thresholds_df
)

reranked_recs['new_rank'] = range(1, len(reranked_recs) + 1)

reranked_recs['rank_change'] = reranked_recs['svd_rank'] - reranked_recs['new_rank']

reranked_with_safety = reranked_recs.merge(
    songs_df[['song_id', 'avg_safety_concern'] + [f'{cat}_score' for cat in safety_categories] + ['title', 'artist_name']],
    on='song_id',
    how='left'
)

user_thresh = user_thresholds_df[user_thresholds_df['user_id'] == user_id].iloc[0]

print(f"\nUser's safety thresholds:")
for cat in safety_categories:
    thresh_col = f'threshold_{cat}'
    if thresh_col in user_thresh.index:
        print(f"  {cat}: {user_thresh[thresh_col]:.4f}")

print("\n" + "="*50)
print("RANKING CHANGES (First 20 songs after reranking):")
print(f"{'New Rank':<10} {'SVD Rank':<10} {'Change':<10} {'Title':<30} {'Artist':<20} {'SVD Score':<10} {'Safety Score':<12} {'Avg Concern':<12}")
print("-" * 130)

for idx, row in reranked_with_safety.head(20).iterrows():
    rank_symbol = '↑' if row['rank_change'] > 0 else ('↓' if row['rank_change'] < 0 else '=')
    print(f"{row['new_rank']:<10} {row['svd_rank']:<10} {rank_symbol}{abs(row['rank_change']):<9} "
          f"{row['title'][:29]:<30} {row['artist_name'][:19]:<20} "
          f"{row['predicted_score']:<10.3f} {row['safety_score']:<12.3f} {row['avg_safety_concern']:<12.4f}")

print("\n" + "="*50)
print("BIGGEST RANK IMPROVEMENTS (moved up due to safety):")
top_movers = reranked_with_safety.nlargest(10, 'rank_change')
display_cols = ['new_rank', 'svd_rank', 'rank_change', 'title', 'artist_name', 'avg_safety_concern']
print(top_movers[display_cols].to_string(index=False))

print("\n" + "="*50)
print("BIGGEST RANK DROPS (moved down due to safety violations):")
bottom_movers = reranked_with_safety.nsmallest(10, 'rank_change')
print(bottom_movers[display_cols].to_string(index=False))

print("\n" + "="*50)
print("TOP 15 FINAL RECOMMENDATIONS:")
display_cols = ["new_rank", "svd_rank", "title", "artist_name", "safety_score", "avg_safety_concern", "sexual_score", "violence_score"]
print(reranked_with_safety[display_cols].head(15).to_string(index=False, float_format='%.4f'))

print("\n" + "="*50)
print("BOTTOM 15 FINAL RECOMMENDATIONS:")
print(reranked_with_safety[display_cols].tail(15).to_string(index=False, float_format='%.4f'))

print("\n" + "="*50)
print("SAFETY SCORE STATISTICS:")
print(f"Average safety concern for TOP 50 songs: {reranked_with_safety.head(50)['avg_safety_concern'].mean():.4f}")
print(f"Average safety concern for BOTTOM 50 songs: {reranked_with_safety.tail(50)['avg_safety_concern'].mean():.4f}")

print("\n" + "="*50)
print("EXAMPLES OF WHY SONGS WERE DEMOTED:")
demoted_songs = reranked_with_safety[reranked_with_safety['rank_change'] < -50].head(5)
for idx, song in demoted_songs.iterrows():
    print(f"\n'{song['title']}' by {song['artist_name']}")
    print(f"  Dropped from rank {song['svd_rank']} to {song['new_rank']} (change: {song['rank_change']})")

    violations = []
    for cat in safety_categories:
        thresh_col = f'threshold_{cat}'
        if thresh_col in user_thresh.index:
            if song[f'{cat}_score'] > user_thresh[thresh_col]:
                violations.append(f"  - {cat}: {song[f'{cat}_score']:.3f} > threshold {user_thresh[thresh_col]:.3f}")

    if violations:
        print("  Violations:")
        for v in violations:
            print(v)
    else:
        print(f"  High overall safety concern: {song['avg_safety_concern']:.4f}")


Testing single user reranking...
Testing with user: b80344d063b5ccb3212f76538f3d9e43d87dca9e

User's safety thresholds:
  sexual: 0.0376
  harassment: 0.0289
  violence: 0.0887
  hate: 0.0073

RANKING CHANGES (First 20 songs after reranking):
New Rank   SVD Rank   Change     Title                          Artist               SVD Score  Safety Score Avg Concern 
----------------------------------------------------------------------------------------------------------------------------------
1          2          ↑1         Bedroom Suite                  Joe Christmas        1.524      4.556        0.0140      
2          4          ↑2         XRDS                           Covenant             1.469      4.299        0.0027      
3          5          ↑2         Hoisting The Flag              Sondre Lerche        1.444      3.602        0.0029      
4          6          ↑2         One More Step To Take          Delegation           1.437      3.472        0.0021      
5          7   

```
==================================================
Testing single user reranking...
Testing with user: b80344d063b5ccb3212f76538f3d9e43d87dca9e

User's safety thresholds:
  sexual: 0.0376
  harassment: 0.0289
  violence: 0.0887
  hate: 0.0073

==================================================
RANKING CHANGES (First 20 songs after reranking):
New Rank   SVD Rank   Change     Title                          Artist               SVD Score  Safety Score Avg Concern
----------------------------------------------------------------------------------------------------------------------------------
1          2          ↑1         Bedroom Suite                  Joe Christmas        1.524      4.556        0.0140      
2          4          ↑2         XRDS                           Covenant             1.469      4.299        0.0027      
3          5          ↑2         Hoisting The Flag              Sondre Lerche        1.444      3.602        0.0029      
4          6          ↑2         One More Step To Take          Delegation           1.437      3.472        0.0021      
5          7          ↑2         Christmas Time Is Here         Shawn Colvin         1.437      3.464        0.0000      
6          3          ↓3         Caroline                       CREMATORY            1.477      3.061        0.0042      
7          10         ↑3         Glad All Over                  Terrorvision         1.399      2.822        0.0001      
8          12         ↑4         Forgive Me                     Leona Lewis          1.392      2.744        0.0028      
9          16         ↑7         Grand Designs                  Martin Orford        1.384      2.678        0.0020      
10         43         ↑33        Lights And Thunder             White Lion           1.328      2.616        0.0014      
11         44         ↑33        Candy Rain                     Soul For Real        1.328      2.610        0.0000      
12         53         ↑41        Watermelon Man                 Oscar Brown Jr.      1.316      2.602        0.0016      
13         46         ↑33        Elsewhere                      Edenbridge           1.323      2.601        0.0002      
14         47         ↑33        I Thought It Was You           Julia Fordham        1.323      2.600        0.0003      
15         50         ↑35        Goodbye                        Kristinia DeBarge    1.320      2.600        0.0020      
16         49         ↑33        The New Chapter                Kiuas                1.320      2.596        0.0001      
17         56         ↑39        Never Say Never                Armin van Buuren     1.314      2.594        0.0000      
18         57         ↑39        Come Into My World             Kylie Minogue        1.314      2.593        0.0026      
19         23         ↑4         Love Is Not A Fight            Warren Barfield      1.369      2.582        0.0002      
20         1          ↓19        Machine Kit                    16Volt               1.549      2.581        0.0293      

==================================================
BIGGEST RANK IMPROVEMENTS (moved up due to safety):
 new_rank  svd_rank  rank_change                    title         artist_name  avg_safety_concern
      183       292          109           In the Morning         Junior Boys            0.010409
       96       191           95        The Stranger Song       Leonard Cohen            0.004258
      182       273           91                  Beloved  Tenth Avenue North            0.005449
      206       297           91          Future Says Run               Tonic            0.001733
      135       225           90                   Wasted Angus & Julia Stone            0.002800
      164       254           90     Letters From The Sky      Civil Twilight            0.005400
      200       290           90                Thrill Me          Simply Red            0.004915
      140       229           89           Sister Seagull       Be Bop Deluxe            0.002922
      141       230           89                  I Adore            Clear Vu            0.000039
      202       291           89 Golden Mummy Golden Bird      Horse The Band            0.002757

==================================================
BIGGEST RANK DROPS (moved down due to safety violations):
 new_rank  svd_rank  rank_change                title      artist_name  avg_safety_concern
      291        22         -269 Signals Over The Air         Thursday            0.078016
      281        35         -246   For The Dishwasher        GRANDADDY            0.071142
      294        51         -243    All Men Are Liars        Nick Lowe            0.463518
      254        13         -241    Heaven Is A Truck         Pavement            0.056710
      274        40         -234  The Truth Of A Liar August Burns Red            0.067037
      238        14         -224  Fight For Your Life   The Casualties            0.058128
      250        27         -223           Gunn Clapp           O.G.C.            0.516278
      297        85         -212       Stripper Vicar           Mansun            0.114402
      242        36         -206 Reign Of The Tyrants       Jag Panzer            0.302967
      244        39         -205   Decades Of Despair     Carnal Forge            0.188067

==================================================
TOP 15 FINAL RECOMMENDATIONS:
 new_rank  svd_rank                  title       artist_name  safety_score  avg_safety_concern  sexual_score  violence_score
        1         2          Bedroom Suite     Joe Christmas        4.5555              0.0140        0.0200          0.0005
        2         4                   XRDS          Covenant        4.2991              0.0027        0.0000          0.0102
        3         5      Hoisting The Flag     Sondre Lerche        3.6020              0.0029        0.0002          0.0006
        4         6  One More Step To Take        Delegation        3.4718              0.0021        0.0028          0.0005
        5         7 Christmas Time Is Here      Shawn Colvin        3.4645              0.0000        0.0000          0.0000
        6         3               Caroline         CREMATORY        3.0606              0.0042        0.0009          0.0160
        7        10          Glad All Over      Terrorvision        2.8220              0.0001        0.0001          0.0000
        8        12             Forgive Me       Leona Lewis        2.7445              0.0028        0.0102          0.0000
        9        16          Grand Designs     Martin Orford        2.6776              0.0020        0.0000          0.0006
       10        43     Lights And Thunder        White Lion        2.6158              0.0014        0.0000          0.0006
       11        44             Candy Rain     Soul For Real        2.6100              0.0000        0.0000          0.0000
       12        53         Watermelon Man   Oscar Brown Jr.        2.6024              0.0016        0.0009          0.0005
       13        46              Elsewhere        Edenbridge        2.6011              0.0002        0.0001          0.0005
       14        47   I Thought It Was You     Julia Fordham        2.6001              0.0003        0.0000          0.0005
       15        50                Goodbye Kristinia DeBarge        2.6000              0.0020        0.0007          0.0012

==================================================
BOTTOM 15 FINAL RECOMMENDATIONS:
 new_rank  svd_rank                   title        artist_name  safety_score  avg_safety_concern  sexual_score  violence_score
      286       220           Forever Young        Youth Group        1.5517              0.0127        0.0001          0.0161
      287       261               Salt Skin     Ellie Goulding        1.5516              0.0480        0.0001          0.1862
      288       221           Solar Powered        Binary Star        1.5472              0.1973        0.0992          0.4304
      289       278             Unbreakable         Fireflight        1.5433              0.0526        0.0001          0.2049
      290       272        Love Like Winter                AFI        1.5414              0.0363        0.0199          0.1246
      291        22    Signals Over The Air           Thursday        1.5345              0.0780        0.2061          0.1008
      292       209             Pop Is Dead          Radiohead        1.5332              0.0896        0.2074          0.1234
      293       256             Art is Hard            Cursive        1.4962              0.0159        0.0001          0.0231
      294        51       All Men Are Liars          Nick Lowe        1.4921              0.4635        0.0525          0.0833
      295       187          You Never Know Immortal Technique        1.4844              0.3955        0.1815          0.1836
      296       262           Chicks Dig It        Chris Cagle        1.4750              0.0564        0.0647          0.1534
      297        85          Stripper Vicar             Mansun        1.4242              0.1144        0.2079          0.1863
      298       111 Better To Reign In Hell    Cradle Of Filth        1.4217              0.0842        0.0984          0.2021
      299       147        Teach U a Lesson       Robin Thicke        1.4032              0.0959        0.2440          0.0834
      300       285 Break Your Little Heart       All Time Low        1.4004              0.0859        0.0029          0.1989

==================================================
SAFETY SCORE STATISTICS:
Average safety concern for TOP 50 songs: 0.0085
Average safety concern for BOTTOM 50 songs: 0.1390

==================================================
EXAMPLES OF WHY SONGS WERE DEMOTED:

'Big Boss Man' by Bobbie Gentry
  Dropped from rank 11 to 62 (change: -51)
  Violations:
  - harassment: 0.055 > threshold 0.029

'Out Of Sight' by Smash Mouth
  Dropped from rank 21 to 76 (change: -55)
  Violations:
  - harassment: 0.141 > threshold 0.029

'Soulful Dress' by Sugar Pie DeSanto
  Dropped from rank 15 to 87 (change: -72)
  Violations:
  - sexual: 0.065 > threshold 0.038

'Ikea' by Jonathan Coulton
  Dropped from rank 33 to 88 (change: -55)
  Violations:
  - hate: 0.007 > threshold 0.007

'Trojan Horse' by Bloc Party
  Dropped from rank 66 to 118 (change: -52)
  Violations:
  - sexual: 0.180 > threshold 0.038

```