# Intra-List Similarity
This is a simplified demonstration of intra-list similarity using mock data. Please note that you will need to find your own similarity measure and fitting features for an actual implementation.

![Screenshot 2024-02-23 at 11.05.20.png](<attachment:Screenshot 2024-02-23 at 11.05.20.png>)

If a recommendation system suggests lists of items that are quite similar to individual users (for instance, a user only getting recommendations for action movies), then the similarity score (ILS) within those lists will be high. So, conversely, low ILS scores indicate higher diversity.

The screenshot above uses Ziegler et al's proposal (2005, https://dl.acm.org/doi/10.1145/1060745.1060754). It is, however, more common to average over all comparisons.

### Option 1: Per list ILS

In [1]:
#imports
from itertools import combinations
import pandas as pd

In [2]:
#simplified movie genre assignment
movies_genres= {
    'Toy Story': ['Family'],
    'Die Hard': ['Action'],
    'Star Wars': ['Sci-Fi'],
    'Titanic': ['Romance'],
    'Serendipity': ['Romance'],
    'Lawrence of Arabia': ['History'],
    'Predator': ['Action'],
    'Face/Off': ['Action'],
    'The Hangover': ['Comedy'],
    'Top Gun': ['Action'],
    'Gangs of New York': ['History'],
    'The Devil Wears Prada': ['Comedy'],
    'Pretty Woman': ['Romance'],
    'Meet the Parents': ['Comedy'],
    'Gladiator': ['Action'],
    'Saving Private Ryan': ['War'],
    'Terminator': ['Sci-Fi'],
    'The Talented Mr. Ripley': ['Thriller'],
    'Geisha': ['Drama'],
    'Sicario': ['Action'],
    'The Big Short': ['Drama'],
    'Legends of the Fall': ['Drama'],
    'Lilo & Stitch': ['Family'],
    'Frozen': ['Family'],
    'Pulp Fiction': ['Crime'],
    'Aquaman': ['Action'],
    'Avengers': ['Sci-Fi'],
    'Uncharted': ['Action'],
    'Get Smart': ['Comedy'],
    'Crazy Stupid Love': ['Romance'],
    'Chef': ['Drama'],
    'Sudden Death': ['Action'],
    'Matrix': ['Sci-Fi'],
    'Lion King': ['Family'],
    'Detective Pikachu': ['Family'],
    'Ad Astra': ['Sci-Fi']
}

In [3]:
#Jaccard similarity function
def jaccard_similarity(set1, set2):
    intersection = len(set1.intersection(set2))
    union = len(set1.union(set2))
    return intersection / union

#calculate ILS for a list of movies using the Jaccard similarity
def calculate_ils(movies_list):
    #calculate all pairwise Jaccard similarity scores for the movies in the list
    similarities = [
        jaccard_similarity(set(movies_genres[m1]), set(movies_genres[m2]))
        for m1, m2 in combinations(movies_list, 2)
    ]
    #sum of all similarities
    total_similarity = sum(similarities)
    #number of comparisons
    num_comparisons = len(similarities)
    #calculate the ILS
    return total_similarity / num_comparisons if num_comparisons else 0


In [4]:

#movie lists
similar_genre_movies = ['Die Hard', 'Gladiator', 'Predator', 'Face/Off', 'Sicario', 'Terminator', 'Top Gun', 'Aquaman', 'Uncharted','Sudden Death']
mixed_genre_movies = ['Toy Story', 'Frozen', 'Lilo & Stitch', 'Lion King', 'Detective Pikachu', 'Ad Astra', 'Terminator', 'Star Wars', 'Avengers', 'Matrix']
diverse_movies = ['Lawrence of Arabia', 'Frozen', 'Lion King', 'Meet the Parents', 'Pretty Woman', 'The Devil Wears Prada', 'Terminator', 'Saving Private Ryan', 'The Talented Mr. Ripley', 'Gangs of New York']

#ILS for the three lists
ils_similar = calculate_ils(similar_genre_movies)
ils_mixed = calculate_ils(mixed_genre_movies)
ils_diverse = calculate_ils(diverse_movies)


In [5]:

#ILS Scores
print(f"Action Movies ILS: {ils_similar}")
print(f"Mixed Genre Movies ILS: {ils_mixed}")
print(f"Diverse Movies ILS: {ils_diverse}")


Action Movies ILS: 0.8
Mixed Genre Movies ILS: 0.4444444444444444
Diverse Movies ILS: 0.06666666666666667


In [6]:

# Movie lists
similar_genre_movies = ['Die Hard', 'Gladiator', 'Predator', 'Face/Off', 'Sicario', 'Terminator', 'Top Gun', 'Aquaman', 'Uncharted', 'Sudden Death']
mixed_genre_movies = ['Toy Story', 'Frozen', 'Lilo & Stitch', 'Lion King', 'Detective Pikachu', 'Ad Astra', 'Terminator', 'Star Wars', 'Avengers', 'Matrix']
diverse_movies = ['Lawrence of Arabia', 'Frozen', 'Lion King', 'Meet the Parents', 'Pretty Woman', 'The Devil Wears Prada', 'Terminator', 'Saving Private Ryan', 'The Talented Mr. Ripley', 'Gangs of New York']

# Function to calculate ILS for a list of movies using simplified genres
def calculate_simplified_ils(movies_list):
    # Calculate all pairwise similarities for the movies in the list
    # Using the simplified one-genre-per-movie approach
    similarities = [
        1 if movies_genres[m1] == movies_genres[m2] else 0
        for m1, m2 in combinations(movies_list, 2)
    ]
    # Sum of all similarities
    total_similarity = sum(similarities)
    # Number of comparisons
    num_comparisons = len(similarities)
    # Calculate the ILS
    return total_similarity / num_comparisons if num_comparisons else 0

# ILS for the three lists using simplified genres
ils_similar = calculate_simplified_ils(similar_genre_movies)
ils_mixed = calculate_simplified_ils(mixed_genre_movies)
ils_diverse = calculate_simplified_ils(diverse_movies)

# Output the ILS scores
print(f"Similar Genre Movies ILS: {ils_similar}")
print(f"Mixed Genre Movies ILS: {ils_mixed}")
print(f"Diverse Movies ILS: {ils_diverse}")


Similar Genre Movies ILS: 0.8
Mixed Genre Movies ILS: 0.4444444444444444
Diverse Movies ILS: 0.06666666666666667


### Option 2: Recmetrics https://github.com/statisticianinstilettos/recmetrics

In [7]:
import pandas as pd
import recmetrics 

In [8]:
movies = {
    'Movie': ['Toy Story', 'Die Hard', 'Star Wars', 'Titanic', 'Serendipity', 'Lawrence of Arabia', 'Predator', 'Face/Off', 'The Hangover', 'Top Gun', 'Gangs of New York', 'The Devil Wears Prada', 'Pretty Woman', 'Meet the Parents', 'Gladiator', 'Saving Private Ryan', 'Terminator', 'The Talented Mr. Ripley', 'Geisha', 'Sicario', 'The Big Short', 'Legends of the Fall', 'Lilo & Stitch', 'Frozen', 'Pulp Fiction', 'Aquaman', 'Avengers', 'Uncharted', 'Get Smart', 'Crazy Stupid Love', 'Chef', 'Sudden Death', 'Matrix', 'Lion King', 'Detective Pikachu', 'Ad Astra'],
    'Action': [0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0],
    'Comedy': [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
    'Drama': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
    'Family': [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0],
    'History': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    'Romance': [0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
    'Sci-Fi': [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1],
    'Thriller': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    'War': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    'Crime': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
}


In [9]:
feature_df = pd.DataFrame(movies)
feature_df = feature_df.set_index(feature_df.columns[0])
feature_df

Unnamed: 0_level_0,Action,Comedy,Drama,Family,History,Romance,Sci-Fi,Thriller,War,Crime
Movie,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Toy Story,0,0,0,1,0,0,0,0,0,0
Die Hard,1,0,0,0,0,0,0,0,0,0
Star Wars,0,0,0,0,0,0,1,0,0,0
Titanic,0,0,0,0,0,1,0,0,0,0
Serendipity,0,0,0,0,0,1,0,0,0,0
Lawrence of Arabia,0,0,0,0,1,0,0,0,0,0
Predator,1,0,0,0,0,0,0,0,0,0
Face/Off,1,0,0,0,0,0,0,0,0,0
The Hangover,0,1,0,0,0,0,0,0,0,0
Top Gun,1,0,0,0,0,0,0,0,0,0


In [10]:
#for three lists (e.g., users)
example_lists =  ['Die Hard', 'Gladiator', 'Predator', 'Face/Off', 'Sicario', 'Terminator', 'Top Gun', 'Aquaman', 'Uncharted','Sudden Death'],['Toy Story', 'Frozen', 'Lilo & Stitch', 'Lion King', 'Detective Pikachu', 'Ad Astra', 'Terminator', 'Star Wars', 'Avengers', 'Matrix'], ['Lawrence of Arabia', 'Frozen', 'Lion King', 'Meet the Parents', 'Pretty Woman', 'The Devil Wears Prada', 'Terminator', 'Saving Private Ryan', 'The Talented Mr. Ripley', 'Gangs of New York']

In [11]:
#calculates one score averaged over all lists
recmetrics.intra_list_similarity(example_lists, feature_df)

0.43703703703703706

### The assignment
Knowing this, use the `movies.csv` and `ratings.csv` in order to calculate how diverse each user is. 

In [41]:
df_movies = pd.read_csv(r"C:\Users\Gebruiker\Documents\CODE\Master\Personalisation\INFOMPPM_local\Week 03\data\movies.csv")
df_ratings = pd.read_csv(r"C:\Users\Gebruiker\Documents\CODE\Master\Personalisation\INFOMPPM_local\Week 03\data\ratings.csv")

In [42]:
df_movies.head(3)

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance


In [43]:
df_ratings.head(3)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224


In [44]:
# Initialize a set to store unique categories
unique_categories = set()

# Iterate over the column containing strings with categories
for categories_string in df_movies['genres']:
    # Split the string on the '|' character
    categories_list = categories_string.split('|')
    # Update the set of unique categories
    unique_categories.update(categories_list)

# Print the unique categories
print("Unique categories:", unique_categories)

Unique categories: {'Mystery', '(no genres listed)', 'Horror', 'Sci-Fi', 'Film-Noir', 'Drama', 'Romance', 'Animation', 'Action', 'Musical', 'Documentary', 'War', 'Thriller', 'Adventure', 'Comedy', 'Crime', 'Western', 'Fantasy', 'IMAX', 'Children'}


In [47]:
df_movies["genres_list"] = df_movies["genres"].str.split('|')

In [48]:
for i in unique_categories:
    df_movies[i] = 0
    


In [49]:
# Iterate over the 'genres_list' column to find unique genres
for genres_list in df_movies['genres_list']:
    unique_categories.update(genres_list)

# Iterate over each unique genre and update values in the corresponding columns
for genre in unique_categories:
    df_movies[genre] = df_movies['genres_list'].apply(lambda x: 1 if genre in x else 0)
df_movies

Unnamed: 0,movieId,title,genres,genres_list,Mystery,(no genres listed),Horror,Sci-Fi,Film-Noir,Drama,...,Documentary,War,Thriller,Adventure,Comedy,Crime,Western,Fantasy,IMAX,Children
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,"[Adventure, Animation, Children, Comedy, Fantasy]",0,0,0,0,0,0,...,0,0,0,1,1,0,0,1,0,1
1,2,Jumanji (1995),Adventure|Children|Fantasy,"[Adventure, Children, Fantasy]",0,0,0,0,0,0,...,0,0,0,1,0,0,0,1,0,1
2,3,Grumpier Old Men (1995),Comedy|Romance,"[Comedy, Romance]",0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance,"[Comedy, Drama, Romance]",0,0,0,0,0,1,...,0,0,0,0,1,0,0,0,0,0
4,5,Father of the Bride Part II (1995),Comedy,[Comedy],0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9737,193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy,"[Action, Animation, Comedy, Fantasy]",0,0,0,0,0,0,...,0,0,0,0,1,0,0,1,0,0
9738,193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy,"[Animation, Comedy, Fantasy]",0,0,0,0,0,0,...,0,0,0,0,1,0,0,1,0,0
9739,193585,Flint (2017),Drama,[Drama],0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
9740,193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation,"[Action, Animation]",0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [50]:
df_test = df_ratings.merge(df_movies, on = "movieId")
df_test.head(3)

Unnamed: 0,userId,movieId,rating,timestamp,title,genres,genres_list,Mystery,(no genres listed),Horror,...,Documentary,War,Thriller,Adventure,Comedy,Crime,Western,Fantasy,IMAX,Children
0,1,1,4.0,964982703,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,"[Adventure, Animation, Children, Comedy, Fantasy]",0,0,0,...,0,0,0,1,1,0,0,1,0,1
1,5,1,4.0,847434962,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,"[Adventure, Animation, Children, Comedy, Fantasy]",0,0,0,...,0,0,0,1,1,0,0,1,0,1
2,7,1,4.5,1106635946,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,"[Adventure, Animation, Children, Comedy, Fantasy]",0,0,0,...,0,0,0,1,1,0,0,1,0,1


In [12]:
df_test2 = df_test.groupby(["userId"]).sum()
df_test2

Unnamed: 0_level_0,movieId,rating,timestamp,title,genres,genres_list,Mystery,(no genres listed),Horror,Sci-Fi,...,Documentary,War,Thriller,Adventure,Comedy,Crime,Western,Fantasy,IMAX,Children
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,430268,1013.0,223876652163,Toy Story (1995)Grumpier Old Men (1995)Heat (1...,Adventure|Animation|Children|Comedy|FantasyCom...,"[Adventure, Animation, Children, Comedy, Fanta...",18,0,17,40,...,0,22,55,85,83,45,7,47,0,42
2,2040158,114.5,41925735739,Tommy Boy (1995)Gladiator (2000)Shawshank Rede...,ComedyAction|Adventure|DramaCrime|DramaDrama|R...,"[Comedy, Action, Adventure, Drama, Crime, Dram...",2,0,1,4,...,3,1,10,3,7,10,1,0,4,0
3,275277,95.0,50952090433,Schindler's List (1993)Highlander (1986)Conan ...,Drama|WarAction|Adventure|FantasyAction|Advent...,"[Drama, War, Action, Adventure, Fantasy, Actio...",1,0,8,15,...,0,5,7,11,9,2,0,4,0,5
4,428140,768.0,208626691177,Seven (a.k.a. Se7en) (1995)Ed Wood (1994)Star ...,Mystery|ThrillerComedy|DramaAction|Adventure|S...,"[Mystery, Thriller, Comedy, Drama, Action, Adv...",23,0,4,12,...,2,7,38,29,104,27,10,19,1,10
5,15129,160.0,37287142755,"Toy Story (1995)Usual Suspects, The (1995)Brav...",Adventure|Animation|Children|Comedy|FantasyCri...,"[Adventure, Animation, Children, Comedy, Fanta...",1,0,1,2,...,0,3,9,8,15,12,2,7,3,9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,10806800,4078.0,1315155614819,Toy Story (1995)Seven (a.k.a. Se7en) (1995)Usu...,Adventure|Animation|Children|Comedy|FantasyMys...,"[Adventure, Animation, Children, Comedy, Fanta...",91,0,52,79,...,5,65,199,147,421,133,17,97,16,49
607,347939,708.0,180414617424,Toy Story (1995)Braveheart (1995)Star Wars: Ep...,Adventure|Animation|Children|Comedy|FantasyAct...,"[Adventure, Animation, Children, Comedy, Fanta...",17,0,35,36,...,0,6,61,45,55,27,2,21,1,19
608,3741665,2604.5,932937347826,Toy Story (1995)Grumpier Old Men (1995)Seven (...,Adventure|Animation|Children|Comedy|FantasyCom...,"[Adventure, Animation, Children, Comedy, Fanta...",69,0,97,167,...,6,19,259,181,355,146,11,111,12,88
609,17877,121.0,31347175754,Toy Story (1995)Braveheart (1995)Dumb & Dumber...,Adventure|Animation|Children|Comedy|FantasyAct...,"[Adventure, Animation, Children, Comedy, Fanta...",0,0,2,5,...,2,4,14,10,7,6,1,1,1,2


In [56]:
col_list = list(df_test2)
items_to_remove = ['title','movieId', 'rating', 'genres_list', 'genres', 'timestamp', 'totalGenres']
[col_list.remove(item) for item in items_to_remove if item in col_list]

df_test2['totalGenres'] = df_test2[col_list].sum(axis=1)
df_test2


Unnamed: 0_level_0,Mystery,(no genres listed),Horror,Sci-Fi,Film-Noir,Drama,Romance,Animation,Action,Musical,...,Thriller,Adventure,Comedy,Crime,Western,Fantasy,IMAX,Children,e,totalGenres
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.012912,0.0,0.012195,0.028694,0.000717,0.048780,0.018651,0.020803,0.064562,0.015782,...,0.039455,0.060976,0.059541,0.032281,0.005022,0.033716,0.000000,0.030129,0.5,1.0
2,0.013333,0.0,0.006667,0.026667,0.000000,0.113333,0.006667,0.000000,0.073333,0.000000,...,0.066667,0.020000,0.046667,0.066667,0.006667,0.000000,0.026667,0.000000,0.5,1.0
3,0.004673,0.0,0.037383,0.070093,0.000000,0.074766,0.023364,0.018692,0.065421,0.004673,...,0.032710,0.051402,0.042056,0.009346,0.000000,0.018692,0.000000,0.023364,0.5,1.0
4,0.022330,0.0,0.003883,0.011650,0.003883,0.116505,0.056311,0.005825,0.024272,0.015534,...,0.036893,0.028155,0.100971,0.026214,0.009709,0.018447,0.000971,0.009709,0.5,1.0
5,0.003906,0.0,0.003906,0.007812,0.000000,0.097656,0.042969,0.023438,0.035156,0.019531,...,0.035156,0.031250,0.058594,0.046875,0.007812,0.027344,0.011719,0.035156,0.5,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,0.017048,0.0,0.009741,0.014800,0.001499,0.130761,0.066504,0.007868,0.028288,0.008243,...,0.037280,0.027538,0.078868,0.024916,0.003185,0.018172,0.002997,0.009179,0.5,1.0
607,0.016378,0.0,0.033719,0.034682,0.000000,0.078998,0.027938,0.005780,0.069364,0.004817,...,0.058767,0.043353,0.052987,0.026012,0.001927,0.020231,0.000963,0.018304,0.5,1.0
608,0.015158,0.0,0.021309,0.036687,0.000879,0.061511,0.023286,0.012083,0.060852,0.007250,...,0.056898,0.039763,0.077988,0.032074,0.002417,0.024385,0.002636,0.019332,0.5,1.0
609,0.000000,0.0,0.010989,0.027473,0.000000,0.104396,0.027473,0.005495,0.060440,0.000000,...,0.076923,0.054945,0.038462,0.032967,0.005495,0.005495,0.005495,0.010989,0.5,1.0


In [57]:
df_test2 = df_test2[col_list].div(df_test2.totalGenres, axis=0)

In [53]:
import plotly.express as px
fig = px.histogram(df_test2, x = "(no genres listed)")
fig.show()

In [54]:
df_test2

Unnamed: 0_level_0,Mystery,(no genres listed),Horror,Sci-Fi,Film-Noir,Drama,Romance,Animation,Action,Musical,...,War,Thriller,Adventure,Comedy,Crime,Western,Fantasy,IMAX,Children,e
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.012912,0.0,0.012195,0.028694,0.000717,0.048780,0.018651,0.020803,0.064562,0.015782,...,0.015782,0.039455,0.060976,0.059541,0.032281,0.005022,0.033716,0.000000,0.030129,0.5
2,0.013333,0.0,0.006667,0.026667,0.000000,0.113333,0.006667,0.000000,0.073333,0.000000,...,0.006667,0.066667,0.020000,0.046667,0.066667,0.006667,0.000000,0.026667,0.000000,0.5
3,0.004673,0.0,0.037383,0.070093,0.000000,0.074766,0.023364,0.018692,0.065421,0.004673,...,0.023364,0.032710,0.051402,0.042056,0.009346,0.000000,0.018692,0.000000,0.023364,0.5
4,0.022330,0.0,0.003883,0.011650,0.003883,0.116505,0.056311,0.005825,0.024272,0.015534,...,0.006796,0.036893,0.028155,0.100971,0.026214,0.009709,0.018447,0.000971,0.009709,0.5
5,0.003906,0.0,0.003906,0.007812,0.000000,0.097656,0.042969,0.023438,0.035156,0.019531,...,0.011719,0.035156,0.031250,0.058594,0.046875,0.007812,0.027344,0.011719,0.035156,0.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,0.017048,0.0,0.009741,0.014800,0.001499,0.130761,0.066504,0.007868,0.028288,0.008243,...,0.012177,0.037280,0.027538,0.078868,0.024916,0.003185,0.018172,0.002997,0.009179,0.5
607,0.016378,0.0,0.033719,0.034682,0.000000,0.078998,0.027938,0.005780,0.069364,0.004817,...,0.005780,0.058767,0.043353,0.052987,0.026012,0.001927,0.020231,0.000963,0.018304,0.5
608,0.015158,0.0,0.021309,0.036687,0.000879,0.061511,0.023286,0.012083,0.060852,0.007250,...,0.004174,0.056898,0.039763,0.077988,0.032074,0.002417,0.024385,0.002636,0.019332,0.5
609,0.000000,0.0,0.010989,0.027473,0.000000,0.104396,0.027473,0.005495,0.060440,0.000000,...,0.021978,0.076923,0.054945,0.038462,0.032967,0.005495,0.005495,0.005495,0.010989,0.5
