In [1]:
import warnings
from collections import Counter

import numpy as np
import pandas as pd
import plotly.express as px

warnings.filterwarnings("ignore")

In [2]:
# Folder holding the input CSV files. The trailing slash is part of the value
# because the loading cell joins it directly onto the file names.
data_folder = "data/"

# File names of the two datasets expected inside data_folder.
ratings_filename = "rating.csv"
anime_filename = "anime.csv"

In [3]:
# Load both tables from the configured data folder.
ratings = pd.read_csv(f"{data_folder}{ratings_filename}")
anime = pd.read_csv(f"{data_folder}{anime_filename}")

# Preview the first rows of each table (ratings first, then anime).
display(ratings.head(), anime.head())

Unnamed: 0,user_id,anime_id,rating
0,1,20,-1
1,1,24,-1
2,1,79,-1
3,1,226,-1
4,1,241,-1


Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


Top 10 highest-rated anime with more than 10,000 members (horizontal bar chart)

In [4]:
# Restrict to titles with more than 10000 members and keep the ten best rated.
# The final ascending sort fixes the row order handed to the bar chart
# (presumably so the best-rated title ends up as the top bar — confirm visually).
top_rated_anime = (
    anime
    .loc[lambda frame: frame['members'] > 10000]
    .nlargest(10, 'rating')
    .sort_values('rating')
)

fig = px.bar(
    top_rated_anime,
    x='rating',
    y='name',
    text='rating',
    orientation='h',
    title='Top 10 Highest Rated Anime (members > 10000)',
)

# Print each rating with two decimals just outside its bar.
fig.update_traces(textposition='outside', texttemplate='%{text:.2f}')
# Axis captions plus a wide left margin (220 px) for the title labels.
fig.update_layout(margin=dict(l=220), yaxis_title='Anime', xaxis_title='Rating')
fig.show()

In [5]:
# Mean rating per type, computed only over titles with more than 10000 members.
# The table itself is kept in descending order of the mean rating.
avg_rating_by_type = (
    anime
    .loc[lambda frame: frame['members'] > 10000]
    .groupby('type', as_index=False)['rating']
    .mean()
    .sort_values('rating', ascending=False)
)

# The chart receives the rows re-sorted in ascending order of the mean rating.
fig = px.bar(
    avg_rating_by_type.sort_values('rating'),
    x='rating',
    y='type',
    text='rating',
    orientation='h',
    title='Average Rating by Type (members > 10000)',
)

# Print each mean with two decimals just outside its bar.
fig.update_traces(textposition='outside', texttemplate='%{text:.2f}')
fig.update_layout(margin=dict(l=220), yaxis_title='Type', xaxis_title='Average Rating')
fig.show()

In [6]:
# Build one row per (title, genre) pair: drop rows missing either field, split
# the comma-separated genre string, explode the resulting lists and trim the
# whitespace left around each genre name.
df_genre = (
    anime[['genre', 'rating']]
    .dropna(subset=['genre', 'rating'])
    .assign(genre=lambda frame: frame['genre'].str.split(','))
    .explode('genre')
    .assign(genre=lambda frame: frame['genre'].str.strip())
)

# Mean rating per genre, best genre first.
avg_rating_by_genre = (
    df_genre
    .groupby('genre')['rating']
    .mean()
    .reset_index()
    .sort_values('rating', ascending=False)
)

# The chart receives the rows re-sorted in ascending order of the mean rating.
fig = px.bar(
    avg_rating_by_genre.sort_values('rating'),
    x='rating',
    y='genre',
    text='rating',
    orientation='h',
    title='Average Rating by Genre',
)

# Print each mean with two decimals just outside its bar.
fig.update_traces(textposition='outside', texttemplate='%{text:.2f}')
fig.update_layout(margin=dict(l=220), yaxis_title='Genre', xaxis_title='Average Rating')
fig.show()

In [7]:
# Parse the episode counts leniently: anything non-numeric becomes NaN.
episodes_numeric = pd.to_numeric(anime['episodes'], errors='coerce')

# Keep titles that have at least one episode and a known rating.
mask = episodes_numeric.ge(1) & anime['rating'].notna()
anime_eps = anime.loc[mask, ['rating']].assign(episodes=episodes_numeric[mask])

# Right-closed bins: (0.5, 1.5] -> '1', (1.5, 6] -> '2-6', ..., (200, inf) -> '200+'.
bins = [0.5, 1.5, 6, 13, 26, 52, 100, 200, np.inf]
labels = ['1', '2-6', '7-13', '14-26', '27-52', '53-100', '101-200', '200+']
anime_eps['episodes_bin'] = pd.cut(
    anime_eps['episodes'],
    bins=bins,
    labels=labels,
    include_lowest=True,
    right=True,
)

# Mean rating and number of titles per bin (only bins that actually occur),
# ordered from the best-rated bin to the worst.
avg_rating_by_eps_bin = (
    anime_eps
    .groupby('episodes_bin', observed=True)
    .agg(avg_rating=('rating', 'mean'),
         count=('rating', 'size'))
    .reset_index()
    .sort_values('avg_rating', ascending=False)
)

# Bin labels as plain strings, in the ranking order computed above; passed to
# plotly so the axis follows the ranking instead of the categorical order.
order = [str(bin_label) for bin_label in avg_rating_by_eps_bin['episodes_bin']]

fig = px.bar(
    avg_rating_by_eps_bin,
    x='avg_rating',
    y='episodes_bin',
    orientation='h',
    text='avg_rating',
    hover_data={'count': True},
    category_orders={'episodes_bin': order},
    title='Average Rating by Episodes Bin (including 1-episode titles)',
)

# Print each mean with two decimals just outside its bar.
fig.update_traces(textposition='outside', texttemplate='%{text:.2f}')
fig.update_layout(margin=dict(l=120),
                  yaxis_title='Episodes bin',
                  xaxis_title='Average Rating')
fig.show()

In [8]:
# Type / member-count pairs with no missing values and a strictly positive
# member count (a log-scaled axis cannot show zero or negative values).
df_members = (
    anime[['type', 'members']]
    .dropna()
    .loc[lambda frame: frame['members'] > 0]
)

# Types ordered by their median member count, largest first.
median_members = df_members.groupby('type')['members'].median()
type_order = list(median_members.sort_values(ascending=False).index)

fig = px.box(
    df_members,
    x='type',
    y='members',
    points='outliers',
    labels={'type': 'Type', 'members': 'Members'},
    category_orders={'type': type_order},
    title='Members Distribution by Type',
)

# Log-scaled member axis, slanted type labels and margins sized to fit them.
fig.update_yaxes(type='log')
fig.update_xaxes(tickangle=-45)
fig.update_layout(margin=dict(l=60, r=30, b=120, t=60))
fig.show()

In [11]:
# Count how often each explicit score occurs. Rows with rating == -1 are left
# out (presumably a "no score given" placeholder — confirm against the dataset
# description).
rating_counts = (
    ratings.loc[ratings['rating'] != -1, 'rating']
    .value_counts()
    .sort_index()
    .rename_axis('rating')
    .reset_index(name='count')
)

# Plot the pre-aggregated counts rather than passing every raw row to
# px.histogram: the figure then stores one value per distinct score instead of
# one per rating row, and each score is guaranteed its own bar (the histogram's
# nbins is only an upper bound on the number of bins, not an exact bin layout).
fig = px.bar(rating_counts, x="rating", y="count",
             title="Distribution of user ratings")
fig.update_xaxes(dtick=1)  # one tick per integer score
fig.show()

In [None]:
# --- Top-20 most popular titles --------------------------------------------
# Popularity is measured by the `members` column; most popular title first.
top_pop = anime.sort_values("members", ascending=False).head(20)
fig = px.bar(top_pop, x="name", y="members",
             title="Top-20 most popular anime (by members)")
fig.update_layout(xaxis_tickangle=-60)  # slant the long titles so they stay readable
fig.show()

# --- Most frequent genres ---------------------------------------------------
# Each title lists its genres as one comma-separated string: split it, give
# every genre its own row and trim the surrounding whitespace before counting.
# `Counter` is imported in the notebook's import cell.
genre_counts = Counter(
    anime["genre"]
    .dropna()
    .str.split(",")
    .explode()
    .str.strip()
)

# most_common(15) returns the 15 largest counts in descending order and keeps
# first-seen order between equal counts, so the selection is deterministic.
genre_df = pd.DataFrame(genre_counts.most_common(15),
                        columns=["genre", "count"])
fig = px.bar(genre_df, x="genre", y="count",
             title="Top genres in the dataset")
fig.show()