Import the Python libraries and load the anime DataFrame

In [28]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
# Load the anime dataset from the local JSON file; the later cells read from `df`.
df = pd.read_json('anime.json')

In [2]:
# Keep only the rows that have a release year, then count titles per year
# (ordered by year) and draw them as a bar chart.
filtered_df = df.dropna(subset=["year"])
year_counts = filtered_df["year"].value_counts().sort_index()

bar_graph = px.bar(
    x=year_counts.index,
    y=year_counts.values,
    title='Number of Anime Released Each Year',
    labels={'x': 'Year of Release', 'y': 'Count of Anime Released'},
    width=1200,
)
bar_graph.show()


In [3]:
# Total number of anime per type in `filtered_df`, largest group first.
type_year_counts = (
    filtered_df
    .groupby('type')
    .size()
    .reset_index(name='total_count')
    .sort_values(by='total_count', ascending=False)
)
print(type_year_counts)

         type  total_count
7          TV         7874
1       Movie         4835
4         OVA         4102
2       Music         3936
3         ONA         3906
6     Special         1736
8  TV Special          758
0          CM          483
5          PV          269


In [6]:
# Stacked bar graph of the number of anime released each year, split by type.
# The grouped frame has one row per (year, type) pair, with the number of
# titles for that pair in the `count` column.
type_year_counts = filtered_df.groupby(['year', 'type']).size().reset_index(name='count')

stacked_bar_graph = px.bar(
    type_year_counts,
    x='year',
    y='count',
    color='type',  # one colored bar segment per anime type
    title='Number of Anime Released Each Year By Type',
    labels={'year': 'Year of Release', 'count': 'Count of Anime Released'}
)

stacked_bar_graph.show()

In [5]:
# One bar chart per anime type: number of titles released each year.
filtered_out_null_from_type = filtered_df[filtered_df["type"].notna()]

for t in filtered_out_null_from_type['type'].unique():
    subset = filtered_out_null_from_type[filtered_out_null_from_type['type'] == t]
    # Name the count column explicitly. The column names produced by
    # value_counts().reset_index() differ between pandas versions, so relying
    # on them makes x='year' / y='count' fail on pandas < 2.0. groupby().size()
    # gives the same per-year counts, ordered by year, on every version.
    counts = subset.groupby('year').size().reset_index(name='count')
    bar_graph_type = px.bar(
        counts,
        x='year',
        y='count',
        title =f'Number of Anime Released Each Year - {t}',
        labels={'year': 'Year of Release', 'count': 'Count of Anime Released'}
    )
    bar_graph_type.show()

In [27]:
# Box plot of score by type.
# Rows missing either a score or a type cannot be placed on the plot, so they
# are dropped first.
boxplot_filtered_df = df.dropna(subset=["score", "type"])

score_box_plot = px.box(
    boxplot_filtered_df,
    x='type',
    y='score',
    title="Distribution of Anime Scores by Type",
    labels={'type': "Type of Anime", 'score': 'Average score from users (1 to 10)'},
    hover_data=['title'],  # show the anime title when hovering a point
)
score_box_plot.show()

In [23]:
# Count how often each genre occurs across all anime and show the shares as a
# pie chart. Assumes every non-null `genres` entry is a list of genre strings
# -- TODO confirm against the dataset.
genre_lists = df['genres'].dropna()

# Flatten the per-anime lists into one flat list of genre labels.
# The loop variable is not called `list`, which would shadow the builtin, and
# empty-string labels are skipped so the pie chart agrees with the genre box
# plot, which filters them out as well.
all_genres = [
    genre
    for anime_genres in genre_lists
    for genre in anime_genres
    if genre != ''
]
genre_counts = pd.Series(all_genres).value_counts().reset_index()
# Set the column names explicitly so they do not depend on the pandas version.
genre_counts.columns = ['genre', 'count']

# Plot pie chart
plot_pie = px.pie(
    genre_counts,
    names="genre",
    values="count",
    title="Distribution of Anime by Genre"
)

plot_pie.show()

In [None]:
# Box plot of score by genre.
# Start from rows that have both a score and a genres entry, then expand each
# anime into one row per genre so every genre gets its own box.
boxplot_genre_data = df.dropna(subset=["score", "genres"])
genre_data_exploded = (
    boxplot_genre_data
    .explode('genres')
    # Drop rows left without a usable genre label after exploding.
    .loc[lambda exploded: exploded['genres'].notna() & (exploded['genres'] != '')]
)

# NOTE: the figure variable is called `studio_box_plot`, but it plots genres.
studio_box_plot = px.box(
    genre_data_exploded,
    x='genres',
    y='score',
    title="Distribution of Anime Scores by Genre",
    labels={'genres': "Anime Genre", 'score': 'Average score from users (1 to 10)'},
    hover_data=['title'],
)
studio_box_plot.show()