In [2]:
import pandas as pd
import numpy as np
import plotly.express as px

# Load the anime dataset from MAL-anime.csv and discard rows with missing values.
# DataFrame.dropna() returns a new frame instead of modifying the original,
# so its result must be assigned for the rows to actually be removed.
animeDB = pd.read_csv("MAL-anime.csv").dropna()
animeDB.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12774 entries, 0 to 12773
Data columns (total 10 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Unnamed: 0  12774 non-null  int64  
 1   Title       12774 non-null  object 
 2   Rank        12774 non-null  int64  
 3   Type        12774 non-null  object 
 4   Episodes    12774 non-null  object 
 5   Aired       12774 non-null  object 
 6   Members     12774 non-null  int64  
 7   page_url    12774 non-null  object 
 8   image_url   12774 non-null  object 
 9   Score       12774 non-null  float64
dtypes: float64(1), int64(3), object(6)
memory usage: 998.1+ KB


In [3]:
# Remove the two URL columns and the "Unnamed: 0" column (presumably an index
# that was saved into the CSV), then re-check the remaining schema.
unused_columns = ["page_url", "image_url", "Unnamed: 0"]
animeDB = animeDB.drop(columns=unused_columns)
animeDB.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12774 entries, 0 to 12773
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Title     12774 non-null  object 
 1   Rank      12774 non-null  int64  
 2   Type      12774 non-null  object 
 3   Episodes  12774 non-null  object 
 4   Aired     12774 non-null  object 
 5   Members   12774 non-null  int64  
 6   Score     12774 non-null  float64
dtypes: float64(1), int64(2), object(4)
memory usage: 698.7+ KB


In [4]:
# Histogram of the Rank column; the count axis uses a logarithmic scale.
rank_distribution = px.histogram(
    data_frame=animeDB,
    x="Rank",
    log_y=True,
    opacity=0.85,
    color_discrete_sequence=["blue"],
    title="Rank Distribution",
)
rank_distribution.show()

In [5]:
# Histogram of the Members column.
member_distribution = px.histogram(
    data_frame=animeDB,
    x="Members",
    opacity=0.85,
    color_discrete_sequence=["purple"],
    title="Members Distribution",
)
member_distribution.show()

In [6]:
# Histogram of the Score column.
score_distribution = px.histogram(
    data_frame=animeDB,
    x="Score",
    opacity=0.85,
    color_discrete_sequence=["red"],
    title="Score Distribution",
)
score_distribution.show()

In [7]:
# Best score per title, top 20. `max` is used rather than `sum`: if a title
# appears on more than one row, summing would add the scores together and
# rank the title on a value that is not a real score.
best_scored = animeDB.groupby("Title")["Score"].max().nlargest(20)

# The values are already aggregated, so draw them as a bar chart from a small
# tidy frame rather than handing them to px.histogram alongside the unrelated
# full-size animeDB frame.
best_scored_df = best_scored.reset_index()  # columns: Title, Score
score_by_title = px.bar(
    best_scored_df,
    x="Title",
    y="Score",
    color="Title",
    title="Top Anime",
    opacity=0.85,
    height=900,
)
# Leave the x-axis untitled; the tick labels already show the titles.
score_by_title.update_xaxes(title_text="")
score_by_title.show()

In [8]:
# Total Members per title, keeping the 20 largest.
most_members = animeDB.groupby("Title")["Members"].sum().nlargest(20)

# The values are already aggregated, so draw them as a bar chart from a small
# tidy frame rather than handing them to px.histogram alongside the unrelated
# full-size animeDB frame.
most_members_df = most_members.reset_index()  # columns: Title, Members
members_by_title = px.bar(
    most_members_df,
    x="Title",
    y="Members",
    color="Title",
    title="Top Members",
    opacity=0.85,
    height=670,
)
# Leave the x-axis untitled; the tick labels already show the titles.
members_by_title.update_xaxes(title_text="")

members_by_title.show()

In [9]:
# Episodes is loaded as text (object dtype), but the next chart needs numbers.
# errors='coerce' turns entries that cannot be parsed as numbers into NaN, and
# with NaN present the converted column comes out as float64.
animeDB = animeDB.assign(
    Episodes=pd.to_numeric(animeDB["Episodes"], errors='coerce', downcast='integer')
)
# Show the schema again to confirm the new Episodes dtype.
animeDB.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12774 entries, 0 to 12773
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Title     12774 non-null  object 
 1   Rank      12774 non-null  int64  
 2   Type      12774 non-null  object 
 3   Episodes  12719 non-null  float64
 4   Aired     12774 non-null  object 
 5   Members   12774 non-null  int64  
 6   Score     12774 non-null  float64
dtypes: float64(2), int64(2), object(3)
memory usage: 698.7+ KB


In [10]:
# Total Episodes per title, keeping the 20 largest. This relies on Episodes
# having been converted to a numeric dtype beforehand.
more_episodes = animeDB.groupby("Title")["Episodes"].sum().nlargest(20)

# The values are already aggregated, so draw them as a bar chart from a small
# tidy frame rather than handing them to px.histogram alongside the unrelated
# full-size animeDB frame.
more_episodes_df = more_episodes.reset_index()  # columns: Title, Episodes
episodes_by_title = px.bar(
    more_episodes_df,
    x="Title",
    y="Episodes",
    color="Title",
    title="Top Longer Animes",
    opacity=0.85,
    height=875,
)
# Leave the x-axis untitled; the tick labels already show the titles.
episodes_by_title.update_xaxes(title_text="")

episodes_by_title.show()

In [11]:
# Share of rows per Type value, drawn as a pie chart.
type_share = animeDB["Type"].value_counts(normalize=True)
types = px.pie(
    animeDB,
    names=type_share.index,
    values=type_share,
    title="Type",
    opacity=0.85,
)

types.show()

In [12]:
# Sum of Score for each distinct Aired value, keeping the 20 largest totals.
top_aired = animeDB.groupby("Aired")["Score"].sum().nlargest(20)

# The values are already aggregated, so draw them as a bar chart from a small
# tidy frame rather than handing them to px.histogram alongside the unrelated
# full-size animeDB frame.
top_aired_df = top_aired.reset_index()  # columns: Aired, Score
best_scored_aired = px.bar(
    top_aired_df,
    x="Aired",
    y="Score",
    color="Aired",
    # The plotted value is a sum of scores, not a number of rows, so the axis
    # is labelled accordingly instead of "Count".
    labels={"Score": "Total Score"},
    title="Top Anime Aired",
    opacity=0.85,
    height=630,
)
# Leave the x-axis untitled; the tick labels already show the Aired values.
best_scored_aired.update_xaxes(title_text="")
best_scored_aired.show()