# In [None]:
import pandas as pd
from pyecharts.charts import Pie
from pyecharts import options as opts

# Load the Douban and IMDb datasets; both CSV files are decoded as GBK.
df_douban, df_imdb = (
    pd.read_csv(csv_name, encoding='gbk')
    for csv_name in ('豆瓣电影.csv', 'imdb_top_250_纯中文.csv')
)

# Take the genre column of each dataset as a plain list, skipping rows
# whose genre value is missing.
genres_douban, genres_imdb = (
    frame['电影类型'].dropna().tolist()
    for frame in (df_douban, df_imdb)
)

# Split the delimited genre strings into individual genre names.
def split_genres(genres_list, sep='/'):
    """Flatten delimited genre strings into one list of genre names.

    Args:
        genres_list: Iterable of strings, each holding one or more genre
            names joined by ``sep`` (for example ``"剧情/犯罪"``).
        sep: Delimiter between genre names. Defaults to ``'/'``.

    Returns:
        A list with every genre name in input order, duplicates kept.
        Whitespace around each name is removed and empty names are
        dropped, so ``"剧情 / 犯罪"`` and ``"剧情/犯罪"`` yield the same
        names and a trailing delimiter does not create a ``""`` genre.
    """
    genre_list = []
    for genres in genres_list:
        stripped = (token.strip() for token in genres.split(sep))
        # Keep only non-empty names so later counting does not treat
        # padded or blank tokens as separate genres.
        genre_list.extend(name for name in stripped if name)
    return genre_list

# Douban: flatten the genre strings, count how often each genre occurs,
# then convert the counts into the (name, value) pairs passed to pyecharts.
genre_list_douban = split_genres(genres_douban)
genre_counts_douban = pd.Series(genre_list_douban).value_counts()
data_douban = list(genre_counts_douban.items())

# IMDb: same three steps as above.
genre_list_imdb = split_genres(genres_imdb)
genre_counts_imdb = pd.Series(genre_list_imdb).value_counts()
data_imdb = list(genre_counts_imdb.items())

def _build_genre_pie(title, data):
    """Build the genre-distribution pie chart shared by both datasets.

    Args:
        title: Text shown as the chart title.
        data: List of ``(genre, count)`` pairs to plot.

    Returns:
        The configured ``Pie`` chart, not yet rendered.
    """
    return (
        Pie(init_opts=opts.InitOpts(width="1600px", height="800px"))
        # Two radius values (inner, outer) are passed, giving a ring shape.
        .add("", data, radius=["40%", "55%"])
        .set_global_opts(
            title_opts=opts.TitleOpts(title=title),
            # Vertical legend placed near the left edge, below the title.
            legend_opts=opts.LegendOpts(
                orient="vertical", pos_top="15%", pos_left="2%"
            ),
        )
        # Label each slice as "<genre>: <percentage>%".
        .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {d}%"))
    )


# Pie chart of the Douban genre distribution.
pie_douban = _build_genre_pie("豆瓣电影类型分布", data_douban)
# NOTE(review): the value returned by render_notebook() is discarded; a
# notebook normally displays only the last expression of a cell, so this
# chart may not appear inline — confirm whether that is intended.
pie_douban.render_notebook()
output_path = '豆瓣电影类型分布.html'
pie_douban.render(output_path)

# Pie chart of the IMDb genre distribution.
pie_imdb = _build_genre_pie("IMDb电影类型分布", data_imdb)
# NOTE(review): same as above — the returned display object is unused.
pie_imdb.render_notebook()
output_path = 'IMDb电影类型分布.html'
pie_imdb.render(output_path)