In [1]:
# coding:utf-8
# Notebook metadata.
# `__explain__` is a single string assembled from adjacent literals (no newline
# separators are inserted between the pieces). In English it says:
#   Directory layout
#     data               - Excel data used to generate the video content, plus the cut-out log file
#     font               - font files
#     material_base      - intro / outro / mid-roll / transition MP4 clips
#     material_everypart - generated silent part clips and cover, per content item
#     material_jpg       - watermark, cover and generated images, per content item
#     material_mp3       - MP3 files produced by Baidu AI
#     material_result    - final rendered videos (delete the old output before regenerating)
#     material_video     - part clips with narration merged in, cover, and the stitched result
#     backup scripts     - debugging history of the project
#   Usage
#     1. create a folder named after pic_name under material_jpg
#     2. collect a picture for the item and rename it to pic_name.jpg
#     3. run the script and wait for the output in material_result
#     4. "line 423" of the code switches randomly between images in the base folder
#        (NOTE(review): that line number refers to another revision of the script - confirm)
#     5. replace material_jpg/base/logo.png to change the watermark
#     6. regenerate base.jpg from material_jpg/base/cover.pptx to change the cover
__author__ = 'Mr.数据杨'
__explain__ = '文件目录说明：' \
              'data：用于存放视频中生成内容的excel表格数据，以及AI抠图的日志文件' \
              'font：用于存放字体文件' \
              'material_base：用于存放视频素材片头、片尾、片中、过场的MP4' \
              'material_everypart：根据不同的内容存放算法生成的无语音part素材和封面' \
              'material_jpg：用于存放视频用使用的水印、封面、不同的内容按照规则生成的图片' \
              'material_mp3：用于存放百度AI生成的MP3文件' \
              'material_result：用于存放最终视频生成的结果文件，如果生成同样的内容需要将源文件删除' \
              'material_video：根据不同的内容存放算法生成的合成语音后part素材和封面、总合成的结果' \
              '备份脚本：该项目Debug的过程' \
              '' \
              '使用说明：' \
              '1.在material_jpg中创建内容的文件夹，名称为对应pic_name的名称' \
              '2.在互联网上采集对应内容的图片，改名pic_name.jpg格式' \
              '3.无脑启动脚本等material_result出结果' \
              '4.代码423行处，根据material_jpg的base的文件夹下fuyong、zhongzhi进行随机切换图片生成不同的内容，素材自行搞定' \
              '5.水印根据material_jpg的base下的logo.png进行更换' \
              '6.封面根据material_jpg的base下的cover.pptx进行操作生成base.jpg进行更换'

import pandas as pd
import librosa
import cv2
import numpy as np
from PIL import Image
from moviepy.editor import *
import subprocess
import os,shutil
import glob
from removebg import RemoveBg
from aip import AipSpeech
from urllib import parse
import requests
import re
from pyecharts import options as opts
from pyecharts.charts import Map
from snapshot_selenium import snapshot
from pyecharts.render import make_snapshot

# 配置数据

In [None]:
def config():
    """Return the static rendering configuration.

    Returns:
        tuple: ``(font_path, text_dict)`` where ``font_path`` is the relative
        path of the TrueType font used for captions and ``text_dict`` maps each
        data column code to the Chinese heading shown/narrated for it.
    """
    # Column code -> on-screen heading (insertion order is kept).
    headings = dict(
        CnName="药材名称",
        PinYin="汉语拼音",
        LaiYuan="文件原文",
        LaDing="原拉丁植物动物矿物名",
        YCJY="药材基原",
        YWGJ="性味归经",
        GNZZ="功能主治",
        QYFB="地理分布",
        SS="省市",
        EnName="英文名",
        YCBM="药材别名",
        HXCF="化学成分",
        MJLS="名家论述",
        LDWM="拉丁文名",
        YLZY="药理作用",
        FF="附方",
        XDLCYJ="现代临床研究",
        ZYSX="注意事项",
        YYLB="药用类别",
        XDHYJ="现代化研究",
    )
    # Caption font file.
    caption_font = './font/kaiti.ttf'
    return caption_font, headings


# 文字处理对应方法 20个字符 常规使用
def clean_word(word):
    """Hard-wrap ``word`` into chunks of 20 characters.

    Returns:
        tuple: ``(word_len, strs)`` where ``word_len`` is ``len(word) // 20``
        and ``strs`` is ``word_len + 1`` consecutive 20-character slices, each
        followed by ``"\\n"``. When the length is an exact multiple of 20 (this
        includes the empty string) the last slice is empty, so the text ends
        with an extra blank line.
    """
    n = 20
    word_len = len(word) // n
    # word_len + 1 slices: the final one holds the remainder (possibly empty).
    pieces = [word[k * n:(k + 1) * n] + "\n" for k in range(word_len + 1)]
    strs = "".join(pieces)
    return word_len, strs

### 创建素材的各个文件夹
def MakeMaterialDir(pic_name):
    """Create the per-item working folders if they do not exist yet.

    One sub-folder named ``pic_name`` is created (relative to the current
    working directory) under each of ``material_everypart``, ``material_video``,
    ``material_jpg`` and ``material_mp3``. A message is printed only for the
    folders that were actually created.
    """
    for parent in ('material_everypart/', 'material_video/', 'material_jpg/', 'material_mp3/'):
        target = parent + pic_name
        if os.path.exists(target):
            continue
        os.makedirs(target)
        print("创建 " + target + " 文件夹完毕")


### 制作前删除合成语音的文件，否则无法继续
def CleanFiles(pic_name):
    """Delete stale outputs of a previous run for ``pic_name``.

    Removes every ``*.mp4`` directly inside ``./material_video/<pic_name>`` and
    ``./material_everypart/<pic_name>`` and every ``*.mp3`` directly inside
    ``./material_mp3/<pic_name>``, printing one confirmation line per folder.
    """
    stale_outputs = (
        ("material_video", '*.mp4'),
        ("material_everypart", '*.mp4'),
        ("material_mp3", '*.mp3'),
    )
    for folder, pattern in stale_outputs:
        item_dir = "./" + folder + "/" + pic_name
        for stale in glob.glob(os.path.join(item_dir, pattern)):
            os.remove(stale)
        print(pic_name + folder + " 旧文件清理完毕")


### 药材图片进行抠图生成素材
def CutoutJPG(pic_name):
    """Produce the background-free PNG of the item picture.

    Looks for ``./material_jpg/<pic_name>/<pic_name>.jpg_no_bg.png``. When it is
    missing, the remove.bg client is asked to cut out
    ``./material_jpg/<pic_name>/<pic_name>.jpg``; if that call raises, the JPG is
    copied to the PNG path so later steps still find a file.

    The API key is read from the ``REMOVEBG_API_KEY`` environment variable.
    """
    # NOTE(review): the literal fallback keeps existing setups working, but a key
    # committed to source should be rotated and supplied via the environment only.
    # Spare account keys that used to be listed here in comments were removed for
    # the same reason.
    api_key = os.environ.get("REMOVEBG_API_KEY", "exQr6L4B7Fe5LmnHpw5ZrJ4V")
    rmbg = RemoveBg(api_key, "data/error.log")

    jpg_name = "./material_jpg/" + pic_name + "/" + pic_name + ".jpg"
    png_name = jpg_name + "_no_bg.png"

    # Skip the (metered) API call when the cut-out already exists.
    if os.path.exists(png_name):
        print("该图片的内容抠图已经存在")
        return

    try:
        rmbg.remove_background_from_img_file(jpg_name)
    except Exception as err:
        # Best effort: fall back to the untouched picture, but say why.
        print("抠图失败，直接使用原图：" + str(err))
        shutil.copyfile(jpg_name, png_name)


### 合成封面图片
def CompositeCoverJPG(pic_name):
    """Compose the cover picture for ``pic_name``.

    The cut-out PNG of the item is resized to fill the rectangle
    (800, 400)-(1260, 690) and alpha-composited onto
    ``material_jpg/base/base.jpg``; the result is saved as
    ``material_jpg/<pic_name>/result.jpg``.
    """
    item_dir = 'material_jpg/' + pic_name + '/'
    # Target rectangle on the cover, as (left, upper, right, lower).
    box = (800, 400, 1260, 690)
    box_size = (box[2] - box[0], box[3] - box[1])

    base_img = Image.open('material_jpg/base/base.jpg')

    # Fully transparent layer of the cover's size that carries only the cut-out.
    overlay = Image.new('RGBA', base_img.size, (0, 0, 0, 0))
    cutout = Image.open(item_dir + pic_name + '.jpg_no_bg.png')
    cutout = cutout.convert("RGBA").resize(box_size)
    overlay.paste(cutout, box)

    # The layer doubles as its own mask so transparent pixels keep the cover visible.
    base_img.paste(overlay, (0, 0), overlay)
    base_img.save(item_dir + 'result.jpg')


### 读取文字转语音
def ChangeWordsToMp3(dataframe):
    """Synthesize one narration MP3 per column of ``dataframe``.

    For every column, the heading from the module-level ``text_dict`` plus the
    value in row 0 is sent to Baidu speech synthesis and the audio is written to
    ``material_mp3/<CnName>/<column>.mp3``. The run is best-effort: a column
    that fails is reported and skipped, the others are still processed.

    Credentials are read from ``BAIDU_AIP_APP_ID``, ``BAIDU_AIP_API_KEY`` and
    ``BAIDU_AIP_SECRET_KEY``.
    """
    # NOTE(review): the literal fallbacks keep existing setups working, but keys
    # committed to source should be rotated and supplied via the environment only.
    APP_ID = os.environ.get('BAIDU_AIP_APP_ID', '23757531')
    API_KEY = os.environ.get('BAIDU_AIP_API_KEY', 'hgRtmngXF4pXBoXuOvwKZO65')
    SECRET_KEY = os.environ.get('BAIDU_AIP_SECRET_KEY', 'ND12QZt4Mq3eBGHqCzarHDGerYX97kNl')
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

    for col in dataframe.columns:
        try:
            result = client.synthesis(text_dict[col] + dataframe[col][0], 'zh', 1, {'vol': 12, 'spd': 6, 'per': 0})
            if isinstance(result, dict):
                # A dict is not audio; report it instead of silently skipping the column.
                print(col + " 语音合成失败：" + str(result))
                continue
            with open('material_mp3/' + dataframe["CnName"][0] + "/" + col + '.mp3', 'wb') as f:
                f.write(result)
        except Exception as err:
            # Keep going with the remaining columns, but leave a trace: a missing
            # MP3 otherwise only shows up later as a KeyError in the part builders.
            print(col + " 语音合成失败：" + str(err))


### 构建文字生成MP3的内容属性字典
def Mp3Info(df):
    """Map every narration MP3 of the item to its duration.

    Args:
        df: table-like object; ``df["CnName"][0]`` names the item folder under
            ``material_mp3``.

    Returns:
        dict: ``{"<file>.mp3": duration}`` for each ``.mp3`` file located
        directly in ``./material_mp3/<CnName>``, with the duration as reported
        by ``librosa.get_duration``.
    """
    item_name = df["CnName"][0]
    # First os.walk entry is the folder itself; index 2 is its plain-file list.
    top_level_files = list(os.walk("./material_mp3/" + item_name))[0][2]

    time_name_dict = {}
    for file_name in top_level_files:
        if os.path.splitext(file_name)[1] != '.mp3':
            continue
        time_name_dict[file_name] = librosa.get_duration(
            filename="material_mp3/" + item_name + "/" + file_name)
    return time_name_dict


"""
拼接顺序制作
"CnName":"药材名称"
"LaiYuan":"文件原文"
"YCBM":"药材别名",
"YYLB":"药用类别",
"PinYin":"汉语拼音",
"LaDing":"原拉丁植物动物矿物名",
"EnName":"英文名",
"LDWM":"拉丁文名",
"YCJY":"药材基原",
"YWGJ":"性味归经",
"GNZZ":"功能主治",
"ZYSX":"注意事项",
"YLZY":"药理作用",
"QYFB":"地理分布",
"SS":"省市", # 自定义生成图表使用
"MJLS":"名家论述",
"HXCF":"化学成分",
"XDLCYJ":"现代临床研究",
"XDHYJ":"现代化研究",
"FF":"附方",
"""


### 第一部分
def FirstPart(pic_name, df):
    """Render part 1 (name, source text, aliases, medicinal category).

    Fields whose value is "暂无数据" are left out. The remaining narration MP3s
    are concatenated into ``material_mp3/<pic_name>/1st.mp3``; a silent video
    with stacked title/content captions, the item picture and the watermark is
    written to ``material_everypart/<pic_name>/1st.mp4`` and then muxed with the
    audio into ``material_video/<pic_name>/1st.mp4`` through ffmpeg.

    Reads the module-level ``text_dict``, ``font_path`` and ``time_name_dict``.
    A field without an entry in ``time_name_dict`` raises ``KeyError``.
    """
    word_list = ["CnName", "LaiYuan", "YCBM", "YYLB"]

    # (column, text, narration seconds) for every field that carries real data.
    data_list = []
    for col in word_list:
        data_text = df[col][0]
        if data_text != "暂无数据":
            data_list.append((col, data_text, time_name_dict[col + ".mp3"]))

    if not data_list:
        # Nothing to narrate: skip instead of rendering a zero-length clip.
        print("第一部分没有可用数据，已跳过")
        return

    # Concatenate the per-field narration into one track for this part.
    mp3_dir = 'material_mp3/' + pic_name + "/"
    with open(mp3_dir + '1st.mp3', 'wb') as f_write:
        for col, _, _ in data_list:
            with open(mp3_dir + col + '.mp3', 'rb') as f_read:
                f_write.write(f_read.read())

    start_time = 0
    all_duration_time = sum(item[2] for item in data_list)

    # Caption layout: y of the first title / first content, and the base step.
    title_y1 = 80
    title_y2 = 140
    title_y = 110

    result_list = []

    for col, content, duration_time in data_list:
        title = text_dict[col]

        # Long content is hard-wrapped by clean_word.
        if len(content) > 15:
            _, words_result = clean_word(content)
            lines = words_result.split("\n")[:-1]
            # clean_word emits an empty last chunk for exact multiples of its width.
            while lines and lines[-1] == "":
                lines.pop()
            content = "\n".join(lines)
            # Count the rendered lines. clean_word's first return value is the
            # chunk index (lines - 1), which made the step shrink for 16-19
            # characters and stay flat for two-line text.
            line_count = max(len(lines), 1)
        else:
            line_count = 1

        # Each caption appears when its narration starts and stays until the end.
        txt_title = (
            TextClip(title, font=font_path, fontsize=50, color='black', method='label', align='West')
                .set_position((230, title_y1))
                .set_duration(all_duration_time - start_time)
                .set_start(start_time)
        )
        txt_content = (
            TextClip(content, font=font_path, fontsize=40, color='black', method='label', align='West')
                .set_position((260, title_y2))
                .set_duration(all_duration_time - start_time)
                .set_start(start_time)
        )

        result_list.append(txt_title)
        result_list.append(txt_content)

        start_time = start_time + duration_time
        # Every additional wrapped line pushes the next block down by 50px.
        title_y1 = title_y1 + title_y + (line_count - 1) * 50
        title_y2 = title_y2 + title_y + (line_count - 1) * 50

    # Item picture (named so it does not shadow PIL's Image).
    herb_clip = (
        ImageClip("./material_jpg/" + pic_name + "/" + pic_name + ".jpg_no_bg.png")
            .set_duration(start_time)  # whole segment
            .resize(height=300)  # height in px, aspect ratio kept
            .set_pos((600, 90))
    )

    # Watermark
    logo = (
        ImageClip("./material_jpg/base/logo.png")
            .set_duration(start_time)  # whole segment
            .resize(height=50)  # height in px, aspect ratio kept
            .set_pos(("right", "top"))
    )

    # Background: repeat the base clip often enough to cover the narration.
    video = VideoFileClip("material_base/1s.mp4")
    backdrop = [video] * (int(all_duration_time) + 1)
    final_clip = concatenate_videoclips(backdrop).set_duration(all_duration_time).resize((1280, 720))
    cvc = CompositeVideoClip([final_clip, herb_clip, logo] + result_list, size=(1280, 720))
    cvc.write_videofile("./material_everypart/" + pic_name + "/" + "1st.mp4", fps=60, remove_temp=False)

    # Mux narration and video. An argument list (no shell) keeps item names with
    # spaces or shell metacharacters intact.
    outfile_name = 'material_video/' + pic_name + '/' + '1st.mp4'
    subprocess.call(['ffmpeg',
                     '-i', 'material_everypart/' + pic_name + '/' + '1st.mp4',
                     '-i', 'material_mp3/' + pic_name + '/' + '1st.mp3',
                     '-strict', '-2', '-f', 'mp4',
                     outfile_name])
    print("第一部分内容处理完毕，如果发现黑屏请调整改部分的clean_word")


### 第二部分
def SecondPart(pic_name, df):
    """Render part 2 (pinyin, original Latin name, English name, Latin name).

    Fields whose value is "暂无数据" are left out. The remaining narration MP3s
    are concatenated into ``material_mp3/<pic_name>/2nd.mp3``; a silent video
    with stacked title/content captions, the item picture and the watermark is
    written to ``material_everypart/<pic_name>/2nd.mp4`` and then muxed with the
    audio into ``material_video/<pic_name>/2nd.mp4`` through ffmpeg.

    Reads the module-level ``text_dict``, ``font_path`` and ``time_name_dict``.
    A field without an entry in ``time_name_dict`` raises ``KeyError``.
    """
    word_list = ["PinYin", "LaDing", "EnName", "LDWM"]

    # (column, text, narration seconds) for every field that carries real data.
    data_list = []
    for col in word_list:
        data_text = df[col][0]
        if data_text != "暂无数据":
            data_list.append((col, data_text, time_name_dict[col + ".mp3"]))

    if not data_list:
        # Nothing to narrate: skip instead of rendering a zero-length clip.
        print("第二部分没有可用数据，已跳过")
        return

    # Concatenate the per-field narration into one track for this part.
    mp3_dir = 'material_mp3/' + pic_name + "/"
    with open(mp3_dir + '2nd.mp3', 'wb') as f_write:
        for col, _, _ in data_list:
            with open(mp3_dir + col + '.mp3', 'rb') as f_read:
                f_write.write(f_read.read())

    start_time = 0
    all_duration_time = sum(item[2] for item in data_list)

    # Caption layout: y of the first title / first content, and the fixed step.
    title_y1 = 80
    title_y2 = 140
    title_y = 110

    result_list = []

    for col, content, duration_time in data_list:
        title = text_dict[col]

        # Each caption appears when its narration starts and stays until the end.
        txt_title = (
            TextClip(title, font=font_path, fontsize=50, color='black', method='label', align='West')
                .set_position((230, title_y1))
                .set_duration(all_duration_time - start_time)
                .set_start(start_time)
        )
        txt_content = (
            TextClip(content, font=font_path, fontsize=40, color='black', method='label', align='West')
                .set_position((260, title_y2))
                .set_duration(all_duration_time - start_time)
                .set_start(start_time)
        )

        result_list.append(txt_title)
        result_list.append(txt_content)

        start_time = start_time + duration_time
        title_y1 = title_y1 + title_y
        title_y2 = title_y2 + title_y

    # Item picture (named so it does not shadow PIL's Image).
    herb_clip = (
        ImageClip("./material_jpg/" + pic_name + "/" + pic_name + ".jpg_no_bg.png")
            .set_duration(start_time)  # whole segment
            .resize(height=300)  # height in px, aspect ratio kept
            .set_pos((600, 90))
    )

    # Watermark
    logo = (
        ImageClip("./material_jpg/base/logo.png")
            .set_duration(start_time)  # whole segment
            .resize(height=50)  # height in px, aspect ratio kept
            .set_pos(("right", "top"))
    )

    # Background: repeat the base clip often enough to cover the narration.
    video = VideoFileClip("material_base/1s.mp4")
    backdrop = [video] * (int(all_duration_time) + 1)
    final_clip = concatenate_videoclips(backdrop).set_duration(all_duration_time).resize((1280, 720))
    cvc = CompositeVideoClip([final_clip, herb_clip, logo] + result_list, size=(1280, 720))
    cvc.write_videofile("./material_everypart/" + pic_name + "/" + "2nd.mp4", fps=60, remove_temp=False)

    # Mux narration and video. An argument list (no shell) keeps item names with
    # spaces or shell metacharacters intact.
    outfile_name = 'material_video/' + pic_name + '/' + '2nd.mp4'
    subprocess.call(['ffmpeg',
                     '-i', 'material_everypart/' + pic_name + '/' + '2nd.mp4',
                     '-i', 'material_mp3/' + pic_name + '/' + '2nd.mp3',
                     '-strict', '-2', '-f', 'mp4',
                     outfile_name])
    print("第二部分内容处理完毕，如果发现黑屏请调整改部分的clean_word")


### 第三部分
def ThirdPart(pic_name, df):
    """Render part 3 (origin, nature/channels, indications, cautions, pharmacology).

    Fields whose value is "暂无数据" are left out. The remaining narration MP3s
    are concatenated into ``material_mp3/<pic_name>/3rd.mp3``. For each field
    the title is shown for the length of its narration while the wrapped text is
    paged underneath, every page getting an equal share of that time. The silent
    video goes to ``material_everypart/<pic_name>/3rd.mp4`` and is muxed with
    the audio into ``material_video/<pic_name>/3rd.mp4`` through ffmpeg.

    Reads the module-level ``text_dict``, ``font_path`` and ``time_name_dict``.
    A field without an entry in ``time_name_dict`` raises ``KeyError``.
    """
    word_list = ["YCJY", "YWGJ", "GNZZ", "ZYSX", "YLZY"]

    # (column, text, narration seconds) for every field that carries real data.
    data_list = []
    for col in word_list:
        data_text = df[col][0]
        if data_text != "暂无数据":
            data_list.append((col, data_text, time_name_dict[col + ".mp3"]))

    if not data_list:
        # Previously this fell through to an UnboundLocalError on the clips below.
        print("第三部分没有可用数据，已跳过")
        return

    # Concatenate the per-field narration into one track for this part.
    mp3_dir = 'material_mp3/' + pic_name + "/"
    with open(mp3_dir + '3rd.mp3', 'wb') as f_write:
        for col, _, _ in data_list:
            with open(mp3_dir + col + '.mp3', 'rb') as f_read:
                f_write.write(f_read.read())

    all_duration_time = sum(item[2] for item in data_list)

    # Segment-wide overlays, built once. The watermark now lasts the whole
    # segment; it used to take the duration of the last field only.
    logo = (
        ImageClip("./material_jpg/base/logo.png")
            .set_duration(all_duration_time)
            .resize(height=50)  # height in px, aspect ratio kept
            .set_pos(("right", "top"))
    )
    herb_clip = (
        ImageClip("./material_jpg/" + pic_name + "/" + pic_name + ".jpg_no_bg.png")
            .set_duration(all_duration_time)
            .resize(height=250)  # height in px, aspect ratio kept
            .set_pos(("center", 90))
    )

    start_time = 0
    result_list = []

    for col, content, duration_time in data_list:
        title = text_dict[col]

        # Wrap the text, then group the lines into pages: lines 0-2 form the
        # first page, every following page takes the next two lines.
        _, words_result = clean_word(content)
        str_list = words_result.split("\n")[:-1]
        allstrs = []
        every_list = []
        for line_no, line in enumerate(str_list):
            every_list.append(line + "\n")
            if line_no == 0:
                allstrs.append(every_list)
            if line_no % 2 == 0 and line_no != 0:
                every_list = []
                allstrs.append(every_list)
        # Drop pages that are empty or consist of a single blank line.
        allstrs = [page for page in allstrs if page != [] and page != ['\n']]
        # Empty content yields no page at all; avoid dividing by zero.
        every_page_time = duration_time / len(allstrs) if allstrs else duration_time

        txt_title = (
            TextClip(title, font=font_path, fontsize=50, color='black', method='label', align='West')
                .set_position((210, 320))
                .set_duration(duration_time)
                .set_start(start_time)
        )

        # One caption per page, shown back to back within this field's time slot.
        txt_list = []
        for page_no, page in enumerate(allstrs):
            txt_every = (
                TextClip("".join(page), font=font_path, fontsize=40, color='black', method='label', align='West')
                    .set_position((230, 390))
                    .set_duration(every_page_time)
                    .set_start(start_time + every_page_time * page_no)
            )
            txt_list.append(txt_every)

        start_time = start_time + duration_time

        result_list.append(txt_title)
        result_list = result_list + txt_list

    # Background: repeat the base clip often enough to cover the narration.
    video = VideoFileClip("material_base/1s.mp4")
    backdrop = [video] * (int(start_time) + 1)
    final_clip = concatenate_videoclips(backdrop).set_duration(start_time).resize((1280, 720))
    cvc = CompositeVideoClip([final_clip, herb_clip, logo] + result_list, size=(1280, 720))
    cvc.write_videofile("./material_everypart/" + pic_name + "/" + "3rd.mp4", fps=60, remove_temp=False, verbose=True)

    # Mux narration and video. An argument list (no shell) keeps item names with
    # spaces or shell metacharacters intact.
    outfile_name = 'material_video/' + pic_name + '/' + '3rd.mp4'
    subprocess.call(['ffmpeg',
                     '-i', 'material_everypart/' + pic_name + '/' + '3rd.mp4',
                     '-i', 'material_mp3/' + pic_name + '/' + '3rd.mp3',
                     '-strict', '-2', '-f', 'mp4',
                     outfile_name])
    print("第三部分内容处理完毕，如果发现黑屏请调整改部分的clean_word")


### 第四部分
def FourthPart(pic_name, df):
    """Render part 4 (geographic distribution with a province map).

    Only the "QYFB" text is narrated; the "SS" cell supplies the list of regions
    highlighted on a China map. When "QYFB" is "暂无数据" the part is skipped.
    Otherwise the narration is copied to ``material_mp3/<pic_name>/4th.mp3``, the
    map is rendered to ``material_jpg/<pic_name>/area.png`` and cropped to
    ``area_cut.png``, the silent video goes to
    ``material_everypart/<pic_name>/4th.mp4`` and is muxed with the audio into
    ``material_video/<pic_name>/4th.mp4`` through ffmpeg.

    Reads the module-level ``text_dict``, ``font_path`` and ``time_name_dict``.
    A field without an entry in ``time_name_dict`` raises ``KeyError``.
    """
    # Local alias so the PIL module and the moviepy clip below cannot be confused.
    from PIL import Image as PILImage

    word_list = ["QYFB", "SS"]

    # (column, text, narration seconds, regions); only the first column is narrated.
    data_list = []
    for col in word_list[:1]:
        data_text = df[col][0]
        if data_text != "暂无数据":
            col_duration_time = time_name_dict[col + ".mp3"]
            # NOTE(review): eval() executes whatever the spreadsheet cell contains.
            # If the cell is always a plain list literal, ast.literal_eval would be
            # the safe replacement - confirm the data format before switching.
            area = eval(df["SS"][0])
            data_list.append((col, data_text, col_duration_time, area))

    if not data_list:
        # Previously this fell through to an UnboundLocalError on the clips below.
        print("第四部分没有可用数据，已跳过")
        return

    # Concatenate the per-field narration into one track for this part.
    mp3_dir = 'material_mp3/' + pic_name + "/"
    with open(mp3_dir + '4th.mp3', 'wb') as f_write:
        for col, _, _, _ in data_list:
            with open(mp3_dir + col + '.mp3', 'rb') as f_read:
                f_write.write(f_read.read())

    all_duration_time = sum(item[2] for item in data_list)

    # Watermark for the whole segment.
    logo = (
        ImageClip("./material_jpg/base/logo.png")
            .set_duration(all_duration_time)
            .resize(height=50)  # height in px, aspect ratio kept
            .set_pos(("right", "top"))
    )

    item_jpg_dir = "./material_jpg/" + pic_name + "/"
    start_time = 0
    result_list = []

    for col, content, duration_time, area in data_list:
        title = text_dict[col]

        # Wrap the text, then group the lines into pages: lines 0-2 form the
        # first page, every following page takes the next two lines.
        _, words_result = clean_word(content)
        str_list = words_result.split("\n")[:-1]
        allstrs = []
        every_list = []
        for line_no, line in enumerate(str_list):
            every_list.append(line + "\n")
            if line_no == 0:
                allstrs.append(every_list)
            if line_no % 2 == 0 and line_no != 0:
                every_list = []
                allstrs.append(every_list)
        # Drop pages that are empty or consist of a single blank line.
        allstrs = [page for page in allstrs if page != [] and page != ['\n']]
        # Empty content yields no page at all; avoid dividing by zero.
        every_page_time = duration_time / len(allstrs) if allstrs else duration_time

        txt_title = (
            TextClip(title, font=font_path, fontsize=50, color='black', method='label', align='West')
                .set_position((210, 320))
                .set_duration(duration_time)
                .set_start(start_time)
        )

        # Every listed region gets the value 1 on the map. The list is sized from
        # the parsed regions; it used to be sized from the length of the raw
        # string, which only worked because zip() stops at the shorter input.
        list_data = [1] * len(area)
        name = pic_name + "的生长区域分布"
        chart = (
            Map()
                .add(name, [list(z) for z in zip(area, list_data)], "china")
                .set_global_opts(
                visualmap_opts=opts.VisualMapOpts(max_=1),
            )
        )
        make_snapshot(snapshot, chart.render(), item_jpg_dir + "area.png")

        # Cut away the left 20% of the snapshot.
        img = PILImage.open(item_jpg_dir + "area.png")
        full_w, full_h = img.size
        x = 0.2 * full_w
        region = img.crop((x, 0, x + 0.8 * full_w, full_h))
        region.save(item_jpg_dir + "area_cut.png")

        area_clip = (
            ImageClip(item_jpg_dir + "area_cut.png")
                .set_duration(all_duration_time)
                .resize(height=500)  # height in px, aspect ratio kept
                .set_pos((600, 90))
        )

        # One caption per page, shown back to back within this field's time slot.
        txt_list = []
        for page_no, page in enumerate(allstrs):
            txt_every = (
                TextClip("".join(page), font=font_path, fontsize=40, color='black', method='label', align='West')
                    .set_position((230, 390))
                    .set_duration(every_page_time)
                    .set_start(start_time + every_page_time * page_no)
            )
            txt_list.append(txt_every)

        start_time = start_time + duration_time

        result_list.append(txt_title)
        result_list = result_list + txt_list

    # Background: repeat the base clip often enough to cover the narration.
    video = VideoFileClip("material_base/1s.mp4")
    backdrop = [video] * (int(start_time) + 1)
    final_clip = concatenate_videoclips(backdrop).set_duration(start_time).resize((1280, 720))
    cvc = CompositeVideoClip([final_clip, area_clip, logo] + result_list, size=(1280, 720))
    cvc.write_videofile("./material_everypart/" + pic_name + "/" + "4th.mp4", fps=60, remove_temp=False, verbose=True)

    # Mux narration and video. An argument list (no shell) keeps item names with
    # spaces or shell metacharacters intact.
    outfile_name = 'material_video/' + pic_name + '/' + '4th.mp4'
    subprocess.call(['ffmpeg',
                     '-i', 'material_everypart/' + pic_name + '/' + '4th.mp4',
                     '-i', 'material_mp3/' + pic_name + '/' + '4th.mp3',
                     '-strict', '-2', '-f', 'mp4',
                     outfile_name])
    print("第四部分内容处理完毕，如果发现黑屏请调整改部分的clean_word")


### 第五部分
def FifthPart(pic_name, df):
    """Render part 5 (expert commentary, chemistry, clinical and modern research).

    Fields whose value is "暂无数据" are left out. The remaining narration MP3s
    are concatenated into ``material_mp3/<pic_name>/5th.mp3``. For each field
    the title is shown for the length of its narration while the wrapped text is
    paged underneath, every page getting an equal share of that time. The silent
    video goes to ``material_everypart/<pic_name>/5th.mp4`` and is muxed with
    the audio into ``material_video/<pic_name>/5th.mp4`` through ffmpeg.

    Reads the module-level ``text_dict``, ``font_path`` and ``time_name_dict``.
    A field without an entry in ``time_name_dict`` raises ``KeyError``.
    """
    word_list = ["MJLS", "HXCF", "XDLCYJ", "XDHYJ"]

    # (column, text, narration seconds) for every field that carries real data.
    data_list = []
    for col in word_list:
        data_text = df[col][0]
        if data_text != "暂无数据":
            data_list.append((col, data_text, time_name_dict[col + ".mp3"]))

    if not data_list:
        # Previously this fell through to an UnboundLocalError on the clips below.
        print("第五部分没有可用数据，已跳过")
        return

    # Concatenate the per-field narration into one track for this part.
    mp3_dir = 'material_mp3/' + pic_name + "/"
    with open(mp3_dir + '5th.mp3', 'wb') as f_write:
        for col, _, _ in data_list:
            with open(mp3_dir + col + '.mp3', 'rb') as f_read:
                f_write.write(f_read.read())

    all_duration_time = sum(item[2] for item in data_list)

    # Segment-wide overlays, built once. The watermark now lasts the whole
    # segment; it used to take the duration of the last field only.
    logo = (
        ImageClip("./material_jpg/base/logo.png")
            .set_duration(all_duration_time)
            .resize(height=50)  # height in px, aspect ratio kept
            .set_pos(("right", "top"))
    )
    topic_clip = (
        ImageClip("./material_jpg/base/yanjiu.jpg")
            .set_duration(all_duration_time)
            .resize(height=250)  # height in px, aspect ratio kept
            .set_pos(("center", 90))
    )

    start_time = 0
    result_list = []

    for col, content, duration_time in data_list:
        title = text_dict[col]

        # Wrap the text, then group the lines into pages: lines 0-2 form the
        # first page, every following page takes the next two lines.
        _, words_result = clean_word(content)
        str_list = words_result.split("\n")[:-1]
        allstrs = []
        every_list = []
        for line_no, line in enumerate(str_list):
            every_list.append(line + "\n")
            if line_no == 0:
                allstrs.append(every_list)
            if line_no % 2 == 0 and line_no != 0:
                every_list = []
                allstrs.append(every_list)
        # Drop pages that are empty or consist of a single blank line.
        allstrs = [page for page in allstrs if page != [] and page != ['\n']]
        # Empty content yields no page at all; avoid dividing by zero.
        every_page_time = duration_time / len(allstrs) if allstrs else duration_time

        txt_title = (
            TextClip(title, font=font_path, fontsize=50, color='black', method='label', align='West')
                .set_position((210, 320))
                .set_duration(duration_time)
                .set_start(start_time)
        )

        # One caption per page, shown back to back within this field's time slot.
        txt_list = []
        for page_no, page in enumerate(allstrs):
            txt_every = (
                TextClip("".join(page), font=font_path, fontsize=40, color='black', method='label', align='West')
                    .set_position((230, 390))
                    .set_duration(every_page_time)
                    .set_start(start_time + every_page_time * page_no)
            )
            txt_list.append(txt_every)

        start_time = start_time + duration_time

        result_list.append(txt_title)
        result_list = result_list + txt_list

    # Background: repeat the base clip often enough to cover the narration.
    video = VideoFileClip("material_base/1s.mp4")
    backdrop = [video] * (int(start_time) + 1)
    final_clip = concatenate_videoclips(backdrop).set_duration(start_time).resize((1280, 720))
    cvc = CompositeVideoClip([final_clip, topic_clip, logo] + result_list, size=(1280, 720))
    cvc.write_videofile("./material_everypart/" + pic_name + "/" + "5th.mp4", fps=60, remove_temp=False, verbose=True)

    # Mux narration and video. An argument list (no shell) keeps item names with
    # spaces or shell metacharacters intact.
    outfile_name = 'material_video/' + pic_name + '/' + '5th.mp4'
    subprocess.call(['ffmpeg',
                     '-i', 'material_everypart/' + pic_name + '/' + '5th.mp4',
                     '-i', 'material_mp3/' + pic_name + '/' + '5th.mp3',
                     '-strict', '-2', '-f', 'mp4',
                     outfile_name])
    print("第五部分内容处理完毕，如果发现黑屏请调整改部分的clean_word")


### 第六部分
def SixthPart(pic_name, df):
    """Render part 6 (the "FF" prescriptions field).

    When the field is "暂无数据" the part is skipped. Otherwise its narration is
    copied to ``material_mp3/<pic_name>/6th.mp3``, the title is shown for the
    length of the narration while the wrapped text is paged underneath (every
    page getting an equal share of that time), the silent video goes to
    ``material_everypart/<pic_name>/6th.mp4`` and is muxed with the audio into
    ``material_video/<pic_name>/6th.mp4`` through ffmpeg.

    Reads the module-level ``text_dict``, ``font_path`` and ``time_name_dict``.
    A field without an entry in ``time_name_dict`` raises ``KeyError``.
    """
    word_list = ["FF"]

    # (column, text, narration seconds) for every field that carries real data.
    data_list = []
    for col in word_list:
        data_text = df[col][0]
        if data_text != "暂无数据":
            data_list.append((col, data_text, time_name_dict[col + ".mp3"]))

    if not data_list:
        # Previously this fell through to an UnboundLocalError on the clips below.
        print("第六部分没有可用数据，已跳过")
        return

    # Concatenate the per-field narration into one track for this part.
    mp3_dir = 'material_mp3/' + pic_name + "/"
    with open(mp3_dir + '6th.mp3', 'wb') as f_write:
        for col, _, _ in data_list:
            with open(mp3_dir + col + '.mp3', 'rb') as f_read:
                f_write.write(f_read.read())

    all_duration_time = sum(item[2] for item in data_list)

    # Segment-wide overlays, built once. The watermark now lasts the whole
    # segment; it used to take the duration of the last field only.
    logo = (
        ImageClip("./material_jpg/base/logo.png")
            .set_duration(all_duration_time)
            .resize(height=50)  # height in px, aspect ratio kept
            .set_pos(("right", "top"))
    )
    topic_clip = (
        ImageClip("./material_jpg/base/fufang.jpg")
            .set_duration(all_duration_time)
            .resize(height=250)  # height in px, aspect ratio kept
            .set_pos(("center", 90))
    )

    start_time = 0
    result_list = []

    for col, content, duration_time in data_list:
        title = text_dict[col]

        # Wrap the text, then group the lines into pages: lines 0-2 form the
        # first page, every following page takes the next two lines.
        _, words_result = clean_word(content)
        str_list = words_result.split("\n")[:-1]
        allstrs = []
        every_list = []
        for line_no, line in enumerate(str_list):
            every_list.append(line + "\n")
            if line_no == 0:
                allstrs.append(every_list)
            if line_no % 2 == 0 and line_no != 0:
                every_list = []
                allstrs.append(every_list)
        # Drop pages that are empty or consist of a single blank line.
        allstrs = [page for page in allstrs if page != [] and page != ['\n']]
        # Empty content yields no page at all; avoid dividing by zero.
        every_page_time = duration_time / len(allstrs) if allstrs else duration_time

        txt_title = (
            TextClip(title, font=font_path, fontsize=50, color='black', method='label', align='West')
                .set_position((210, 320))
                .set_duration(duration_time)
                .set_start(start_time)
        )

        # One caption per page, shown back to back within this field's time slot.
        txt_list = []
        for page_no, page in enumerate(allstrs):
            txt_every = (
                TextClip("".join(page), font=font_path, fontsize=40, color='black', method='label', align='West')
                    .set_position((230, 390))
                    .set_duration(every_page_time)
                    .set_start(start_time + every_page_time * page_no)
            )
            txt_list.append(txt_every)

        start_time = start_time + duration_time

        result_list.append(txt_title)
        result_list = result_list + txt_list

    # Background: repeat the base clip often enough to cover the narration.
    video = VideoFileClip("material_base/1s.mp4")
    backdrop = [video] * (int(start_time) + 1)
    final_clip = concatenate_videoclips(backdrop).set_duration(start_time).resize((1280, 720))
    cvc = CompositeVideoClip([final_clip, topic_clip, logo] + result_list, size=(1280, 720))
    cvc.write_videofile("./material_everypart/" + pic_name + "/" + "6th.mp4", fps=60, remove_temp=False, verbose=True)

    # Mux narration and video. An argument list (no shell) keeps item names with
    # spaces or shell metacharacters intact.
    outfile_name = 'material_video/' + pic_name + '/' + '6th.mp4'
    subprocess.call(['ffmpeg',
                     '-i', 'material_everypart/' + pic_name + '/' + '6th.mp4',
                     '-i', 'material_mp3/' + pic_name + '/' + '6th.mp3',
                     '-strict', '-2', '-f', 'mp4',
                     outfile_name])
    print("第六部分内容处理完毕，如果发现黑屏请调整改部分的clean_word")


### 制作封面
def MakeCoverMp4(pic_name):
    """Turn the composed cover image into a one-second clip with a title caption.

    Reads ``./material_jpg/<pic_name>/result.jpg``, writes it as 10 identical
    frames at 10 fps (one second, MJPG) to
    ``./material_everypart/<pic_name>/cover.mp4``, then overlays ``pic_name``
    as centred text and writes the composite to
    ``./material_video/<pic_name>/cover.mp4`` at 60 fps.

    Args:
        pic_name: Name of the content folder; also used as the caption text.

    Raises:
        ValueError: If the cover image exists but cannot be decoded.
    """
    # Kept as a function-local import, as in the original.
    # NOTE(review): presumably this guards against the module-level ``Image``
    # name being shadowed elsewhere in the file - confirm.
    from PIL import Image

    size = (1280, 720)
    cover_jpg = "./material_jpg/" + pic_name + "/result.jpg"
    raw_mp4 = "./material_everypart/" + pic_name + "/" + "cover.mp4"

    # np.fromfile + imdecode is used instead of cv2.imread so that paths with
    # non-ASCII characters can be read.
    f_read = cv2.imdecode(np.fromfile(cover_jpg, dtype=np.uint8), cv2.IMREAD_COLOR)
    if f_read is None:
        # imdecode signals failure by returning None rather than raising.
        raise ValueError("cannot decode cover image: " + cover_jpg)

    # Image.NONE was an alias of NEAREST (value 0) that Pillow 10 removed;
    # Image.NEAREST selects the same filter on every Pillow version.
    f_rs = Image.fromarray(f_read).resize(size, resample=Image.NEAREST)
    f_out = np.array(f_rs)

    fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
    vw = cv2.VideoWriter(raw_mp4, fourcc=fourcc, fps=10, frameSize=size)
    try:
        # 10 frames at 10 fps == a one-second still video.
        for _ in range(10):
            vw.write(f_out)
    finally:
        # Always flush and release the writer, even if a write fails.
        vw.release()

    name = pic_name

    Txt = (
        TextClip(name, font='./font/kaiti.ttf', fontsize=80, color='black', method='label')
            .set_position(("center", 420))
            .set_duration(1)  # caption lasts as long as the cover clip
    )

    path = "material_everypart/" + pic_name + "/" + "cover.mp4"
    video = VideoFileClip(path).resize((1280, 720))
    try:
        cvc = CompositeVideoClip([video, Txt], size=(1280, 720))
        cvc.write_videofile("./material_video/" + pic_name + "/" + "cover.mp4", fps=60, remove_temp=False, verbose=False)
    finally:
        # Release the ffmpeg reader held by the source clip.
        video.close()


### 按照顺序将视频进行拼接
def StitchingVideo(pic_name):
    """Concatenate cover, generated parts and ending, then mix in background music.

    Clip order: cover, ``1st.mp4`` (faded in from white), then for each of
    ``2nd.mp4`` .. ``6th.mp4`` that exists a transition clip followed by the
    part with its last 0.5 s dropped, and finally the ending clip (faded in
    from white). All clips are resized to 1280x720. The concatenation is
    written to ``material_video/<pic_name>/result.mp4`` and then combined with
    ``material_mp3/music.mp3`` by ffmpeg into
    ``material_result/【每日学中药】<pic_name>.mp4``.

    Finally, every ``*.mp3`` and ``*.html`` file in the current working
    directory is deleted.

    Args:
        pic_name: Name of the content folder under ``material_video/``.
    """
    # Directory holding the narrated part clips and the cover.
    path_material = "material_video/" + pic_name + "/"
    # Transition clip inserted before parts 2-6.
    path_chanege = "material_base/change.mp4"
    # Cover clip.
    path_cover = "./material_video/" + pic_name + "/" + "cover.mp4"
    # Ending clip.
    path_end = "material_base/end.mp4"

    size = (1280, 720)
    white = (1, 1, 1)

    def part_exists(file_name):
        # A part that failed to generate simply has no file and is skipped.
        return os.path.isfile(path_material + file_name)

    clips = []
    try:
        clips.append(VideoFileClip(path_cover).resize(size))

        if part_exists("1st.mp4"):
            clips.append(VideoFileClip(path_material + "1st.mp4").resize(size).fadein(2, white))

        for part_name in ("2nd.mp4", "3rd.mp4", "4th.mp4", "5th.mp4", "6th.mp4"):
            if not part_exists(part_name):
                continue
            clips.append(VideoFileClip(path_chanege).resize(size))
            part_clip = VideoFileClip(path_material + part_name).resize(size)
            # Drop the final half second of every part after the first.
            clips.append(part_clip.set_duration(part_clip.duration - 0.5))

        clips.append(VideoFileClip(path_end).resize(size).fadein(2, white))

        final_clip = concatenate_videoclips(clips)

        # Write the concatenated (still music-less) video.
        final_clip.write_videofile(path_material + "result.mp4", fps=60, remove_temp=False)
    finally:
        # Release the ffmpeg readers held by every source clip.
        for clip in clips:
            clip.close()

    # Mix the background music under the narration track.
    inmp4 = 'material_video/' + pic_name + '/' + 'result.mp4'
    inmp3 = 'material_mp3/music.mp3'
    outmp4 = 'material_result/【每日学中药】' + pic_name + '.mp4'

    # Narration +10 dB, music -10 dB, then mix both into one audio stream.
    audio_mix = ("[0:a]volume=10dB[a0];"
                 "[1:a]volume=-10dB[a1];"
                 "[a0][a1]amix=inputs=2[a]")
    # Argument list (no shell) so names with spaces or shell metacharacters
    # are passed to ffmpeg verbatim.
    cmd = ['ffmpeg', '-y', '-i', inmp4, '-i', inmp3,
           '-filter_complex', audio_mix,
           '-map', '0:v', '-map', '[a]',
           '-c:v', 'copy', '-c:a', 'aac', '-shortest', outmp4]
    try:
        # subprocess.call with PIPE can block forever once ffmpeg fills the
        # pipe buffer; run() drains stderr while waiting.
        completed = subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE)
    except OSError as exc:
        # ffmpeg missing or not executable: report and continue to cleanup.
        print("合成最终文件和背景音乐失败：", exc)
    else:
        if completed.returncode == 0:
            print("合成最终文件和背景音乐完毕")
        else:
            # Show the tail of ffmpeg's log, where the actual error is printed.
            print("合成最终文件和背景音乐失败：",
                  completed.stderr.decode("utf-8", errors="replace")[-2000:])

    # Remove leftover files from the working directory: temporary MP3 files
    # and generated HTML files.
    path = "./"
    for pattern in ('*.mp3', '*.html'):
        for infile in glob.glob(os.path.join(path, pattern)):
            os.remove(infile)

### 自动抓取百度图片第一个位置的图片
def RequestGetImage(pic_name):
    """Download the first Baidu image-search thumbnail for ``pic_name``.

    Does nothing (apart from printing a notice) when
    ``./material_jpg/<pic_name>/<pic_name>.jpg`` already exists. Otherwise the
    search is tried first with the keyword prefixed by the URL-encoded word
    "草药" plus a space, and, if that attempt fails for any reason, again with
    the bare keyword. The first ``thumbURL ... .jpg`` match in the result page
    is downloaded and saved to that path.

    Args:
        pic_name: Search keyword and name of the target folder/file.

    Raises:
        Exception: Whatever the fallback search raised (e.g. ``IndexError``
            when no thumbnail URL is found, or a ``requests`` error).
        requests.HTTPError: If the image download returns an error status.
    """
    jpg_name = "./material_jpg/" + pic_name + "/" + pic_name + ".jpg"
    if os.path.exists(jpg_name):
        print("该图片的内容已经存在")
        return

    name_code = parse.quote(pic_name)
    headers = {
        'user-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)',
        'Referer': 'http://www.zhihu.com/articles'
    }
    # Without a timeout a stalled connection would hang the whole pipeline.
    timeout = 30
    search_url = ("https://image.baidu.com/search/index?ct=201326592&tn=baiduimage&word={}"
                  "&pn=0&ie=utf-8&oe=utf-8&cl=2&lm=-1&fr=&se=&sme=&hd=1&latest=0&copyright=0")
    # Raw string: "\." in a normal literal is an invalid escape sequence.
    thumb_pattern = re.compile(r"thumbURL.*?\.jpg")

    jpg_url = None
    last_error = None
    # First try "草药 <name>" (URL-encoded prefix), then the bare name.
    for keyword in ("%E8%8D%89%E8%8D%AF%20" + name_code, name_code):
        try:
            page = requests.get(search_url.format(keyword), headers=headers, timeout=timeout)
            # A match looks like  thumbURL":"https://...jpg ; keep the URL part.
            jpg_url = thumb_pattern.findall(page.text)[0].split("\"")[-1]
            break
        except Exception as exc:
            last_error = exc
    if jpg_url is None:
        # Both searches failed: surface the error from the fallback attempt.
        raise last_error

    image = requests.get(jpg_url, headers=headers, timeout=timeout)
    # Do not save an HTTP error page as the .jpg: later runs would treat it
    # as an already-downloaded image.
    image.raise_for_status()
    with open(jpg_name, 'wb') as file:
        file.write(image.content)


if __name__ == '__main__':
    # Load the content spreadsheet. data.xlsx was originally exported from the
    # MongoDB collection TCM_Datas.TCM; if that export is ever re-enabled, read
    # the database credentials from the environment instead of hardcoding them.
    #
    # NOTE(review): df, pic_name, font_path, text_dict and time_name_dict are
    # deliberately kept as module-level names; functions in this file may read
    # some of them as globals - confirm before moving this into a function.
    df = pd.read_excel("data/data.xlsx")
    num = 2  # 1-based position of the entry to produce
    row = num - 1  # 0-based row label of that entry
    df = df.loc[row:row, :].reset_index(drop=True)
    pic_name = df["药材名称"][0].replace("?", "")
    print("本次制作内容：", pic_name)

    # Rename the Chinese column headers to the short keys used by the code.
    df = df.rename(columns={
        '药材名称': "CnName",
        '汉语拼音': "PinYin",
        '文件原文': "LaiYuan",
        '原拉丁植物动物矿物名': "LaDing",
        '药材基原': "YCJY",
        '性味归经': "YWGJ",
        '功能主治': "GNZZ",
        '地理分布': "QYFB",
        '省市': "SS",
        '英文名': "EnName",
        '药材别名': "YCBM",
        '化学成分': "HXCF",
        '名家论述': "MJLS",
        '拉丁文名': "LDWM",
        '药理作用': "YLZY",
        '附方': "FF",
        '现代临床研究': "XDLCYJ",
        '注意事项': "ZYSX",
        '药用类别': "YYLB",
        '现代化研究': "XDHYJ"})

    # Fill missing cells first so the clean-up below never sees NaN.
    df = df.fillna("暂无数据")
    # Strip spaces and the two section labels from the distribution text.
    # Written through .loc: chained indexing (df["QYFB"][0] = ...) may assign
    # to a temporary copy and leave df unchanged.
    qyfb_text = df.loc[0, "QYFB"]
    if isinstance(qyfb_text, str):
        for noise in (" ", "生态环境", "资源分布"):
            qyfb_text = qyfb_text.replace(noise, "")
        df.loc[0, "QYFB"] = qyfb_text

    # Configuration (font path and column-name mapping).
    font_path, text_dict = config()
    # Create the working folders for this entry.
    MakeMaterialDir(pic_name)
    # Remove output left over from a previous run.
    CleanFiles(pic_name)
    # Fetch a picture from Baidu; replace the file by hand if it is wrong.
    RequestGetImage(pic_name)
    # Cut the subject out of the picture.
    CutoutJPG(pic_name)
    # Compose the picture onto the cover.
    CompositeCoverJPG(pic_name)
    # Generate the narration audio and collect its timing information.
    ChangeWordsToMp3(df)
    time_name_dict = Mp3Info(df)

    # Body parts 1-6. Each part is best-effort: a failed part is skipped, but
    # the failure is reported instead of being silently swallowed, and
    # Ctrl-C is no longer caught.
    for part_func in (FirstPart, SecondPart, ThirdPart, FourthPart, FifthPart, SixthPart):
        try:
            part_func(pic_name, df)
        except Exception as exc:
            print("{} 生成失败，已跳过：{!r}".format(part_func.__name__, exc))

    # Build the cover clip.
    MakeCoverMp4(pic_name)
    # Concatenate everything and mix in the background music.
    StitchingVideo(pic_name)
