# Data Science Discovery Project - UCSC MARINe Academic Research - Pyecharts

## Import Modules

In [65]:
from pyecharts.globals import CurrentConfig
CurrentConfig.ONLINE_HOST = "pyecharts-assets-master/assets/"
import pandas as pd
from pyecharts.charts import Bar, Grid, Line, Page, Pie
from pyecharts.charts import ThemeRiver
from pyecharts.globals import ThemeType
from pyecharts.charts import Map3D
from pyecharts.commons.utils import JsCode
from pyecharts.globals import ChartType, SymbolType
from bs4 import BeautifulSoup
import os
import numpy as np
import pyecharts.options as opts
from pyecharts.charts import Surface3D
from pyecharts.charts import Scatter3D

## Load Dataset and Processing

In [66]:
# === df: Species Height data (sheet "point_contact_raw_with_heights")
df = pd.read_excel("source data/species_height_data.xlsx",sheet_name="point_contact_raw_with_heights")
# === data_elevation: Elevation data (sheet "true_y_consolidated")
data_elevation = pd.read_excel("source data/elevation_data.xlsx",sheet_name="true_y_consolidated")
# === k_means: Machine Learning Data (K-means result exported as CSV)
k_means = pd.read_csv("Machine_Learning_K_means_result.csv")

## Define site name

In [67]:
# Site the charts below are built for. The value is compared with `==` against
# the "intertidal_sitename" / "sitename" columns, so it must match the data
# spelling exactly.
site_name = "Sea Ranch"
# Alternative sites (uncomment one to switch):
# site_name = "Point Sierra Nevada"
# site_name = "Bodega"
# site_name = "Fitzgerald Marine Reserve"

## Data Processing

In [68]:
# === Data Processing
# Keep only the rows of the selected site. An explicit copy is taken so that
# adding the "time" column below writes to an independent frame rather than
# to a slice of `df`.
df_1 = df.loc[df["intertidal_sitename"] == site_name].copy()
# Assemble the survey date directly from the year/month/day columns instead
# of gluing them into a "Y-M-D-" string (with a trailing dash) and parsing
# that string one row at a time.
df_1["time"] = pd.to_datetime(df_1[["year", "month", "day"]])
df_1 = df_1[["time", "species_lump", "pc_point_type"]]
# === get value list + dataframe construction
# `data` holds one row per (time, species) pair; "amount" is the number of
# non-null `pc_point_type` records in that group.
data = (
    df_1.groupby(["time", "species_lump"])["pc_point_type"]
    .count()
    .rename("amount")
    .reset_index()
    .rename(columns={"species_lump": "species"})
)
# Plain-Python lists of the three columns (kept because the chart functions
# read `time_lst` directly).
time_lst = data["time"].tolist()
specie_lst = data["species"].tolist()
amount_lst = data["amount"].tolist()
# Distinct species of this site. Sorted (and with missing values dropped) so
# that the series / legend order is the same on every run; a bare `set` gives
# an arbitrary order.
species_list = sorted(df_1["species_lump"].dropna().unique().tolist(), key=str)

## Bar Chart - Check how one site's species amounts change over time

In [69]:
def chart_a():
    """Build the bar chart of per-species amounts at each survey date.

    Reads the module-level ``data`` frame (columns ``time``, ``species``,
    ``amount``), ``time_lst``, ``species_list`` and ``site_name``.

    Returns:
        A ``Grid`` with chart id ``"1"`` containing the bar chart. Every
        species gets one series with one value per distinct survey date;
        dates on which a species has no row in ``data`` are filled with 0.
    """
    # === Prepare x data and y data
    x_data = sorted(set(time_lst))
    # Build a (time, species) -> amount lookup in a single pass. This replaces
    # filtering the DataFrame once per cell and using a broad `except` to
    # detect "no row found". `setdefault` keeps the first row for a key, which
    # is what the previous `.values[0]` lookup returned.
    amount_by_key = {}
    for time_value, species, amount in zip(data["time"], data["species"], data["amount"]):
        amount_by_key.setdefault((time_value, species), int(amount))
    y_data = {
        item: [amount_by_key.get((time_value, item), 0) for time_value in x_data]
        for item in species_list
    }
    # === Bar Chart
    bar = Bar(init_opts=opts.InitOpts(theme=ThemeType.DARK))
    bar.add_xaxis([i.strftime("%Y-%m-%d") for i in x_data])
    bar.set_global_opts(
        title_opts=opts.TitleOpts(title="Bar Chart - Species Change Comparison in " + site_name),
        # Two zoom controls: the default one plus an "inside" one
        datazoom_opts=[opts.DataZoomOpts(), opts.DataZoomOpts(type_="inside")],
        legend_opts=opts.LegendOpts(pos_top="10%"),
    )
    for species, amounts in y_data.items():
        bar.add_yaxis(species, amounts)
    # === Add to grid
    # The plot area starts at 25% from the top; the legend sits at 10%.
    g = Grid(init_opts=opts.InitOpts(theme=ThemeType.DARK))
    g.add(bar, grid_opts=opts.GridOpts(pos_top="25%"))
    g.chart_id = "1"
    # === Return grid
    return g

## River Chart - Show the breakdown of species change over time at one site

In [70]:
def chart_b():
    """Build the theme-river chart of species amounts over time.

    Reads the module-level ``data`` frame, ``species_list`` and ``site_name``.

    Returns:
        A ``ThemeRiver`` chart with chart id ``"2"`` whose data rows are
        ``[date string, amount, species]``.
    """
    # === Prepare x data and y data
    # Work on an explicit copy: the column assignment below must not target a
    # slice of the shared `data` frame (pandas warns about that, and `data` is
    # also read by the other charts).
    data_copy = data[["time", "amount", "species"]].copy()
    data_copy["time"] = data_copy["time"].apply(lambda x: x.strftime("%Y-%m-%d"))
    # === River Chart
    c_ThemeRiver = ThemeRiver(init_opts=opts.InitOpts(theme=ThemeType.DARK))
    c_ThemeRiver.add(
        series_name=species_list,
        data=data_copy.values.tolist(),
        singleaxis_opts=opts.SingleAxisOpts(
            pos_top="90", pos_bottom="30", type_="time"
        ),
    )
    c_ThemeRiver.set_global_opts(
        tooltip_opts=opts.TooltipOpts(trigger="axis", axis_pointer_type="line"),
        title_opts=opts.TitleOpts(title="River Chart - Change of species in " + site_name),
        legend_opts=opts.LegendOpts(orient='horizontal', pos_top='top', pos_right='right'),
    )
    # Hide the per-band labels (boolean instead of the former integer 0)
    c_ThemeRiver.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
    c_ThemeRiver.chart_id = "2"
    # === Return river chart
    return c_ThemeRiver

## Pie Chart - Check one site's species breakdown (share of the total amount)

In [71]:
def chart_c():
    """Build the rose-type pie chart of each species' share of all records.

    Reads the module-level ``data`` frame and ``site_name``.

    Returns:
        A ``Pie`` chart with chart id ``"3"``. Each slice value is the
        species' summed ``amount`` divided by the grand total, rounded to two
        decimals (all zeros when the grand total is 0).
    """
    # === Data preparing
    # Sum only the "amount" column. Summing the whole frame would also try to
    # sum the datetime "time" column, which newer pandas versions reject.
    totals = data.groupby("species")["amount"].sum()
    # Computed once, not once per row
    grand_total = float(totals.sum())
    if grand_total:
        value = [round(float(count) / grand_total, 2) for count in totals]
    else:
        # No records at all: avoid dividing by zero
        value = [0.0 for _ in totals]
    attr = list(totals.index)
    # === Pie Chart
    pie = Pie(init_opts=opts.InitOpts(theme=ThemeType.DARK))
    pie.add(
        series_name="",
        data_pair=[list(z) for z in zip(attr, value)],
        radius=["30%", "75%"],
        rosetype="radius",
        label_opts=opts.LabelOpts(is_show=True),
        is_legend_hover_link=True,
    )
    pie.set_global_opts(
        title_opts=opts.TitleOpts(title="Pie Chart - Species Full Amount Percentage Analysis in " + site_name),
        # Legend hidden (boolean instead of the former integer 0)
        legend_opts=opts.LegendOpts(is_show=False),
    )
    pie.chart_id = "3"
    # === Return Pie Chart
    return pie

## United States Map

In [72]:
def chart_d():
    """Build the 3D map with one bar per survey site.

    Reads the module-level ``df`` frame. For every distinct
    ``intertidal_sitename`` the bar is placed at the latitude/longitude of the
    first row of that site (rounded to 3 decimals) and its height is the
    number of non-null ``reference_type`` values of that site.

    Returns:
        A ``Map3D`` chart with chart id ``"4"``.
    """
    # === Preparing data
    # NOTE(review): the series is labelled "Species Lump Amount" but the value
    # is a count of non-null `reference_type` rows per site - confirm that
    # this is the intended measure.
    counts = df.groupby("intertidal_sitename")["reference_type"].count()
    # First row of each site supplies its coordinates. Rows without a site
    # name are skipped: they have no entry in `counts`.
    sites = df.dropna(subset=["intertidal_sitename"]).drop_duplicates(
        subset=["intertidal_sitename"]
    )
    # Built in one pass instead of growing a DataFrame row by row and reading
    # it back with `.iloc`.
    example_data = [
        (
            str(site),
            [
                float(round(longitude, 3)),
                float(round(latitude, 3)),
                int(counts.loc[site]),
            ],
        )
        for site, latitude, longitude in zip(
            sites["intertidal_sitename"], sites["latitude"], sites["longitude"]
        )
    ]
    # === Map
    map3d = Map3D(init_opts=opts.InitOpts(theme=ThemeType.DARK))
    map3d.add(
        series_name='Species Lump Amount',
        data_pair=example_data,
        is_animation=True,
        type_=ChartType.BAR3D,
        maptype='United States',
        itemstyle_opts=opts.ItemStyleOpts(
            color='steelblue',),
        label_opts=opts.LabelOpts(
            is_show=True,
            # Label text: site name, line break, then the third value (the count)
            formatter=JsCode("function(data){return data.name +'\\n'+ data.value[2];}")),
        emphasis_label_opts=opts.LabelOpts(
            is_show=True),
        emphasis_itemstyle_opts=opts.ItemStyleOpts(
            color='#33FF00')
    )
    # add bottom map
    # NOTE(review): the map type here ('美国') differs from the one passed to
    # `add` above ('United States') - confirm both are intended.
    map3d.add_schema(
        maptype='美国',
        region_height=4,
        is_show_ground=False,
        itemstyle_opts=opts.ItemStyleOpts(
            color='#F5F5F5',
            border_color='#333333',
            border_type='dotted',
            border_width=1),
        emphasis_label_opts=opts.LabelOpts(
            is_show=True),
        emphasis_itemstyle_opts=opts.ItemStyleOpts(
            color='gold'),
        shading='lambert',
        light_opts=opts.Map3DLightOpts(
            main_color='#FFFFCC',
            is_main_shadow=True),
    )
    map3d.set_global_opts(
        title_opts=opts.TitleOpts(title="3D Chart - United States"),
    )
    map3d.chart_id = "4"
    # === Return Map
    return map3d

## Geologic Surface + species distributions

In [73]:
def chart_e():
    """Build the rock-height surface of the site with species points on top.

    Reads the module-level ``data_elevation`` and ``df`` frames and
    ``site_name``.

    The surface is drawn from the elevation rows of ``site_name``; the scatter
    series (one per ``species_lump``) are drawn from the species rows of the
    same site. The colour scale of the visual map spans the minimum and
    maximum ``Mean(z_rock_height)`` of the plotted surface.

    Returns:
        A ``Grid`` with chart id ``"5"`` holding the surface and the scatter
        chart.
    """
    height_col = "Mean(z_rock_height)"

    # Processing Elevation data
    elevation = data_elevation.loc[data_elevation["intertidal_sitename"] == site_name]
    # NOTE(review): rows are ordered (y_location, x_transect, z) while the
    # first axis is named "X Transect" and the scatter points below are
    # ordered (x_transect, y_location, z) - confirm the intended orientation.
    elevation = elevation[["y_location", "x_transect", height_col]]
    data_pair = np.array(elevation).tolist()

    # === Create Surface
    surface = Surface3D(init_opts=opts.InitOpts(theme=ThemeType.DARK))
    surface.add(
        series_name="Surface of "+ site_name,
        shading="color",
        data=data_pair,
        xaxis3d_opts=opts.Axis3DOpts(type_="value",name="X Transect",textstyle_opts=opts.TextStyleOpts(color="white")),
        yaxis3d_opts=opts.Axis3DOpts(type_="value",name="Y Location",textstyle_opts=opts.TextStyleOpts(color="white")),
        zaxis3d_opts=opts.Axis3DOpts(type_='value',name='Mean(z_rock_height)',textstyle_opts=opts.TextStyleOpts(color="white")),
        grid3d_opts=opts.Grid3DOpts(width=100, height=40, depth=100),
    )
    surface.set_global_opts(
        title_opts=opts.TitleOpts(title="Geologic Surface Chart"),
        visualmap_opts=opts.VisualMapOpts(
            # Colour by the third data dimension (the height). The range comes
            # from the surface that is actually plotted, not from the species
            # frame covering every site.
            dimension=2,
            max_=float(elevation[height_col].max()),
            min_=float(elevation[height_col].min()),
            range_color=["#313695","#4575b4","#74add1","#abd9e9","#e0f3f8","#ffffbf","#fee090","#fdae61","#f46d43","#d73027","#a50026",],
        )
    )

    # === Data Preparation
    # Only the selected site's observations belong on this site's surface
    # (same filter the K-means chart applies to its points).
    coord_cols = ["x_transect", "y_location", height_col]
    site_points = df.loc[df["intertidal_sitename"] == site_name]
    # Sorted, NaN-free species names give a stable legend order.
    spec_lst = sorted(site_points["species_lump"].dropna().unique().tolist(), key=str)
    data_dict = {}
    for spec in spec_lst:
        coords = site_points.loc[site_points["species_lump"] == spec, coord_cols]
        # One (x, y, z) tuple of floats per observation
        data_dict[spec] = list(coords.astype(float).itertuples(index=False, name=None))

    # === Scatter plots
    scatter_3d = Scatter3D(init_opts=opts.InitOpts(theme=ThemeType.DARK))
    for spec, points in data_dict.items():
        scatter_3d.add(
            series_name=spec,
            data=points,
            xaxis3d_opts=opts.Axis3DOpts(
                type_="value",
            ),
            yaxis3d_opts=opts.Axis3DOpts(
                type_="value",
                textstyle_opts=opts.TextStyleOpts(color="white"),
            ),
            zaxis3d_opts=opts.Axis3DOpts(
                type_="value",
            ),
            grid3d_opts=opts.Grid3DOpts(width=50, height=50, depth=50),
        )
    scatter_3d.set_global_opts(
        legend_opts=opts.LegendOpts(orient='horizontal', pos_top='10%'),
    )
    # === Create Grid
    g = Grid(init_opts=opts.InitOpts(theme=ThemeType.DARK))
    g.add(surface, grid_opts=opts.GridOpts(pos_top="25%"))
    g.add(scatter_3d, grid_opts=opts.GridOpts(pos_top="25%"))
    g.chart_id = "5"
    # === Return Grid
    return g

## Scatter Chart - ML K-Means Chart

In [74]:
def chart_f():
    """Build the site surface with the K-means result points on top.

    Reads the module-level ``data_elevation`` and ``k_means`` frames and
    ``site_name``. The K-means rows of the site are split by their
    ``species`` value; each value becomes one scatter series of
    ``(x_location, y_location, z_location)`` points.

    Returns:
        A ``Grid`` with chart id ``"6"`` holding the surface and the scatter
        chart.
    """
    # === Data processing
    elevation = data_elevation.loc[data_elevation["intertidal_sitename"] == site_name]
    # NOTE(review): the original note at this point read "take the maximum
    # time"; no such filter is applied here - confirm whether it is still
    # wanted.
    elevation = elevation[["y_location", "x_transect", "Mean(z_rock_height)"]]
    data_pair = np.array(elevation).tolist()
    # === Create Surface
    surface = Surface3D(init_opts=opts.InitOpts(theme=ThemeType.DARK))
    surface.add(
        series_name="Surface of "+ site_name,
        shading="color",
        data=data_pair,
        xaxis3d_opts=opts.Axis3DOpts(type_="value",name="X Transect",textstyle_opts=opts.TextStyleOpts(color="white")),
        yaxis3d_opts=opts.Axis3DOpts(type_="value",name="Y Location",textstyle_opts=opts.TextStyleOpts(color="white")),
        zaxis3d_opts=opts.Axis3DOpts(type_='value',name='Mean(z_rock_height)',textstyle_opts=opts.TextStyleOpts(color="white")),
        grid3d_opts=opts.Grid3DOpts(width=100, height=40, depth=100),
    )
    # Get K-Means Data
    # Uses the frame already loaded at module level instead of re-reading the
    # CSV from disk on every call.
    coord_cols = ["x_location", "y_location", "z_location"]
    site_clusters = k_means.loc[k_means["sitename"] == site_name]
    # Sorted, NaN-free categories give a stable legend order.
    category_lst = sorted(site_clusters["species"].dropna().unique().tolist(), key=str)
    data_dict = {}
    for category in category_lst:
        coords = site_clusters.loc[site_clusters["species"] == category, coord_cols]
        # One (x, y, z) tuple of floats per row
        data_dict[category] = list(coords.astype(float).itertuples(index=False, name=None))
    # === Scatter plots
    scatter_ml = Scatter3D(init_opts=opts.InitOpts(theme=ThemeType.DARK))
    for spec, points in data_dict.items():
        scatter_ml.add(
            series_name=spec,
            data=points,
            xaxis3d_opts=opts.Axis3DOpts(
                type_="value",
            ),
            yaxis3d_opts=opts.Axis3DOpts(
                type_="value",
                textstyle_opts=opts.TextStyleOpts(color="white"),
            ),
            zaxis3d_opts=opts.Axis3DOpts(
                type_="value",
            ),
            grid3d_opts=opts.Grid3DOpts(width=150, height=150, depth=150),
        )
    scatter_ml.set_global_opts(
        title_opts=opts.TitleOpts(title="ML: K-Means Clustering of species"),
        legend_opts=opts.LegendOpts(orient='horizontal', pos_top='15%'),
    )
    # === Create grid
    g = Grid(init_opts=opts.InitOpts(theme=ThemeType.DARK))
    g.add(surface, grid_opts=opts.GridOpts(pos_top="28%"))
    g.add(scatter_ml, grid_opts=opts.GridOpts(pos_top="28%"))
    g.chart_id = "6"
    # === Return Grid
    return g

## Page Layout

In [75]:
# Assemble all charts on one page. Each chart appears exactly once: chart_f()
# was previously added a second time at the end, which put two charts with
# the same chart id ("6") on the page.
page = Page(layout=Page.SimplePageLayout)
page.add(
    chart_e(),
    chart_a(),
    chart_f(),
    chart_d(),
    chart_b(),
    chart_c(),
)

page.render("page_demo.html")  # generate the HTML file

'F:\\Coding Projects\\Data_Spell_Workspace\\Geospatial_Study\\page_demo.html'

In [76]:
# Page.save_resize_html("raphael_demos.html", cfg_file="chart_config.json", dest="raphael_demos2.html")

## HTML Background Change

In [77]:
# Rewrite the rendered page in place, setting a dark background colour on
# its <body> element.
html_path = os.path.join(os.path.abspath("."), "page_demo.html")
with open(html_path, 'r+', encoding="utf8") as html:
    html_bf = BeautifulSoup(html, "lxml")

    body = html_bf.find("body")
    if body is not None:
        body["style"] = "background-color:#333333;"
    html_new = str(html_bf)
    # Go back to the start and empty the file before writing, so nothing of
    # the old content is left behind the new content.
    html.seek(0, 0)
    html.truncate()
    html.write(html_new)
    # No explicit close(): the `with` block closes the file.