# USCS Academic Research - MARINe Site Visualisation Notebook
- Weijie Yang
- Jenna Sparks

## Step 1 - Import necessary packages

In [1]:
!pip install --upgrade pyecharts
import pandas as pd
from pyecharts.charts import Bar, Grid, Line, Page, Pie
from pyecharts.charts import ThemeRiver
from pyecharts.globals import ThemeType
from pyecharts.charts import Map3D
from pyecharts.commons.utils import JsCode
from pyecharts.globals import ChartType, SymbolType
from bs4 import BeautifulSoup
import os
import numpy as np
import pyecharts.options as opts
from pyecharts.charts import Surface3D
from pyecharts.charts import Scatter3D
from pyecharts.charts import Page
from collections import OrderedDict

Collecting pyecharts
  Downloading pyecharts-2.0.4-py3-none-any.whl (147 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/147.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━[0m [32m112.6/147.7 kB[0m [31m3.2 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m147.7/147.7 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
Collecting simplejson (from pyecharts)
  Downloading simplejson-3.19.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (137 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m137.9/137.9 kB[0m [31m15.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: simplejson, pyecharts
Successfully installed pyecharts-2.0.4 simplejson-3.19.2


## Step 2 - Load dataset

In [2]:
# Mount Google Drive, then prompt for the workbook upload (Colab-only helpers)
from google.colab import drive, files

drive.mount('/content/drive')
uploaded = files.upload()

Mounted at /content/drive


Saving ddiscovery_elevation_data_update_20231108.xlsx to ddiscovery_elevation_data_update_20231108.xlsx


In [3]:
# Read the percent-cover sheet and rename its site / coordinate columns
df = pd.read_excel(
    "ddiscovery_elevation_data_update_20231108.xlsx",
    sheet_name="point_contact_per_cover",
).rename(
    columns={
        'marine_site_name': 'intertidal_sitename',
        'latitude': 'site_latitude',
        'longitude': 'site_longitude',
    }
)
# One group per value of 'intertidal_sitename'
grouped_data = df.groupby('intertidal_sitename')

In [4]:
# Load the two sheets used by the surface map.
# Passing a list of sheet names makes pandas open the workbook once and
# return a dict of DataFrames keyed by sheet name, instead of parsing the
# same file a second time.
_surface_sheets = pd.read_excel(
    "ddiscovery_elevation_data_update_20231108.xlsx",
    sheet_name=["elevation_data", "species_heights"],
)
elevation_df = _surface_sheets["elevation_data"]
species_df = _surface_sheets["species_heights"]

## Step 3 - Establish functions for each visualization


### Diagram - Surface Map
- Display the site's surface elevation together with the species distribution recorded in the latest year

In [5]:
def _surface_chart(section_df, site_name, show_title):
  """Build one Surface3D chart from the rows of a single section.

  Args:
    section_df: rows providing "location", "x_transect" and
      "Mean(z_rock_height)" columns.
    site_name: site label used in the series name and (optionally) the title.
    show_title: when True the chart also carries the "Surface Chart - ..." title.

  Returns:
    The configured Surface3D chart.
  """
  surface_df = section_df[["location", "x_transect", "Mean(z_rock_height)"]]

  chart = Surface3D()
  chart.add(
      series_name="Surface of " + site_name,
      shading="color",
      # NOTE(review): the data columns are ordered [location, x_transect, z]
      # while the x axis is labelled "X Transect" and the y axis "Y Location"
      # (the scatter series uses [x_transect, location, z]) - confirm which
      # orientation is intended before changing either.
      # NOTE(review): these rows come from the elevation/species left merge, so
      # a point that carries several species appears more than once here -
      # confirm that is acceptable for the surface.
      data=np.array(surface_df).tolist(),
      xaxis3d_opts=opts.Axis3DOpts(type_="value", name="X Transect"),
      yaxis3d_opts=opts.Axis3DOpts(type_="value", name="Y Location"),
      zaxis3d_opts=opts.Axis3DOpts(type_='value', name='Mean(z_rock_height)'),
      grid3d_opts=opts.Grid3DOpts(width=100, height=40, depth=100),
  )

  global_opts = {
      "visualmap_opts": opts.VisualMapOpts(
          dimension=2,
          max_=surface_df["Mean(z_rock_height)"].max(),
          min_=surface_df["Mean(z_rock_height)"].min(),
      )
  }
  if show_title:
    global_opts["title_opts"] = opts.TitleOpts(title="Surface Chart - " + site_name)
  chart.set_global_opts(**global_opts)
  return chart


def _species_points(points_df):
  """Map each species to its list of [x_transect, location, height] points.

  Rows without a species label are skipped. Species are returned in sorted
  order so the legend order is the same on every run.
  """
  coord_cols = ["x_transect", "location", "Mean(z_rock_height)"]
  labelled = points_df.dropna(subset=["species_lump"])
  return {
      species: rows[coord_cols].astype(float).values.tolist()
      for species, rows in labelled.groupby("species_lump", sort=True)
  }


def _species_scatter(species_points):
  """Build the Scatter3D chart with one series per species."""
  scatter = Scatter3D()
  for species, points in species_points.items():
    scatter.add(
        series_name=species,
        data=points,
        xaxis3d_opts=opts.Axis3DOpts(type_="value"),
        yaxis3d_opts=opts.Axis3DOpts(type_="value"),
        zaxis3d_opts=opts.Axis3DOpts(type_="value"),
        grid3d_opts=opts.Grid3DOpts(width=50, height=50, depth=50))
  scatter.set_global_opts(legend_opts=opts.LegendOpts(orient='horizontal', pos_top='12%'))
  return scatter


def surface(elevation_df, species_df, site_name):
  """Build the surface + species-distribution chart for one site.

  Args:
    elevation_df: frame with "intertidal_sitename", "section", "x_transect",
      "location" and "Mean(z_rock_height)" columns.
    species_df: frame with the same columns plus "species_lump" and "year".
    site_name: value matched against "intertidal_sitename" in both frames.

  Returns:
    A pyecharts Grid holding one Surface3D per section found for the site,
    followed by a Scatter3D with one series per species observed in the
    latest year (rows labelled "Other Species" are excluded).
  """
  merge_keys = ["section", "x_transect", "location", "Mean(z_rock_height)"]

  site_elevation = elevation_df.loc[
      elevation_df["intertidal_sitename"] == site_name, merge_keys
  ]

  site_species = species_df.loc[
      (species_df["species_lump"] != "Other Species")
      & (species_df["intertidal_sitename"] == site_name)
  ]
  # Series.max() yields NaN (instead of raising like max([])) when the site has
  # no species rows; the comparison below then simply selects nothing.
  latest_year = site_species["year"].max()
  site_species = site_species.loc[
      site_species["year"] == latest_year, merge_keys + ["species_lump"]
  ]

  # Left merge keeps every elevation point and attaches a species where one
  # was recorded at exactly the same section / position / height.
  input_df = pd.merge(left=site_elevation, right=site_species, on=merge_keys, how="left")

  # One surface per section. With zero or one distinct section the whole frame
  # is drawn as a single surface.
  sections = sorted(input_df["section"].dropna().unique())
  if len(sections) <= 1:
    section_frames = [input_df]
  else:
    section_frames = [input_df.loc[input_df["section"] == sec] for sec in sections]

  # Only the first surface carries the chart title.
  surface_charts = [
      _surface_chart(frame, site_name, show_title=(idx == 0))
      for idx, frame in enumerate(section_frames)
  ]
  scatter_3d = _species_scatter(_species_points(input_df))

  # === Create Grid
  final_graph = Grid(init_opts=opts.InitOpts(theme=ThemeType.VINTAGE))
  for chart in surface_charts:
    final_graph.add(chart, grid_opts=opts.GridOpts(pos_top="25%"))
  final_graph.add(scatter_3d, grid_opts=opts.GridOpts(pos_top="25%"))

  # === Return Grid
  return final_graph

### Diagram - Themeriver
- Display how each species' percent cover changes over the years

In [6]:
def themeriver(site_df, site_name):
  """Build the theme-river chart of per-species percent cover over the years.

  Args:
    site_df: rows for one site with "year", "species_lump" and
      "percent_cover" columns. The frame is only read, never modified.
    site_name: site label used in the chart title.

  Returns:
    A pyecharts Grid wrapping the ThemeRiver chart.
  """
  # Rows labelled "Other Species" are left out of the river.
  river_df = site_df.loc[site_df["species_lump"] != "Other Species"]

  # Prepare data for the river chart: one [year, summed cover, species] entry
  # per species and year in which that species appears.
  species_list = river_df["species_lump"].unique()
  data_copy = []
  for species in species_list:
    species_data = river_df[river_df["species_lump"] == species]
    data_copy.extend(
        [
            str(year),
            round(float(species_data.loc[species_data["year"] == year, "percent_cover"].sum()), 10),
            species,
        ]
        for year in species_data["year"].unique()
    )

  # River Chart (VINTAGE theme)
  c_ThemeRiver = ThemeRiver(init_opts=opts.InitOpts(theme=ThemeType.VINTAGE))
  c_ThemeRiver.add(
      series_name=list(species_list),
      data=data_copy,
      singleaxis_opts=opts.SingleAxisOpts(
          pos_top="90", pos_bottom="30", type_="time",
          splitline_opts=opts.SplitLineOpts(is_show=True),
      ),
  )

  # Set options
  c_ThemeRiver.set_global_opts(
      tooltip_opts=opts.TooltipOpts(trigger="axis", axis_pointer_type="line"),
      title_opts=opts.TitleOpts(title="River Chart - Change of Species in " + site_name),
      legend_opts=opts.LegendOpts(orient='horizontal', pos_top="7%"),
  )
  # Hide the per-band labels
  c_ThemeRiver.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
  c_ThemeRiver.chart_id = "2"

  # Add to grid (VINTAGE theme)
  graph = Grid(init_opts=opts.InitOpts(theme=ThemeType.VINTAGE))
  graph.add(c_ThemeRiver, grid_opts=opts.GridOpts(pos_top="35%"))

  # Return River Chart
  return graph

### Diagram - Pie Chart
- Display each species' share of percent cover at the site in the most recent year

In [7]:
def pie(site_df, site_name):
  """Build the rose-type pie chart of species cover for the most recent year.

  Args:
    site_df: rows for one site with "year", "species_lump" and
      "percent_cover" columns.
    site_name: site label used in the chart title.

  Returns:
    The configured pyecharts Pie chart.
  """
  # Keep only the rows of the most recent year present in the frame
  most_recent_year = site_df['year'].max()
  recent_year_data = site_df[site_df['year'] == most_recent_year]

  # Summed percent_cover per species, rounded to 2 decimals for display
  total_percent_cover = recent_year_data.groupby("species_lump")["percent_cover"].sum()
  data_pair = [
      [species, round(cover, 2)]
      for species, cover in total_percent_cover.items()
  ]

  # Pie Chart (VINTAGE theme); named pie_chart so it does not shadow pie()
  pie_chart = Pie(init_opts=opts.InitOpts(theme=ThemeType.VINTAGE))
  pie_chart.add(series_name="",
                data_pair=data_pair,
                radius=["30%", "75%"],
                rosetype="radius",
                label_opts=opts.LabelOpts(is_show=True),
                is_legend_hover_link=True
                )
  pie_chart.set_global_opts(
      title_opts=opts.TitleOpts(title=f"Pie Chart - {most_recent_year} Species in {site_name}"),
      # Slice labels are shown instead of a legend
      legend_opts=opts.LegendOpts(is_show=False)
  )

  # Return Pie Chart
  return pie_chart

### Diagram - Bar Chart
- Display the percent cover of each species for every year

In [8]:
def bar(site_df, site_name):
  """Build the bar chart of percent cover per species and year.

  Args:
    site_df: rows for one site with "year", "species_lump" and
      "percent_cover" columns. The frame is only read, never modified.
    site_name: site label used in the chart title.

  Returns:
    A pyecharts Grid wrapping the Bar chart.
  """
  # Years as rows, species as columns, summed percent_cover as values;
  # species/year combinations with no rows become 0.
  pivot_df = site_df.pivot_table(
      index='year', columns='species_lump', values='percent_cover', aggfunc='sum'
  ).fillna(0)

  # Take the x labels from the pivot index so they always line up with the
  # per-species value lists below.
  x_data = [str(year) for year in pivot_df.index]

  # Bar Chart (VINTAGE theme)
  bar_chart = Bar(init_opts=opts.InitOpts(theme=ThemeType.VINTAGE))
  bar_chart.add_xaxis(x_data)
  bar_chart.set_global_opts(
      title_opts=opts.TitleOpts(title="Bar Chart - Percent Species Coverage in " + site_name),
      datazoom_opts=[opts.DataZoomOpts(), opts.DataZoomOpts(type_="inside")],
      legend_opts=opts.LegendOpts(pos_top="7%"),
      yaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(formatter="{value}%"))  # Add % to y-axis labels
  )

  # One series per species, values rounded to 2 decimals
  for species in pivot_df.columns:
    bar_chart.add_yaxis(species, [round(element, 2) for element in pivot_df[species].tolist()])

  # Add to grid (VINTAGE theme)
  graph = Grid(init_opts=opts.InitOpts(theme=ThemeType.VINTAGE))
  graph.add(bar_chart, grid_opts=opts.GridOpts(pos_top="30%"))

  # Return grid
  return graph

## Step 4 - HTML Rendering Function

In [9]:
def apply_html_styling(html_file_path, background_color="#fef8ef"):
  """Set the page background colour of an HTML file in place.

  Args:
    html_file_path: path of the HTML file to rewrite.
    background_color: CSS colour written to the <body> style attribute.

  Raises:
    ValueError: if the parsed document has no <body> element; the file is
      left untouched in that case.
  """
  # Parse first and close the handle before writing anything back
  with open(html_file_path, 'r', encoding="utf8") as html:
    html_bf = BeautifulSoup(html, "lxml")

  body = html_bf.find("body")
  if body is None:
    raise ValueError(f"No <body> element found in {html_file_path}")

  # Replaces any existing inline style on <body>
  body["style"] = f"background-color:{background_color};"

  # Mode 'w' truncates the file; the context manager closes it
  with open(html_file_path, 'w', encoding="utf8") as html:
    html.write(str(html_bf))

In [10]:
def generate_html_for_site(site_name, surface_chart, river_chart, pie_chart, bar_chart):
  """Render the four charts of one site into a single styled HTML page.

  The page is written to "<site_name>_visualization.html", passed through
  apply_html_styling, and its path is printed.
  """
  output_path = f"{site_name}_visualization.html"

  # Stack the charts on one simple page, in the order given
  page = Page(layout=Page.SimplePageLayout)
  page.add(surface_chart, river_chart, pie_chart, bar_chart)
  page.render(output_path)

  # Post-process the rendered file
  apply_html_styling(output_path)

  # Report where the page was saved
  print(f"HTML file for {site_name} visualization saved at: {output_path}")

## Step 5 - Render the charts and generate an HTML page for each site

In [11]:
# Build the four charts for every site group and write one HTML page per site.
# grouped_data yields (site name, rows of that site) pairs from the percent-cover sheet.
for site_name, site_df in grouped_data:
  # The surface chart uses the elevation / species-height sheets, not site_df
  surface_chart = surface(elevation_df, species_df, site_name)
  # The remaining charts are built from this site's percent-cover rows
  river_chart = themeriver(site_df, site_name)
  pie_chart = pie(site_df, site_name)
  bar_chart = bar(site_df, site_name)
  # Renders "<site_name>_visualization.html" and prints its path
  generate_html_for_site(site_name, surface_chart, river_chart, pie_chart, bar_chart)

HTML file for Bob Creek visualization saved at: Bob Creek_visualization.html
HTML file for Bodega visualization saved at: Bodega_visualization.html
HTML file for Buck Gully South visualization saved at: Buck Gully South_visualization.html
HTML file for Cape Arago visualization saved at: Cape Arago_visualization.html
HTML file for Fogarty Creek visualization saved at: Fogarty Creek_visualization.html
HTML file for Heisler Park visualization saved at: Heisler Park_visualization.html
HTML file for Point Fermin visualization saved at: Point Fermin_visualization.html
HTML file for White Point visualization saved at: White Point_visualization.html
