<img align="left" src="https://panoptes-uploads.zooniverse.org/project_avatar/86c23ca7-bbaa-4e84-8d8a-876819551431.png" type="image/png" height=100 width=100>
</img>


<h1 align="right">Colab KSO Tutorial #1: Check and update csv files</h1>
<h3 align="right">Written by @jannesgg and @vykanton</h3>
<h5 align="right">Last updated: Jun 27th, 2022</h5>

# Set up and requirements

## Install koster_data_management and its requirements

In [None]:
# Clone the `dev` branch of the koster_data_management repo (including its
# git submodules) and install the Python packages listed in its requirements.txt
!git clone --recurse-submodules -b dev https://github.com/ocean-data-factory-sweden/koster_data_management.git
!pip install -r koster_data_management/requirements.txt

# Pin ipysheet to 0.4.4, the version that is compatible with Colab
!pip install ipysheet==0.4.4

# Stop the current session so that the freshly installed packages are the ones
# loaded afterwards; continue with the next cell once the session is back
exit()

### Import Python packages

In [None]:
# Set the directory of the libraries
import sys, os
from pathlib import Path

# Enables testing changes in utils
%load_ext autoreload
%autoreload 2

# Specify the path of the tutorials
os.chdir("koster_data_management/tutorials")
sys.path.append('..')

# Enable third-party widgets(ipysheet)
from google.colab import output
output.enable_custom_widget_manager()

# Import required modules
import kso_utils.tutorials_utils as t_utils
import kso_utils.project_utils as p_utils
import kso_utils.server_utils as serv_utils
import kso_utils.t1_utils as t1

print("Packages loaded successfully")

### Choose your project

In [None]:
# Show the project selector; the selected entry is read in a later cell
# through `project_name.value`
project_name = t_utils.choose_project()

Dropdown(description='Project:', options=('Template project', 'Koster_Seafloor_Obs', 'Spyfish_Aotearoa', 'SGU'…

## Initiate database

In [None]:
# Look up the project that matches the name selected in the dropdown
project = p_utils.find_project(project_name=project_name.value)
# Initiate the database for that project. The returned object is passed to the
# t1 helpers below and is indexed by key later on (e.g. "local_sites_csv")
db_info_dict = t_utils.initiate_db(project)

# Review Sites information

## Map sites and metadata

In [None]:
# Build the sites map from the database information of the selected project
kso_map = t1.map_site(db_info_dict, project)
# Keep the map as the last expression of the cell so the notebook renders it
kso_map

## Update sites metadata

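You can update the contents of the cells in the spreadsheet below. The cell that follows the spreadsheet compares your edits with the original csv file and highlights the values that differ.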

In [None]:
# Open the sites information as a spreadsheet; a later cell converts this
# object back into a dataframe with ipysheet.to_dataframe
sites_sheet = t1.open_sites_csv(db_info_dict)
# Keep the sheet as the last expression so it is displayed and can be edited
sites_sheet

In [None]:
# Load the site information currently stored in the local csv file
sites_csv_path = db_info_dict["local_sites_csv"]
sites_df = pd.read_csv(sites_csv_path)

# Turn the (possibly edited) spreadsheet back into a pandas dataframe
sites_sheet_pd = ipysheet.to_dataframe(sites_sheet)

# Put the csv values ("Origin") and the spreadsheet values ("Update")
# side by side, both keyed by site_id
origin_by_site = sites_df.set_index('site_id')
update_by_site = sites_sheet_pd.set_index('site_id')
df_all = pd.concat(
    [origin_by_site, update_by_site],
    axis='columns',
    keys=['Origin', 'Update'],
)

# Move the field name to the outer column level so every field shows its
# Origin/Update pair together, following the csv column order. The first csv
# column is skipped (presumably site_id, which is now the index - confirm).
field_names = sites_df.columns[1:]
df_final = df_all.swaplevel(axis='columns')[field_names]
df_final

def highlight_diff(data, color='yellow'):
    """Return the CSS styles that flag values differing from their 'Origin' copy.

    Meant to be used with ``Styler.apply(highlight_diff, axis=None)``, which
    passes the whole frame and expects a frame of CSS strings with the same
    index and columns in return.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with two-level columns: the field name on level 0 and a label on
        the last level, one of which must be ``'Origin'``.
    color : str, default 'yellow'
        Background colour given to the cells that differ.

    Returns
    -------
    pandas.DataFrame
        ``'background-color: <color>'`` where a value differs from the
        ``'Origin'`` value of the same field and row, ``''`` elsewhere.
        Positions where both values are missing are not flagged.
    """
    attr = 'background-color: {}'.format(color)
    # Reference values: one column per field, taken from the 'Origin' label
    other = data.xs('Origin', axis='columns', level=-1)
    # Compare every column with the 'Origin' column of the same field
    differs = data.ne(other, level=0).to_numpy()
    # NaN != NaN evaluates to True, so cells that are empty on both sides would
    # be reported as changes; find those positions and exclude them.
    origin_missing = other.isna().reindex(columns=data.columns, level=0)
    both_missing = data.isna().to_numpy() & origin_missing.to_numpy(dtype=bool)
    return pd.DataFrame(np.where(differs & ~both_missing, attr, ''),
                        index=data.index, columns=data.columns)

# Render df_final with the styles returned by highlight_diff; axis=None makes
# the styler pass the whole table to the function in a single call
df_final.style.apply(highlight_diff, axis=None)

### Confirm the changes

In [None]:
# Confirm the edits made in the sites spreadsheet. `sites_sheet` is the object
# returned by t1.open_sites_csv in the "Update sites metadata" section.
# NOTE(review): confirm that update_sites_csv expects the spreadsheet object
# rather than the converted dataframe (`sites_sheet_pd`).
t1.update_sites_csv(sites_sheet)

In [None]:
# Check the sites spreadsheet against the project database information.
# Uses the names defined earlier in this notebook: `db_info_dict` (from
# t_utils.initiate_db) and `sites_sheet` (from t1.open_sites_csv).
# NOTE(review): confirm the argument types against t1.check_sites_database.
sites_df = t1.check_sites_database(db_info_dict, sites_sheet, project)

### Check the movies_csv

In [None]:
# Open the movies information as a spreadsheet, using the database information
# created by t_utils.initiate_db (`db_info_dict`)
movies_df_sheet = t1.open_movies_csv(db_info_dict)
# Keep the sheet as the last expression so it is displayed
movies_df_sheet

In [None]:
# Run the movies check on the spreadsheet opened in the previous cell and keep
# the returned value in `movies_df`
movies_df = t1.check_movies_csv(db_info_dict, movies_df_sheet, project)

In [None]:
#END