In [1]:
# Library
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt


# Survival Rate (SR)

\begin{equation}
survival\;rate=\frac{harvested\;shrimp}{seed\;shrimp}
\end{equation}

Data completeness:
- Total seed shrimp can be obtained from "cycle dataset"
- Unfortunately, the "harvest dataset" does not record the number of harvested shrimp

On the other hand, mortality data are available. Assuming the mortality records capture every shrimp that died during a cycle, the equation above can be rewritten as:

\begin{equation}
survival\;rate=\frac{seed\;shrimp-mortality\;shrimp}{seed\;shrimp}
\end{equation}

In [23]:
# Cycle table: keep only the fields needed for the survival-rate calculation,
# give them analysis-friendly names, parse the dates, then discard exact
# duplicate rows and rows missing any required value.
df_cycle = (
    pd.read_csv('Data/cycles.csv')
    .loc[:, ['id', 'pond_id', 'started_at', 'finished_at', 'total_seed']]
    .rename(columns={
        'id': 'cycle_id',
        'started_at': 'start_date',
        'finished_at': 'finish_date',
        'total_seed': 'total_seed_shrimp',
    })
    .assign(
        start_date=lambda frame: pd.to_datetime(frame['start_date']),
        finish_date=lambda frame: pd.to_datetime(frame['finish_date']),
    )
    .drop_duplicates()
    .dropna(subset=['cycle_id', 'start_date', 'finish_date', 'total_seed_shrimp'])
)

# Harvest table: keep the cycle reference, the harvest date and the harvest
# status (the recorded values are 'Partial' / 'Full').
df_harvest = pd.read_csv('Data/harvests.csv')
df_harvest = df_harvest[['cycle_id', 'harvested_at', 'status']].copy()
# Rename columns & change data type
df_harvest.columns = ['cycle_id', 'harvest_date', 'harvest_status']
df_harvest.harvest_date = pd.to_datetime(df_harvest.harvest_date)
# Drop duplicates and null rows
df_harvest = df_harvest.drop_duplicates()
df_harvest = df_harvest.dropna(subset=['cycle_id', 'harvest_date', 'harvest_status'])
# Cast to int only AFTER the null rows are gone: pandas reads `cycle_id` as
# float when the column contains missing values, and converting NaN to int
# raises an error.
df_harvest.cycle_id = df_harvest.cycle_id.astype(int)

# Mortality table: one row per mortality record of a cycle. Keep the cycle
# reference, the record date and the recorded quantity, parse the date, then
# discard exact duplicate rows and rows missing any of the three values.
df_mortalities = (
    pd.read_csv('Data/mortalities.csv')
    .loc[:, ['cycle_id', 'recorded_at', 'quantity']]
    .rename(columns={
        'recorded_at': 'record_date',
        'quantity': 'total_mortality_shrimp',
    })
    .assign(record_date=lambda frame: pd.to_datetime(frame['record_date']))
    .drop_duplicates()
    .dropna(subset=['cycle_id', 'record_date', 'total_mortality_shrimp'])
)

In [25]:
# Sanity check on the cleaned cycle table: row count, per-column non-null
# counts and dtypes (the two date columns should be datetime64).
df_cycle.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2499 entries, 0 to 2616
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   cycle_id           2499 non-null   int64         
 1   pond_id            2499 non-null   int64         
 2   start_date         2499 non-null   datetime64[ns]
 3   finish_date        2499 non-null   datetime64[ns]
 4   total_seed_shrimp  2499 non-null   int64         
dtypes: datetime64[ns](2), int64(3)
memory usage: 117.1 KB


In [31]:
# Inspect the RAW cycle export for cycle 20124. The cleaned `df_cycle` cannot
# be used here: its `id` column is renamed to `cycle_id` and `updated_at` is
# not kept, so filtering/sorting on those names fails on a fresh kernel.
# The recorded output shows this cycle as several identical rows in the raw
# file, which is why exact duplicates are dropped during cleaning.
df_cycle_raw = pd.read_csv('Data/cycles.csv')[
    ['id', 'pond_id', 'started_at', 'finished_at', 'created_at', 'updated_at', 'total_seed']
]
df_cycle_raw[df_cycle_raw.id == 20124].sort_values(by=['id', 'updated_at'])

Unnamed: 0,id,pond_id,started_at,finished_at,created_at,updated_at,total_seed
1432,20124,37708,2023-01-09 00:00:00.000,2023-04-16 00:00:00.000,2022-12-24 12:09:37.000,2023-07-09 11:02:57.000,115000
1433,20124,37708,2023-01-09 00:00:00.000,2023-04-16 00:00:00.000,2022-12-24 12:09:37.000,2023-07-09 11:02:57.000,115000
1434,20124,37708,2023-01-09 00:00:00.000,2023-04-16 00:00:00.000,2022-12-24 12:09:37.000,2023-07-09 11:02:57.000,115000
1435,20124,37708,2023-01-09 00:00:00.000,2023-04-16 00:00:00.000,2022-12-24 12:09:37.000,2023-07-09 11:02:57.000,115000


In [19]:
# Reload the raw harvest export to inspect record-level metadata
# (creation / update timestamps) next to the harvest date and status.
# NOTE(review): this rebinds `df_harvest` to the raw, uncleaned frame.
df_harvest = pd.read_csv('Data/harvests.csv')
# `id` must be part of the selection: the recorded output displays it and the
# cell that inspects cycle 7273 sorts on it.
df_harvest = df_harvest[['cycle_id', 'id', 'created_at', 'updated_at', 'harvested_at', 'status']].copy()
df_harvest.sort_values(by=['cycle_id', 'updated_at'])

Unnamed: 0,cycle_id,id,created_at,updated_at,harvested_at,status
4663,3458.0,2053.0,2020-05-13 06:35:36,2020-06-27 15:34:37,2020-05-13,Partial
4664,3458.0,2088.0,2020-05-26 05:01:10,2020-06-27 15:34:40,2020-05-26,Partial
4665,3458.0,2199.0,2020-06-13 05:14:18,2020-06-27 15:34:45,2020-06-13,Partial
4666,3458.0,2286.0,2020-06-27 07:23:43,2020-06-27 15:34:49,2020-06-27,Full
1831,3459.0,2327.0,2020-07-06 05:50:00,2020-07-08 16:42:28,2020-07-06,Full
...,...,...,...,...,...,...
2059,29679.0,28248.0,2024-04-02 14:38:25,2024-04-02 14:38:25,2024-03-12,Partial
2060,29679.0,28252.0,2024-04-02 14:49:09,2024-04-02 14:49:09,2024-03-28,Full
2061,29679.0,28256.0,2024-04-02 20:46:06,2024-04-02 20:46:06,2024-03-28,Full
7514,29873.0,28263.0,2024-04-03 05:20:38,2024-04-03 15:36:51,2024-03-25,Full


In [21]:
# Reload the raw mortality export to inspect record-level metadata
# (creation / update timestamps) next to the record date and quantity.
# NOTE(review): this rebinds `df_mortalities` to the raw, uncleaned frame.
df_mortalities = pd.read_csv('Data/mortalities.csv')
# Keep `id` so the displayed table matches the recorded output, where each
# mortality record is identified by its own id.
df_mortalities = df_mortalities[['cycle_id', 'id', 'created_at', 'updated_at', 'recorded_at', 'quantity']].copy()
df_mortalities.sort_values(by=['cycle_id', 'updated_at'])

Unnamed: 0,cycle_id,id,created_at,updated_at,recorded_at,quantity
10,7273,439,2021-04-06 14:06:54,2024-01-08 11:40:40,2021-01-22,37400
11,7273,442,2021-04-06 14:24:13,2024-01-08 11:40:40,2021-02-05,2664
12,7273,443,2021-04-06 14:29:11,2024-01-08 11:40:40,2021-02-19,2125
13,7273,444,2021-04-06 14:32:16,2024-01-08 11:40:40,2021-03-05,1491
7,7276,269,2021-03-14 17:50:41,2024-01-08 11:40:40,2021-03-05,1492
...,...,...,...,...,...,...
13029,29450,24396,2024-03-16 04:47:43,2024-03-16 04:48:11,2024-03-15,2600
13052,29450,24518,2024-03-18 11:52:44,2024-03-18 11:52:44,2024-03-16,6500
13091,29579,24822,2024-03-24 02:52:26,2024-03-24 04:13:46,2024-03-24,97
13092,29579,24896,2024-03-25 09:11:42,2024-03-25 09:11:42,2024-03-25,1638


In [32]:
# Harvest records belonging to cycle 7273, ordered by record id and then by
# last-update timestamp.
df_harvest.loc[df_harvest['cycle_id'].eq(7273)].sort_values(by=['id', 'updated_at'])

Unnamed: 0,cycle_id,id,created_at,updated_at,harvested_at,status
305,7273.0,4534.0,2021-03-15 15:43:35,2021-03-15 15:43:53,2021-02-20,Partial
306,7273.0,4716.0,2021-04-07 02:34:31,2021-04-07 02:34:31,2021-03-27,Full


In [33]:
# Look up cycle 7273 in the RAW cycle export. The cleaned `df_cycle` cannot be
# used here: its `id` column is renamed to `cycle_id` and `updated_at` is not
# kept, so filtering/sorting on those names fails on a fresh kernel.
df_cycle_raw = pd.read_csv('Data/cycles.csv')[
    ['id', 'pond_id', 'started_at', 'finished_at', 'created_at', 'updated_at', 'total_seed']
]
df_cycle_raw[df_cycle_raw.id == 7273].sort_values(by=['id', 'updated_at'])

Unnamed: 0,id,pond_id,started_at,finished_at,created_at,updated_at,total_seed
672,7273,19168,2020-12-23 00:00:00.000,2021-03-27 00:00:00.000,2020-12-24 00:41:37.000,2021-04-09 00:06:34.000,75000
