# Standard Imports

In [10]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn

# Constants

In [11]:
# Single seed shared by every stochastic step in the notebook.
seed = 42

# Apply the seed to the global NumPy and PyTorch RNGs so that a
# "Restart Kernel -> Run All" run reproduces the same results.
np.random.seed(seed)
torch.manual_seed(seed)

# Import data

In [21]:
# Each source paper gets its own frame, named after the DOI suffix of the paper.

# Chemical and Instrumental Assessment of Green Tea Sensory Preference - Y.R. Liang, Q. Ye, J. Jin, H. Liang, J.L. Lu, Y.Y. Du & J.J. Dong
# https://doi.org/10.1080/10942910701299430
# Notes: downloaded CSVs

# header=1: the second line of the file supplies the column names; index_col=0: the first column is the row index.
df_10942910701299430_chemical_composition = pd.read_csv('../data/10942910701299430/chemical_composition.csv', index_col=0, header=1)
# NOTE(review): this frame displays an 'Unnamed: 0' column and its scores are 'x ± y' strings,
# so it needs parsing before any numeric use.
df_10942910701299430_sensory_evaluation = pd.read_csv('../data/10942910701299430/sensory_evaluation.csv')

# Phytochemical Composition and Antioxidant Capacity of 30 Chinese Teas - Guo-Yi Tang, Cai-Ning Zhao, Xiao-Yu Xu, Ren-You Gan, Shi-Yu Cao, Qing Liu, Ao Shang, Qian-Qian Mao & Hua-Bin Li
# https://doi.org/10.3390/antiox8060180
# Notes: Manually scraped data

df_antiox8060180_chemical_composition = pd.read_csv('../data/antiox8060180/chemical_composition.csv')

# Catechin and caffeine content of tea (Camellia sinensis L.) leaf significantly differ with seasonal variation... - Himangshu Deka, Tupu Barman, Jintu Dutta, Arundhuti Devi, Pradip Tamuly, Ranjit Kumar Paul & Tanmoy Karak
# https://doi.org/10.1016/j.jfca.2020.103684
# https://krishi.icar.gov.in/jspui/bitstream/123456789/68751/2/S0889157520313892-main.pdf - Free access

# TODO: this source is not loaded yet. The identifier must not contain dots, and the path
# presumably follows the '../data/<id>/' layout of the other reads -- confirm once the CSV exists:
# df_jfca_2020_103684_chemical_composition = pd.read_csv('../data/j.jfca.2020.103684/chemical_composition.csv')

# Comparative analysis of tea catechins and theaflavins by high-performance liquid chromatography and capillary electrophoresis - Bee-Lan Lee & Choon-Nam Ong
# https://doi.org/10.1016/S0021-9673(00)00215-6
# Notes: Manually scraped data

df_S0021967300002156 = pd.read_csv('../data/S0021967300002156/chemical_composition.csv')

# Survey of Catechins, Gallic Acid, and Methylxanthines in Green, Oolong, Pu-erh, and Black Teas - Jen-Kun Lin, Chih-Li Lin, Yu-Chih Liang, Shoei-Yn Lin-Shiau & I-Ming Juan
# https://doi.org/10.1021/jf980223x
# Notes: Manually scraped data

# Stored under its own name: assigning this to df_S0021967300002156 would discard the
# Lee & Ong frame loaded just above.
df_jf980223x_chemical_composition = pd.read_csv('../data/jf980223x/chemical_composition.csv')


In [30]:
# Inspect the sensory-evaluation frame; as the cell's last expression it is rendered as a table.
df_10942910701299430_sensory_evaluation

Unnamed: 0,Sample No,Taste,Appearance,Aroma,Liqour color,Infused leaf,TSSP b
0,1,23.1 ± 0.51,7.9 ± 0.18,23.1 ± 0.53,8.1 ± 0.09,15.2 ± 0.18,77.4 ± 1.71
1,2,25.8 ± 0.34,8.1 ± 0.11,24.9 ± 0.45,8.3 ± 0.10,16.4 ± 0.19,83.5 ± 1.19
2,3,23.7 ± 0.37,7.8 ± 0.23,23.7 ± 0.38,7.6 ± 0.10,15.8 ± 0.13,78.6 ± 1.21
3,4,23.7 ± 0.43,8.3 ± 0.12,24.0 ± 0.46,8.6 ± 0.11,16.6 ± 0.16,81.2 ± 1.28
4,5,24.6 ± 0.37,8.2 ± 0.24,26.4 ± 0.45,8.5 ± 0.09,16.6 ± 0.18,84.3 ± 1.32
5,6,24.6 ± 0.39,8.1 ± 0.17,27.0 ± 0.43,8.7 ± 0.12,16.8 ± 0.20,85.2 ± 1.30
6,7,25.2 ± 0.30,8.4 ± 0.16,24.9 ± 0.33,8.4 ± 0.11,17.8 ± 0.23,84.7 ± 1.12
7,8,23.7 ± 0.27,7.7 ± 0.19,24.3 ± 0.31,8.4 ± 0.12,15.8 ± 0.21,79.9 ± 1.10
8,9,23.1 ± 0.31,7.3 ± 0.20,22.2 ± 0.32,8.1 ± 0.09,16.0 ± 0.27,76.7 ± 1.17
9,10,22.2 ± 0.33,7.8 ± 0.12,22.5 ± 0.36,8.0 ± 0.11,16.4 ± 0.21,76.9 ± 1.11


# Combine data

## Data Frame Format

The data frames will be combined into a single data frame with the following columns:

- `'Catechin'`
- `'Epicatechin'`
- `'Gallocatechin'`
- `'Epigallocatechin'`
- `'Catechin Gallate'`
- `'Epicatechin Gallate'`
- `'Gallocatechin Gallate'`
- `'Epigallocatechin Gallate'`
- `'Gallic Acid'`
- `'Chlorogenic Acid'`
- `'Caffeine'`
- `'Taste'`
- `'Appearance'`
- `'Aroma'`
- `'Liqour color'`


# Visualize data

# Data Wrangling

# Training Device

In [None]:
# Choose the training backend in order of preference: CUDA GPU, then Apple MPS, then CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

print(f"Using {device} device")

Using cpu device


# Generative Adversarial Network

In [None]:
class Generator(nn.Module):
    """Generator half of the GAN (skeleton: no layers or ``forward`` defined yet)."""

    def __init__(self, ngpu):
        """Initialise the base ``nn.Module`` and record ``ngpu``.

        Args:
            ngpu: Stored unchanged as ``self.ngpu``. Presumably the number of
                GPUs to use -- TODO confirm once the model body is written.
        """
        super().__init__()
        self.ngpu = ngpu

class Discriminator(nn.Module):
    """Discriminator half of the GAN (skeleton: no layers or ``forward`` defined yet)."""

    def __init__(self, ngpu):
        """Initialise the base ``nn.Module`` and record ``ngpu``.

        Args:
            ngpu: Stored unchanged as ``self.ngpu``. Presumably the number of
                GPUs to use -- TODO confirm once the model body is written.
        """
        super().__init__()
        self.ngpu = ngpu

# Split data

In [None]:
from torch.utils.data import DataLoader, TensorDataset, random_split

# NOTE(review): `df` is not defined in the visible part of the notebook. It is presumably the
# combined, fully numeric frame built in the "Combine data" / "Data Wrangling" sections --
# confirm before running.
#
# A DataFrame cannot be passed to random_split/DataLoader directly: they fetch items by
# integer row position, whereas `df[i]` is a column lookup. Wrapping the values in a
# TensorDataset makes each item one row. to_numpy raises if a column is not numeric.
dataset = TensorDataset(torch.as_tensor(df.to_numpy(dtype="float32")))

# 80 % of the rows go to training; the remainder goes to testing so the sizes always sum to len(df).
train_size = int(0.8 * len(df))
test_size = len(df) - train_size

# Seed the split so the same rows land in each subset on every run.
# train_df / test_df keep their original names but are torch Subset objects, not DataFrames.
train_df, test_df = random_split(
    dataset,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(seed),
)

# Each batch is a one-element sequence holding a (batch, n_columns) float32 tensor.
train_loader = DataLoader(train_df, batch_size=64, shuffle=True)
# Shuffling brings nothing at evaluation time; keep the test order stable.
test_loader = DataLoader(test_df, batch_size=64, shuffle=False)

# Feature Engineering

# DBSCAN

In [None]:
# Model Creation

In [None]:
# Hyperparameter Selection

In [None]:
# Model Training

# Multilayer Perceptron

In [None]:
# Model Creation

In [None]:
# Hyperparameter Selection

In [None]:
# Model Training

# Convolutional Neural Network

In [None]:
# Model Creation

In [None]:
# Hyperparameter Selection


In [None]:
# Model Training

# Model Visualization and Comparison

# Export model