# draw_pic.ipynb Documentation
This notebook plots the training results and saves the figures as SVG files.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import os
# Report the versions of the plotting/numeric libraries used by this notebook.
for _library in (pd, np, matplotlib):
    print(_library.__version__)

1.5.3
1.24.3
3.7.1


## Variables

```qualitative_trait_list``` : A list of all qualitative traits that the model has been trained on.

```quantitative_trait_list``` : A list of all quantitative traits that the model has been trained on.

```quantitative_dir_path``` : The parent folder path of the quantitative trait output file.

```qualitative_dir_path``` : The parent folder path of the qualitative trait output file.

```qualitative_trait_file``` : Path of the qualitative trait output file produced with 5000 samples.

```quantitative_trait_file``` : Path of the quantitative trait output file produced with 5000 samples.

```save_path``` : SVG image saving path.

In [7]:
# Traits the model has been trained on.
qualitative_trait_list = ['ST','FC','P_DENS','POD']
quantitative_trait_list = ['protein','oil','SdWgt','Yield','R1','R8','Hgt']

# Build every path with os.path.join instead of hard-coded backslashes so the
# notebook resolves the same folders on Windows, Linux and macOS.
quantitative_dir_path = os.path.join("out", "quantitative_trait")
qualitative_dir_path = os.path.join("out", "qualitative_trait")

# Output files of the 5000-sample run inside each folder.
qualitative_trait_file = os.path.join(qualitative_dir_path, '5000_out.txt')
quantitative_trait_file = os.path.join(quantitative_dir_path, '5000_out.txt')

# Folder the SVG figures are written to.
save_path = "result"

## Function

### qualitative_trait(qualitative_trait_file, save_path, qualitative_trait_list)

Plot the loss curves and the evaluation metrics of each qualitative trait.

For example:
```python
qualitative_dir_path = ''
qualitative_trait_file=os.path.join(qualitative_dir_path,'5000_out.txt')
save_path=''
qualitative_trait_list=''
qualitative_trait(qualitative_trait_file,save_path,qualitative_trait_list)
```

In [11]:
def qualitative_trait(qualitative_trait_file,save_path,qualitative_trait_list):
    """Plot loss and evaluation curves for each qualitative trait.

    Parses the training log ``qualitative_trait_file`` and, for every trait in
    ``qualitative_trait_list``, writes two SVG figures into ``save_path``:
    ``<trait>_loss.svg`` (train and test loss) and ``<trait>_eval.svg``
    (accuracy, recall, F1 score and precision).

    Parameters
    ----------
    qualitative_trait_file : str
        Path of the training log to parse.
    save_path : str
        Folder the figures are written to. It is created when missing; an
        empty string means the current working directory.
    qualitative_trait_list : iterable
        Trait names to look for in the log.

    Raises
    ------
    ValueError
        If the log holds no metric line for one of the requested traits.

    Notes
    -----
    A log line belongs to a trait when it contains the trait name and the
    separator ``' | '``. The text after the first separator must provide at
    least six comma-separated ``key:value`` fields, which are interpreted, in
    order, as train loss, test loss, accuracy, recall, F1 score and precision.
    The number of epochs on the x axis is taken from the number of matching
    lines instead of being fixed.
    """
    metric_names = ('train_loss', 'test_loss', 'acc', 'recall', 'f1_score', 'pre_score')

    def _draw(epochs, curves, low, high, title, file_name, **save_options):
        """Plot the labelled curves on one figure, save it under save_path and close it."""
        fig, ax = plt.subplots()
        for label, values in curves:
            ax.plot(epochs, values, marker=None, linewidth=1.0, label=label)
        ax.set_ylim(0, high)
        # linspace always yields exactly six ticks; arange with a float step can
        # return one more because of rounding and fails when low == high.
        ax.set_yticks(np.round(np.linspace(low, high, 6, endpoint=False), 2))
        ax.margins(y=0)
        ax.legend(loc='best')
        ax.set_title(title)
        fig.savefig(os.path.join(save_path, file_name), dpi=400, **save_options)
        plt.close(fig)

    # The context manager closes the log right after reading. The first 16 and
    # the last 2 lines are dropped - presumably a preamble and a trailing
    # summary of the log; TODO confirm against the log format.
    with open(qualitative_trait_file, 'r') as log_file:
        lines = [line.rstrip() for line in log_file.readlines()[16:-2]]

    if save_path:
        os.makedirs(save_path, exist_ok=True)

    print(f"All pictures will be saved to {save_path}/trait_name_eval(or loss).svg.")
    for trait in qualitative_trait_list:
        print(f"Now drawing {trait}.")
        metrics = {name: [] for name in metric_names}
        for line in lines:
            # NOTE(review): this is a substring match, so a trait whose name is
            # contained in another trait's line would also match - verify that
            # the trait names are unambiguous in the log.
            if f'{trait}' in line and ' | ' in line:
                fields = line.split(' | ')[1].split(',')
                for position, name in enumerate(metric_names):
                    metrics[name].append(float(fields[position].split(':')[1]))

        if not metrics['train_loss']:
            raise ValueError(
                f"No metric lines found for trait {trait!r} in {qualitative_trait_file!r}."
            )

        # One x value per parsed epoch, whatever the length of the training run.
        epoch_list = list(range(1, len(metrics['train_loss']) + 1))

        losses = metrics['train_loss'] + metrics['test_loss']
        _draw(
            epoch_list,
            [('train loss', metrics['train_loss']), ('test loss', metrics['test_loss'])],
            min(losses) * 0.8,
            max(losses) * 1.2,
            f'CrossEntropy Loss of {trait}',
            f'{trait}_loss.svg',
            format='svg',
        )

        scores = metrics['acc'] + metrics['recall'] + metrics['f1_score'] + metrics['pre_score']
        _draw(
            epoch_list,
            [
                ('accuracy', metrics['acc']),
                ('recall', metrics['recall']),
                ('f1 score', metrics['f1_score']),
                ('precision', metrics['pre_score']),
            ],
            min(scores) * 0.7,
            max(scores) * 1.2,
            f'eval of {trait}',
            f'{trait}_eval.svg',
        )
        
# Draw the loss and evaluation figures of every qualitative trait.
qualitative_trait(
    qualitative_trait_file=qualitative_trait_file,
    save_path=save_path,
    qualitative_trait_list=qualitative_trait_list,
)

All pictures will be saved to result/trait_name_eval(or loss).svg.
Now drawing ST.
Now drawing FC.
Now drawing P_DENS.
Now drawing POD.


### quantitative_trait(quantitative_trait_file,save_path,quantitative_trait_list)

Plot the loss curves and the evaluation metric of each quantitative trait.

For example:
```python
quantitative_dir_path = ''
quantitative_trait_file=os.path.join(quantitative_dir_path,'5000_out.txt')
save_path=''
quantitative_trait_list=''
quantitative_trait(quantitative_trait_file,save_path,quantitative_trait_list)
```

In [12]:
def quantitative_trait(quantitative_trait_file,save_path,quantitative_trait_list):
    """Plot loss and correlation curves for each quantitative trait.

    Parses the training log ``quantitative_trait_file`` and, for every trait in
    ``quantitative_trait_list``, writes two SVG figures into ``save_path``:
    ``<trait>_loss.svg`` (train and test loss) and ``<trait>_eval.svg``
    (the third metric of the log line, plotted as the Pearson correlation
    coefficient ``r``).

    Parameters
    ----------
    quantitative_trait_file : str
        Path of the training log to parse.
    save_path : str
        Folder the figures are written to. It is created when missing; an
        empty string means the current working directory.
    quantitative_trait_list : iterable
        Trait names to look for in the log.

    Raises
    ------
    ValueError
        If the log holds no metric line for one of the requested traits.

    Notes
    -----
    A log line belongs to a trait when it contains the trait name and the
    separator ``' | '``. The text after the first separator must provide at
    least three comma-separated ``key:value`` fields, interpreted in order as
    train loss, test loss and r. The number of epochs on the x axis is taken
    from the number of matching lines instead of being fixed.
    """
    # The context manager closes the log right after reading. The first 16 and
    # the last 2 lines are dropped - presumably a preamble and a trailing
    # summary of the log; TODO confirm against the log format.
    with open(quantitative_trait_file, 'r') as log_file:
        lines = [line.rstrip() for line in log_file.readlines()[16:-2]]

    if save_path:
        os.makedirs(save_path, exist_ok=True)

    print(f"All pictures will be saved to {save_path}/trait_name_eval(or loss).svg.")
    for trait in quantitative_trait_list:
        print(f"Now drawing {trait}.")
        train_loss, test_loss, r = [], [], []
        for line in lines:
            # NOTE(review): this is a substring match, so a trait whose name is
            # contained in another trait's line would also match - verify that
            # the trait names are unambiguous in the log.
            if f'{trait}' in line and ' | ' in line:
                fields = line.split(' | ')[1].split(',')
                train_loss.append(float(fields[0].split(':')[1]))
                test_loss.append(float(fields[1].split(':')[1]))
                r.append(float(fields[2].split(':')[1]))

        if not train_loss:
            raise ValueError(
                f"No metric lines found for trait {trait!r} in {quantitative_trait_file!r}."
            )

        # One x value per parsed epoch, whatever the length of the training run.
        epoch_list = list(range(1, len(train_loss) + 1))

        # Loss figure: the y axis starts at 0 and leaves 20 % headroom above the peak.
        fig, ax = plt.subplots()
        ax.plot(epoch_list, train_loss, marker=None, linewidth=1.0, label='train loss')
        ax.plot(epoch_list, test_loss, marker=None, linewidth=1.0, label='test loss')
        ax.legend(loc='best')
        ax.set_ylim(0, max(max(train_loss), max(test_loss)) * 1.2)
        ax.set_xlabel('epoch')
        ax.set_title(f'Mean Square  Loss of {trait}')
        fig.savefig(os.path.join(save_path, f'{trait}_loss.svg'), dpi=600)
        plt.close(fig)

        # Correlation figure: default axis limits, no legend.
        fig, ax = plt.subplots()
        ax.plot(epoch_list, r, marker=None, linewidth=1.0, label='r')
        ax.set_title(f' Pearson correlation coefficient of {trait}')
        ax.set_xlabel('epoch')
        fig.savefig(os.path.join(save_path, f'{trait}_eval.svg'), dpi=600)
        plt.close(fig)
        
# Draw the loss and evaluation figures of every quantitative trait.
quantitative_trait(
    quantitative_trait_file=quantitative_trait_file,
    save_path=save_path,
    quantitative_trait_list=quantitative_trait_list,
)

All pictures will be saved to result/trait_name_eval(or loss).svg.
Now drawing protein.
Now drawing oil.
Now drawing SdWgt.
Now drawing Yield.
Now drawing R1.
Now drawing R8.
Now drawing Hgt.


### qualitative_trait_compare(qualitative_dir_path,save_path,qualitative_trait_list)

Compare the performance of qualitative trait models across different sample size gradients.

For example:
```python
qualitative_dir_path=''
save_path=''
qualitative_trait_list=''
qualitative_trait_compare(qualitative_dir_path,save_path,qualitative_trait_list)
```

In [14]:
def qualitative_trait_compare(qualitative_dir_path,save_path,qualitative_trait_list):
    """Compare qualitative-trait metrics across the training logs of a folder.

    Every regular file in ``qualitative_dir_path`` is parsed as one training
    log. For each trait in ``qualitative_trait_list`` five SVG figures are
    written into ``save_path`` - test loss, recall, accuracy, F1 score and
    precision - each showing one curve per log file.

    Parameters
    ----------
    qualitative_dir_path : str
        Folder holding one training log per sample size.
    save_path : str
        Folder the figures are written to. It is created when missing; an
        empty string means the current working directory.
    qualitative_trait_list : iterable
        Trait names to look for in the logs.

    Raises
    ------
    ValueError
        If the folder holds no log file, or a log holds no metric line for a
        requested trait.

    Notes
    -----
    Curves are labelled from the file names rather than from the position in
    ``os.listdir`` (whose order is arbitrary): a file named ``<N>_...`` with a
    numeric ``N`` is labelled ``'<N> samples'`` and the files are ordered by
    ``N``; any other file is labelled with its name without extension.
    A log line belongs to a trait when it contains the trait name and the
    separator ``' | '``; the text after the first separator must provide at
    least six comma-separated ``key:value`` fields, of which fields 1 to 5 are
    interpreted as test loss, accuracy, recall, F1 score and precision.
    """
    # (metric key, field position in the log line, title prefix, file suffix),
    # listed in the order in which the figures are drawn.
    plots = (
        ('test_loss', 1, 'CrossEntropyLoss', 'CrossEntropyLoss'),
        ('recall', 3, 'Recall', 'recall'),
        ('acc', 2, 'Accuracy', 'acc'),
        ('f1_score', 4, 'F1 score', 'f1_score'),
        ('precision', 5, 'Precision', 'precision'),
    )

    def _order(file_name):
        """Sort key: numeric sample-size prefix first, other names alphabetically after."""
        prefix = file_name.split('_')[0]
        if prefix.isdecimal():
            return (0, int(prefix), file_name)
        return (1, 0, file_name)

    def _label(file_name):
        """Legend label derived from the file name."""
        prefix = file_name.split('_')[0]
        if prefix.isdecimal():
            return f'{prefix} samples'
        return os.path.splitext(file_name)[0]

    # Read every log once (not once per trait) and close it immediately.
    # The first 16 and the last 2 lines are dropped - presumably a preamble and
    # a trailing summary of the log; TODO confirm against the log format.
    runs = []
    for file_name in sorted(os.listdir(qualitative_dir_path), key=_order):
        csv_path = os.path.join(qualitative_dir_path, file_name)
        if not os.path.isfile(csv_path):
            continue  # ignore sub-folders such as notebook checkpoints
        with open(csv_path, 'r') as log_file:
            lines = [line.rstrip() for line in log_file.readlines()[16:-2]]
        runs.append((_label(file_name), csv_path, lines))

    if save_path:
        os.makedirs(save_path, exist_ok=True)

    print(f"All pictures will be saved to {save_path}/trait_name_evaluation.svg")
    for trait in qualitative_trait_list:
        print(f"Now drawing {trait}.")
        if not runs:
            raise ValueError(f"No log files found in {qualitative_dir_path!r}.")

        # metric key -> list of (label, values), one entry per log file
        curves = {key: [] for key, _position, _title, _suffix in plots}
        for label, csv_path, lines in runs:
            values = {key: [] for key in curves}
            for line in lines:
                # NOTE(review): this is a substring match, so a trait whose name
                # is contained in another trait's line would also match.
                if f'{trait}' in line and ' | ' in line:
                    fields = line.split(' | ')[1].split(',')
                    for key, position, _title, _suffix in plots:
                        values[key].append(float(fields[position].split(':')[1]))
            if not values['test_loss']:
                raise ValueError(
                    f"No metric lines found for trait {trait!r} in {csv_path!r}."
                )
            for key in curves:
                curves[key].append((label, values[key]))

        for key, _position, title, suffix in plots:
            fig, ax = plt.subplots()
            for label, series in curves[key]:
                # One x value per parsed epoch of this particular log.
                ax.plot(range(1, len(series) + 1), series, marker=None, linewidth=1.0, label=label)
            # The y axis starts at 0 and leaves 20 % headroom above the highest value.
            ax.set_ylim(0, max(max(series) for _label_text, series in curves[key]) * 1.2)
            ax.legend(loc='best')
            ax.set_title(f'{title} of {trait}')
            ax.set_xlabel('epoch')
            fig.savefig(os.path.join(save_path, f'{trait}_{suffix}.svg'), dpi=600)
            plt.close(fig)

# Draw the per-metric comparison figures of every qualitative trait.
qualitative_trait_compare(
    qualitative_dir_path=qualitative_dir_path,
    save_path=save_path,
    qualitative_trait_list=qualitative_trait_list,
)

All pictures will be saved to result/trait_name_evaluation.svg
Now drawing ST.
Now drawing FC.
Now drawing P_DENS.
Now drawing POD.


### quantitative_trait_compare(quantitative_dir_path,save_path,quantitative_trait_list)

Compare the performance of quantitative trait models across different sample size gradients.

For example:
```python
quantitative_dir_path=''
save_path=''
quantitative_trait_list=''
quantitative_trait_compare(quantitative_dir_path,save_path,quantitative_trait_list)
```

In [15]:
def quantitative_trait_compare(quantitative_dir_path,save_path,quantitative_trait_list):
    """Compare quantitative-trait metrics across the training logs of a folder.

    Every regular file in ``quantitative_dir_path`` is parsed as one training
    log. For each trait in ``quantitative_trait_list`` two SVG figures are
    written into ``save_path``: ``<trait>_MeanSquareError.svg`` (test loss)
    and ``<trait>_r.svg`` (the third metric of the log line), each showing one
    curve per log file.

    Parameters
    ----------
    quantitative_dir_path : str
        Folder holding one training log per sample size.
    save_path : str
        Folder the figures are written to. It is created when missing; an
        empty string means the current working directory.
    quantitative_trait_list : iterable
        Trait names to look for in the logs.

    Raises
    ------
    ValueError
        If the folder holds no log file, or a log holds no metric line for a
        requested trait.

    Notes
    -----
    Curves are labelled from the file names rather than from the position in
    ``os.listdir`` (whose order is arbitrary): a file named ``<N>_...`` with a
    numeric ``N`` is labelled ``'<N> samples'`` and the files are ordered by
    ``N``; any other file is labelled with its name without extension.
    A log line belongs to a trait when it contains the trait name and the
    separator ``' | '``; the text after the first separator must provide at
    least three comma-separated ``key:value`` fields, of which fields 1 and 2
    are interpreted as test loss and r.
    """
    # (metric key, field position, title prefix, file suffix, y-axis headroom),
    # listed in the order in which the figures are drawn. The second figure is
    # titled as the Pearson correlation coefficient, matching its "_r" file
    # name and the title quantitative_trait() uses for the same log field.
    plots = (
        ('test_loss', 1, 'Mean Square Error', 'MeanSquareError', 1.05),
        ('r', 2, 'Pearson correlation coefficient', 'r', 1.2),
    )

    def _order(file_name):
        """Sort key: numeric sample-size prefix first, other names alphabetically after."""
        prefix = file_name.split('_')[0]
        if prefix.isdecimal():
            return (0, int(prefix), file_name)
        return (1, 0, file_name)

    def _label(file_name):
        """Legend label derived from the file name."""
        prefix = file_name.split('_')[0]
        if prefix.isdecimal():
            return f'{prefix} samples'
        return os.path.splitext(file_name)[0]

    # Read every log once (not once per trait) and close it immediately.
    # The first 16 and the last 2 lines are dropped - presumably a preamble and
    # a trailing summary of the log; TODO confirm against the log format.
    runs = []
    for file_name in sorted(os.listdir(quantitative_dir_path), key=_order):
        csv_path = os.path.join(quantitative_dir_path, file_name)
        if not os.path.isfile(csv_path):
            continue  # ignore sub-folders such as notebook checkpoints
        with open(csv_path, 'r') as log_file:
            lines = [line.rstrip() for line in log_file.readlines()[16:-2]]
        runs.append((_label(file_name), csv_path, lines))

    if save_path:
        os.makedirs(save_path, exist_ok=True)

    print(f"All pictures will be saved to {save_path}/trait_name_evaluation.svg")
    for trait in quantitative_trait_list:
        print(f"Now drawing {trait}.")
        if not runs:
            raise ValueError(f"No log files found in {quantitative_dir_path!r}.")

        # metric key -> list of (label, values), one entry per log file
        curves = {key: [] for key, _position, _title, _suffix, _headroom in plots}
        for label, csv_path, lines in runs:
            values = {key: [] for key in curves}
            for line in lines:
                # NOTE(review): this is a substring match, so a trait whose name
                # is contained in another trait's line would also match.
                if f'{trait}' in line and ' | ' in line:
                    fields = line.split(' | ')[1].split(',')
                    for key, position, _title, _suffix, _headroom in plots:
                        values[key].append(float(fields[position].split(':')[1]))
            if not values['test_loss']:
                raise ValueError(
                    f"No metric lines found for trait {trait!r} in {csv_path!r}."
                )
            for key in curves:
                curves[key].append((label, values[key]))

        for key, _position, title, suffix, headroom in plots:
            fig, ax = plt.subplots()
            for label, series in curves[key]:
                # One x value per parsed epoch of this particular log.
                ax.plot(range(1, len(series) + 1), series, marker=None, linewidth=1.0, label=label)
            # The y axis starts at 0 and leaves some headroom above the highest value.
            ax.set_ylim(0, max(max(series) for _label_text, series in curves[key]) * headroom)
            ax.legend(loc='best')
            ax.set_title(f'{title} of {trait}')
            ax.set_xlabel('epoch')
            fig.savefig(os.path.join(save_path, f'{trait}_{suffix}.svg'), dpi=600)
            plt.close(fig)
        
# Draw the per-metric comparison figures of every quantitative trait.
quantitative_trait_compare(
    quantitative_dir_path=quantitative_dir_path,
    save_path=save_path,
    quantitative_trait_list=quantitative_trait_list,
)

All pictures will be saved to result/trait_name_evaluation.svg
Now drawing protein.
Now drawing oil.
Now drawing SdWgt.
Now drawing Yield.
Now drawing R1.
Now drawing R8.
Now drawing Hgt.
