## Script with the final GLMs used to create the L-HAZELNUT model.
data: DERUTA 2020
PhD: Francesca Grisafi

In [1]:
import importnb
import sys
import pandas as pd
import os

In [2]:
def find_dir(start_path, last_folder):
    '''
    Walk up the directory tree from ``start_path`` until a directory whose
    last path component is exactly ``last_folder`` is found.

    The last component is extracted with ``os.path.basename`` so the match
    works with the path separator of the running platform, instead of
    relying on a hard-coded backslash.

    Parameters:
    - start_path (str): The starting directory path.
    - last_folder (str): The folder name to look for at the end of the directory.

    Returns:
    - str: The path of the matching directory if found, otherwise None
      (the root of the tree was reached without a match).
    '''
    current_path = start_path
    while True:
        if os.path.basename(current_path) == last_folder:
            return current_path
        parent_path = os.path.dirname(current_path)
        # dirname() of the root (or of an empty string) is itself: nothing
        # is left to climb, so give up.
        if parent_path == current_path:
            return None
        current_path = parent_path

In [3]:
# Current working directory of the notebook kernel
current_dir = os.path.abspath('')
import_path = find_dir(current_dir, "L-HAZELNUT")
if import_path is None:
    # Stop here with an explicit message rather than letting
    # os.path.join(None, ...) raise an opaque TypeError on the next line.
    raise FileNotFoundError(
        f"No 'L-HAZELNUT' directory found at or above {current_dir!r}; "
        "run this notebook from inside the L-HAZELNUT repository."
    )
import_path = os.path.join(import_path, "Scripts")
# Re-running the cell must not stack duplicate entries on sys.path
if import_path not in sys.path:
    sys.path.append(import_path)

In [4]:
# Show what is inside the folder currently stored in import_path, so the
# modules available for import can be checked by eye.
files = os.listdir(import_path)
print("File presenti nella directory:", files, sep="\n")

File presenti nella directory:
['.ipynb_checkpoints', 'Annual_shoot.py', 'GLMs', 'Import_Dataset.py', 'Lateral_buds.py', '__init__.py', '__pycache__']


In [5]:
import Import_Dataset

In [6]:
# Point import_path at the "GLMs" sub-folder and make it importable.
# The basename check keeps the cell idempotent: without it, running the cell a
# second time would build ".../GLMs/GLMs" and os.listdir would fail.
if os.path.basename(import_path) != "GLMs":
    import_path = os.path.join(import_path, "GLMs")
# Avoid stacking duplicate sys.path entries on re-runs
if import_path not in sys.path:
    sys.path.append(import_path)
files = os.listdir(import_path)
print("File presenti nella directory:")
print(files)

File presenti nella directory:
['final_models_functions.py', 'final_models_plots.py', '__init__.py', '__pycache__']


In [7]:
import final_models_functions

In [8]:
# Every table is a CSV under ../../Data (relative to the kernel's working
# directory) whose file stem equals the keyword it is passed under, so the
# mapping is derived from the list of table names.
file_paths = {
    table: f"../../Data/{table}.csv"
    for table in (
        "bud",
        "bud_proleptic",
        "bud_sylleptic",
        "MV_bud_pro",
        "MV_bud_syl",
        "met_proleptic",
        "met_sylleptic",
        "all_met_proleptic",
        "all_met_sylleptic",
        "shoot",
    )
}

# One keyword argument per table; the result is iterated with .items() below.
data = Import_Dataset.importdataset(**file_paths)

In [9]:
# Bind every entry of `data` to a notebook-level variable named after its key
# (each name is printed as it is bound), so the cells below can refer to e.g.
# `shoot` or `MV_bud_PRO` directly.
# NOTE(review): the bound names are the keys of `data`, which in the recorded
# run differ from the file_paths keys (MV_bud_PRO vs MV_bud_pro,
# met_all_proleptic vs all_met_proleptic).
for name,dict_ in data.items():
    print(name)
    globals()[name] = dict_

bud
bud_proleptic
bud_sylleptic
MV_bud_PRO
MV_bud_SYL
met_proleptic
met_sylleptic
met_all_proleptic
met_all_sylleptic
shoot


#### function for the relationship between length in cm and length in number of nodes

In [10]:
# Shoot length -> number-of-nodes relationship, going by the function name.
# In the recorded run this prints a statsmodels OLS summary (dependent variable
# `node`, uncentered R-squared) and the cell echoes the returned results wrapper.
final_models_functions.shootnbnodesfromlength_proba(shoot)

                                 OLS Regression Results                                
Dep. Variable:                   node   R-squared (uncentered):                   0.976
Model:                            OLS   Adj. R-squared (uncentered):              0.976
Method:                 Least Squares   F-statistic:                              2107.
Date:                Fri, 07 Jun 2024   Prob (F-statistic):                    1.11e-83
Time:                        13:52:12   Log-Likelihood:                         -207.78
No. Observations:                 104   AIC:                                      419.6
Df Residuals:                     102   BIC:                                      424.8
Df Model:                           2                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x17e29e71100>

#### has_blind_node

In [11]:
# Called on `met_proleptic`. In the recorded run this prints a Binomial GLM
# (logit link) summary with dependent variable `b` and regressor `rank_node`;
# the cell echoes the returned GLM results wrapper.
final_models_functions.has_blind_node_proba(met_proleptic)

                 Generalized Linear Model Regression Results                  
Dep. Variable:                      b   No. Observations:                 1062
Model:                            GLM   Df Residuals:                     1061
Model Family:                Binomial   Df Model:                            0
Link Function:                  Logit   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -288.29
Date:                Fri, 07 Jun 2024   Deviance:                       576.58
Time:                        13:52:12   Pearson chi2:                     804.
No. Iterations:                     7   Pseudo R-squ. (CS):             0.1630
Covariance Type:            nonrobust                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
rank_node     -0.4663      0.029    -15.915      0.0

<statsmodels.genmod.generalized_linear_model.GLMResultsWrapper at 0x17e2ddbb370>

#### probability to have sylleptic

In [12]:
# Called on `met_proleptic`. In the recorded run this prints a Binomial GLM
# (logit link) summary with dependent variable `shoot_type_binary` and
# regressor `abs_norm_median_distance`; the cell echoes the returned wrapper.
final_models_functions.has_sylleptic_proba(met_proleptic)

                 Generalized Linear Model Regression Results                  
Dep. Variable:      shoot_type_binary   No. Observations:                 1062
Model:                            GLM   Df Residuals:                     1061
Model Family:                Binomial   Df Model:                            0
Link Function:                  Logit   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -523.40
Date:                Fri, 07 Jun 2024   Deviance:                       1046.8
Time:                        13:52:12   Pearson chi2:                 1.03e+03
No. Iterations:                     5   Pseudo R-squ. (CS):            0.05993
Covariance Type:            nonrobust                                         
                               coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------------------
abs_norm_median_distance

<statsmodels.genmod.generalized_linear_model.GLMResultsWrapper at 0x17e2ddcaa00>

#### number of buds inside sylleptic shoots

In [13]:
# Called on `met_sylleptic`. In the recorded run this prints a Poisson GLM
# (log link) summary with dependent variable `tot_buds_syl_m_v`.
final_models_functions.nb_mv_in_sylleptic_lambda(met_sylleptic)

                 Generalized Linear Model Regression Results                  
Dep. Variable:       tot_buds_syl_m_v   No. Observations:                  231
Model:                            GLM   Df Residuals:                      228
Model Family:                 Poisson   Df Model:                            2
Link Function:                    Log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -385.74
Date:                Fri, 07 Jun 2024   Deviance:                       217.83
Time:                        13:52:12   Pearson chi2:                     190.
No. Iterations:                     4   Pseudo R-squ. (CS):             0.1595
Covariance Type:            nonrobust                                         
                               coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------------------
const                   

#### binomial function for the number of V buds in sylleptic shoots

In [14]:
# Called on `MV_bud_SYL`. In the recorded run this prints an intercept-only
# Binomial GLM (logit link) summary with dependent variable `fate_binary`.
# NOTE(review): the `_lambda` suffix suggests a Poisson rate, but the recorded
# model family is Binomial — confirm which one is intended.
final_models_functions.nb_v_in_sylleptic_lambda(MV_bud_SYL)

                 Generalized Linear Model Regression Results                  
Dep. Variable:            fate_binary   No. Observations:                  522
Model:                            GLM   Df Residuals:                      521
Model Family:                Binomial   Df Model:                            0
Link Function:                  Logit   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -361.36
Date:                Fri, 07 Jun 2024   Deviance:                       722.72
Time:                        13:52:12   Pearson chi2:                     522.
No. Iterations:                     3   Pseudo R-squ. (CS):              0.000
Covariance Type:            nonrobust                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0843      0.088      0.963      0.3

<statsmodels.genmod.generalized_linear_model.GLMResultsWrapper at 0x17e2e2614c0>

#### probability of bursting in sylleptic shoots (MOD4)

In [15]:
# Called on `MV_bud_SYL`. In the recorded run this prints the optimizer status
# and a Logit (MLE) summary with dependent variable `nb_new_shoots`.
final_models_functions.burst_in_sylleptic_proba(MV_bud_SYL)

Optimization terminated successfully.
         Current function value: 0.631469
         Iterations 5
                           Logit Regression Results                           
Dep. Variable:          nb_new_shoots   No. Observations:                  522
Model:                          Logit   Df Residuals:                      517
Method:                           MLE   Df Model:                            4
Date:                Fri, 07 Jun 2024   Pseudo R-squ.:                 0.08510
Time:                        13:52:12   Log-Likelihood:                -329.63
converged:                       True   LL-Null:                       -360.29
Covariance Type:            nonrobust   LLR p-value:                 1.528e-12
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      1.2464      0.201      6.202      0.000       0.853       1.640
m:fate[M]     -0.7439      0.

#### gamma distribution for the number of nodes of new shoots in sylleptic shoots (MOD5)

In [16]:
# Called on `MV_bud_SYL`. In the recorded run this prints the model AIC and
# returns a 4-tuple: two floats (presumably the fitted distribution parameters
# — TODO confirm their order and meaning), the same AIC value, and a DataFrame
# with columns `x` (0.0 to 10.0 in steps of 0.1) and `y`.
final_models_functions.length_new_in_sylleptic(MV_bud_SYL)

AIC: length_new_in_sylleptic 856.0128672373229


(2.376534320583779,
 0.8351225805838078,
 856.0128672373229,
         x         y
 0     0.0  0.000000
 1     0.1  0.020589
 2     0.2  0.049176
 3     0.3  0.079046
 4     0.4  0.108042
 ..    ...       ...
 96    9.6  0.003952
 97    9.7  0.003687
 98    9.8  0.003440
 99    9.9  0.003209
 100  10.0  0.002993
 
 [101 rows x 2 columns])

#### number of buds in proleptic shoots

In [17]:
# Called on `met_proleptic`. In the recorded run this prints an intercept-only
# Poisson GLM (log link) summary with dependent variable `tot_buds_mv`; the
# cell echoes the returned GLM results wrapper.
final_models_functions.nb_mv_in_proleptic_lambda(met_proleptic)

                 Generalized Linear Model Regression Results                  
Dep. Variable:            tot_buds_mv   No. Observations:                  707
Model:                            GLM   Df Residuals:                      706
Model Family:                 Poisson   Df Model:                            0
Link Function:                    Log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -782.11
Date:                Fri, 07 Jun 2024   Deviance:                       101.14
Time:                        13:52:12   Pearson chi2:                     163.
No. Iterations:                     4   Pseudo R-squ. (CS):              0.000
Covariance Type:            nonrobust                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.1236      0.035      3.495      0.0

<statsmodels.genmod.generalized_linear_model.GLMResultsWrapper at 0x17e2d963400>

#### bud type in proleptic shoots

In [18]:
# Called on `bud_proleptic`. In the recorded run this prints the category
# mapping (M: 0, V: 1) and an MNLogit summary with dependent variable
# `fate_encoded`.
# NOTE(review): the echoed return value is an MNLogit model object, not a
# results wrapper like the other cells — confirm this is intended.
final_models_functions.bud_type_in_proleptic(bud_proleptic)

Category mapping:
M: 0
V: 1
Optimization terminated successfully.
         Current function value: 0.634245
         Iterations 5
                          MNLogit Regression Results                          
Dep. Variable:           fate_encoded   No. Observations:                  750
Model:                        MNLogit   Df Residuals:                      744
Method:                           MLE   Df Model:                            5
Date:                Fri, 07 Jun 2024   Pseudo R-squ.:                 0.07700
Time:                        13:52:12   Log-Likelihood:                -475.68
converged:                       True   LL-Null:                       -515.37
Covariance Type:            nonrobust   LLR p-value:                 1.137e-15
fate_encoded=1       coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------
const             -1.0413      5.419     -0.192      0.848     -11.662  

<statsmodels.discrete.discrete_model.MNLogit at 0x17e2e261130>

#### probability of bursting in proleptic shoots (MOD3)

In [19]:
# Called on `MV_bud_PRO`. In the recorded run this prints the optimizer status
# and a Logit (MLE) summary with dependent variable `presence_new_shoot`; the
# cell echoes the returned binary results wrapper.
final_models_functions.burst_in_proleptic_proba(MV_bud_PRO)

Optimization terminated successfully.
         Current function value: 0.387341
         Iterations 7
                           Logit Regression Results                           
Dep. Variable:     presence_new_shoot   No. Observations:                  800
Model:                          Logit   Df Residuals:                      795
Method:                           MLE   Df Model:                            4
Date:                Fri, 07 Jun 2024   Pseudo R-squ.:                  0.1647
Time:                        13:52:12   Log-Likelihood:                -309.87
converged:                       True   LL-Null:                       -370.98
Covariance Type:            nonrobust   LLR p-value:                 1.795e-25
                                   coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------------
Intercept                        1.9914      0.126     15.812      0.000

<statsmodels.discrete.discrete_model.BinaryResultsWrapper at 0x17e2dde1b20>

#### length of new shoots in proleptic shoots (MOD6)

In [20]:
# Called on `MV_bud_PRO`. In the recorded run this prints an OLS summary with
# dependent variable `length2yo`.
# NOTE(review): despite the `_proba` suffix, the recorded model is a linear
# regression, not a probability model.
final_models_functions.length_new_in_proleptic_proba(MV_bud_PRO)

                            OLS Regression Results                            
Dep. Variable:              length2yo   R-squared:                       0.267
Model:                            OLS   Adj. R-squared:                  0.262
Method:                 Least Squares   F-statistic:                     59.50
Date:                Fri, 07 Jun 2024   Prob (F-statistic):           7.45e-43
Time:                        13:52:12   Log-Likelihood:                -1638.4
No. Observations:                 659   AIC:                             3287.
Df Residuals:                     654   BIC:                             3309.
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
Intercept       

#### CLUSTERS(proleptic and sylleptic)

In [21]:
# Called on `bud`. In the recorded run this prints a "Cluster set proportion"
# value and a Binomial GLM (logit link) summary with dependent variable `cl`;
# the cell echoes the returned GLM results wrapper.
final_models_functions.have_clusters_proba(bud)

Cluster set proportion: 0.4786450662739323
                 Generalized Linear Model Regression Results                  
Dep. Variable:                     cl   No. Observations:                  679
Model:                            GLM   Df Residuals:                      676
Model Family:                Binomial   Df Model:                            2
Link Function:                  Logit   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -408.15
Date:                Fri, 07 Jun 2024   Deviance:                       816.30
Time:                        13:52:12   Pearson chi2:                     720.
No. Iterations:                     4   Pseudo R-squ. (CS):             0.1666
Covariance Type:            nonrobust                                         
                               coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------

<statsmodels.genmod.generalized_linear_model.GLMResultsWrapper at 0x17e2dde1bb0>

#### NUTS(proleptic and sylleptic)

In [22]:
# Called on `bud`. In the recorded run this prints a "Nut set proportion"
# value and a Poisson GLM (log link) summary with dependent variable `nu`;
# the cell echoes the returned GLM results wrapper.
# NOTE(review): the printed "proportion" is about 2.9, i.e. greater than 1 —
# presumably a mean count rather than a proportion; check the label in
# final_models_functions.
final_models_functions.number_nuts_lambda(bud)

Nut set proportion: 2.8984615384615386
                 Generalized Linear Model Regression Results                  
Dep. Variable:                     nu   No. Observations:                  325
Model:                            GLM   Df Residuals:                      323
Model Family:                 Poisson   Df Model:                            1
Link Function:                    Log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -538.34
Date:                Fri, 07 Jun 2024   Deviance:                       141.17
Time:                        13:52:12   Pearson chi2:                     139.
No. Iterations:                     4   Pseudo R-squ. (CS):            0.04347
Covariance Type:            nonrobust                                         
                           coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------

<statsmodels.genmod.generalized_linear_model.GLMResultsWrapper at 0x17e2e2b4b80>

#### function for the relationship between diameter in mm and length in cm

In [23]:
# Called on `shoot`. In the recorded run this prints two parameters (`a`, `b`)
# and an RMSE; no return value is echoed.
final_models_functions.diameter_proba(shoot)

Parameters: a=0.152679, b=0.37
RMSE: 0.59
