## Script with the final GLMs used to create the L-HAZELNUT model.
data: DERUTA 2020
PhD: Francesca Grisafi

In [1]:
import importnb
import sys
import pandas as pd
import os

In [2]:
def find_dir(start_path, last_folder):
    '''
    Walk up the directory tree from ``start_path`` and return the first
    directory whose final path component is exactly ``last_folder``.

    The match is made on the last path component (``os.path.basename``)
    rather than on a hard-coded ``"\\"`` suffix, so the search works with
    the path separator of whatever platform the notebook runs on.

    Parameters:
    - start_path (str): The starting directory path.
    - last_folder (str): The folder name to look for at the end of the directory.

    Returns:
    - str: The path of the matching directory if found, otherwise None.
    '''
    current_path = start_path
    while True:
        # Compare the final component only, so "my-L-HAZELNUT" does not
        # match a search for "L-HAZELNUT".
        if os.path.basename(current_path) == last_folder:
            return current_path
        parent_path = os.path.dirname(current_path)
        # dirname() returns its argument unchanged once the root (or an
        # empty relative path) is reached: nothing left to climb.
        if parent_path == current_path:
            return None
        current_path = parent_path

In [3]:
# Current working directory of the kernel (abspath('') resolves to the cwd).
current_dir = os.path.abspath('')

# Climb from the working directory up to the repository root folder.
repo_dir = find_dir(current_dir, "L-HAZELNUT")
if repo_dir is None:
    # Fail with an explicit message instead of the opaque TypeError that
    # os.path.join(None, ...) would raise.
    raise FileNotFoundError(
        f"No 'L-HAZELNUT' directory found above {current_dir!r}; "
        "run this notebook from inside the L-HAZELNUT repository."
    )

# Make the project's Scripts folder importable; the guard keeps sys.path
# free of duplicates when the cell is re-run.
import_path = os.path.join(repo_dir, "Scripts")
if import_path not in sys.path:
    sys.path.append(import_path)

In [4]:
# Show the contents of the folder currently referenced by import_path.
files = os.listdir(import_path)
print("File presenti nella directory:", files, sep="\n")

File presenti nella directory:
['.ipynb_checkpoints', 'Annual_shoot.py', 'GLMs', 'Import_Dataset.py', 'Lateral_buds.py', '__init__.py', '__pycache__']


In [5]:
import Import_Dataset

In [6]:
# Point import_path at the GLMs sub-folder. The basename check keeps the
# cell idempotent: without it, a second run would build ".../GLMs/GLMs"
# and os.listdir would fail.
if os.path.basename(import_path) != "GLMs":
    import_path = os.path.join(import_path, "GLMs")

# Avoid piling up duplicate sys.path entries on re-run.
if import_path not in sys.path:
    sys.path.append(import_path)

files = os.listdir(import_path)
print("File presenti nella directory:")
print(files)

File presenti nella directory:
['final_models_functions.py', 'final_models_plots.py', '__init__.py', '__pycache__']


In [7]:
import final_models_functions

In [8]:
# Every dataset lives in ../../Data/<name>.csv and is passed to the importer
# under a keyword equal to that <name>, so the mapping is derived from the
# list of names (insertion order is kept).
file_paths = {
    dataset: f"../../Data/{dataset}.csv"
    for dataset in (
        "bud",
        "bud_proleptic",
        "bud_sylleptic",
        "MV_bud_pro",
        "MV_bud_syl",
        "met_proleptic",
        "met_sylleptic",
        "all_met_proleptic",
        "all_met_sylleptic",
        "shoot",
    )
}

data = Import_Dataset.importdataset(**file_paths)

In [9]:
# List the dataset names returned by the importer.
for name in data:
    print(name)

# Bind each dataset to an explicit notebook-level name instead of injecting
# it through globals(): the variables used by the cells below are then
# visible to readers and linters, and a missing key fails right here with
# a KeyError. Note that the keys returned by the importer differ from the
# keyword names passed in file_paths (e.g. "MV_bud_PRO", "met_all_proleptic").
bud = data["bud"]
bud_proleptic = data["bud_proleptic"]
bud_sylleptic = data["bud_sylleptic"]
MV_bud_PRO = data["MV_bud_PRO"]
MV_bud_SYL = data["MV_bud_SYL"]
met_proleptic = data["met_proleptic"]
met_sylleptic = data["met_sylleptic"]
met_all_proleptic = data["met_all_proleptic"]
met_all_sylleptic = data["met_all_sylleptic"]
shoot = data["shoot"]

bud
bud_proleptic
bud_sylleptic
MV_bud_PRO
MV_bud_SYL
met_proleptic
met_sylleptic
met_all_proleptic
met_all_sylleptic
shoot


#### function for the relationship between shoot length in cm and length in number of nodes

In [10]:
# Fit the shoot-length model on the `shoot` dataset. The recorded output
# shows an uncentered OLS regression with `node` as dependent variable
# (104 observations), followed by the returned statsmodels results wrapper.
final_models_functions.shootnbnodesfromlength_proba(shoot)

                                 OLS Regression Results                                
Dep. Variable:                   node   R-squared (uncentered):                   0.976
Model:                            OLS   Adj. R-squared (uncentered):              0.976
Method:                 Least Squares   F-statistic:                              2107.
Date:                Fri, 31 May 2024   Prob (F-statistic):                    1.11e-83
Time:                        16:54:16   Log-Likelihood:                         -207.78
No. Observations:                 104   AIC:                                      419.6
Df Residuals:                     102   BIC:                                      424.8
Df Model:                           2                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x1d8d038c130>

#### has_blind_node

In [11]:
# Fit the blind-node model on the proleptic metamer dataset. The recorded
# output shows a binomial GLM (logit link) with dependent variable `b` and
# `rank_node` as regressor; the fitted results wrapper is returned.
final_models_functions.has_blind_node_proba(met_proleptic)

                 Generalized Linear Model Regression Results                  
Dep. Variable:                      b   No. Observations:                 1062
Model:                            GLM   Df Residuals:                     1061
Model Family:                Binomial   Df Model:                            0
Link Function:                  Logit   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -288.29
Date:                Fri, 31 May 2024   Deviance:                       576.58
Time:                        16:54:16   Pearson chi2:                     804.
No. Iterations:                     7   Pseudo R-squ. (CS):             0.1630
Covariance Type:            nonrobust                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
rank_node     -0.4663      0.029    -15.915      0.0

<statsmodels.genmod.generalized_linear_model.GLMResultsWrapper at 0x1d8d41b1880>

#### probability to have sylleptic

In [12]:
# Fit the sylleptic-presence model on the proleptic metamer dataset. The
# recorded output shows a binomial GLM (logit link) with dependent variable
# `shoot_type_binary` and `abs_norm_median_distance` as regressor.
final_models_functions.has_sylleptic_proba(met_proleptic)

                 Generalized Linear Model Regression Results                  
Dep. Variable:      shoot_type_binary   No. Observations:                 1062
Model:                            GLM   Df Residuals:                     1061
Model Family:                Binomial   Df Model:                            0
Link Function:                  Logit   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -523.40
Date:                Fri, 31 May 2024   Deviance:                       1046.8
Time:                        16:54:16   Pearson chi2:                 1.03e+03
No. Iterations:                     5   Pseudo R-squ. (CS):            0.05993
Covariance Type:            nonrobust                                         
                               coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------------------
abs_norm_median_distance

<statsmodels.genmod.generalized_linear_model.GLMResultsWrapper at 0x1d8d41c9280>

#### number of buds inside sylleptic shoots

In [13]:
# Fit the bud-count model on the sylleptic metamer dataset. The recorded
# output shows a Poisson GLM (log link) with dependent variable
# `tot_buds_syl_m_v` and two model degrees of freedom plus a constant.
final_models_functions.nb_mv_in_sylleptic_lambda(met_sylleptic)

                 Generalized Linear Model Regression Results                  
Dep. Variable:       tot_buds_syl_m_v   No. Observations:                  231
Model:                            GLM   Df Residuals:                      228
Model Family:                 Poisson   Df Model:                            2
Link Function:                    Log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -385.74
Date:                Fri, 31 May 2024   Deviance:                       217.83
Time:                        16:54:16   Pearson chi2:                     190.
No. Iterations:                     4   Pseudo R-squ. (CS):             0.1595
Covariance Type:            nonrobust                                         
                               coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------------------
const                   

#### number of V buds in sylleptic shoots (binomial GLM on `fate_binary`)

In [14]:
# Fit the V-bud model on the sylleptic M/V bud dataset (MV_bud_SYL).
# NOTE(review): despite the `_lambda` suffix, the recorded output is an
# intercept-only binomial GLM (logit link) on `fate_binary`, not a Poisson
# model. Confirm the intended family against final_models_functions.
final_models_functions.nb_v_in_sylleptic_lambda(MV_bud_SYL)

                 Generalized Linear Model Regression Results                  
Dep. Variable:            fate_binary   No. Observations:                  522
Model:                            GLM   Df Residuals:                      521
Model Family:                Binomial   Df Model:                            0
Link Function:                  Logit   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -361.36
Date:                Fri, 31 May 2024   Deviance:                       722.72
Time:                        16:54:16   Pearson chi2:                     522.
No. Iterations:                     3   Pseudo R-squ. (CS):              0.000
Covariance Type:            nonrobust                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0843      0.088      0.963      0.3

<statsmodels.genmod.generalized_linear_model.GLMResultsWrapper at 0x1d8d41e1730>

#### probability of bursting in sylleptic shoots (MOD4)

In [15]:
# FIXME: in the recorded run this call raises
#   ValueError: endog has evaluated to an array with multiple columns (522, 2)
# which statsmodels reports when the response column is non-numeric
# (bool/str). The response used inside burst_in_sylleptic_proba presumably
# needs to be cast to 0/1 before fitting. TODO: confirm in
# final_models_functions. Until then the notebook does not survive
# "Restart & Run All" past this cell.
final_models_functions.burst_in_sylleptic_proba(MV_bud_SYL)

ValueError: endog has evaluated to an array with multiple columns that has shape (522, 2). This occurs when the variable converted to endog is non-numeric (e.g., bool or str).

#### gamma distribution for the number of nodes of new shoots (MOD5)

#### nb_buds in proleptic

#### bud type in proleptic shoots