In [108]:
import pandas as pd
import statsmodels.api as sm

# Load the accident-type counts from the workbook.
accident_types = pd.read_excel('accident_types.xlsx')

# Dependent variable (endog): total accident count per row.
endog = accident_types['NoOfAccidents']

# Independent variables (exog). The labels are used verbatim as column keys,
# including the trailing spaces and the spellings below -- presumably they
# mirror the workbook headers; do not "correct" them here.
predictor_columns = [
    'Year', 'Derailments', 'Collisions', 'CollisionsAtLC', 'FireAccidents',
    'Overshooting ', 'AccidentsAtTrack', 'AccidentsInStationLimits',
    'PassenagersTrain', 'GoodsTrain', 'OtherTrain', 'NoDamgeToPR',
    'CauseDamgeToPR', 'CostOfDamage', 'NoCasulties ', 'Casulties ',
    'NoOfDeaths', 'NoOfInjuries',
]
exog = accident_types[predictor_columns]

# Add an intercept column; statsmodels does not add one automatically.
exog = sm.add_constant(exog)

# NOTE(review): the recorded output for this cell reports 18 observations and
# "Df Model: 14" for 18 regressors, which suggests the design matrix is
# rank-deficient -- confirm before interpreting individual coefficients.

# Fit a Poisson GLM (log link by default).
model = sm.GLM(endog, exog, family=sm.families.Poisson()).fit()

# Print the full text summary of the fitted model.
print(model.summary())

# Extract the coefficient table of THIS cell's model as a DataFrame.
# (Previously this read `model_branch`, a variable defined in a different
# cell, so the exported file did not belong to this model and the cell failed
# with NameError on a fresh kernel.)
types = model.summary2().tables[1]

# Save the coefficient table to an Excel file.
types.to_excel('types_summary.xlsx')

                 Generalized Linear Model Regression Results                  
Dep. Variable:          NoOfAccidents   No. Observations:                   18
Model:                            GLM   Df Residuals:                        3
Model Family:                 Poisson   Df Model:                           14
Link Function:                    Log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -34.000
Date:                Tue, 30 Jul 2024   Deviance:                       1.6775
Time:                        19:32:06   Pearson chi2:                     1.65
No. Iterations:                     5   Pseudo R-squ. (CS):             0.9570
Covariance Type:            nonrobust                                         
                               coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------------------
const                   

In [107]:
import pandas as pd
import statsmodels.api as sm

# Load the accident counts broken down by cause.
accident_causes = pd.read_excel('accident_causes.xlsx')

# Dependent variable (endog): number of accidents per row.
endog = accident_causes['NoOfAccidents']

# One-hot encode 'Cause'; drop_first=True leaves the first category out of the
# dummy columns so they are not collinear with the intercept added below.
# dtype=float is explicit because pandas >= 2.0 returns bool dummies by
# default, and a frame mixing bool columns with the float constant is cast to
# object dtype, which statsmodels rejects.
exog = pd.get_dummies(accident_causes['Cause'], drop_first=True, dtype=float)

# Add an intercept column; statsmodels does not add one automatically.
exog = sm.add_constant(exog)

# Fit a Poisson GLM (log link by default).
model = sm.GLM(endog, exog, family=sm.families.Poisson()).fit()

# Print the full text summary of the fitted model.
print(model.summary())

# Extract the coefficient table of THIS cell's model as a DataFrame.
# (Previously this read `model_branch`, a variable defined in a different
# cell, so the exported file did not belong to this model and the cell failed
# with NameError on a fresh kernel.)
causes = model.summary2().tables[1]

# Save the coefficient table to an Excel file.
causes.to_excel('causes_summary.xlsx')

                 Generalized Linear Model Regression Results                  
Dep. Variable:          NoOfAccidents   No. Observations:                   21
Model:                            GLM   Df Residuals:                       19
Model Family:                 Poisson   Df Model:                            1
Link Function:                    Log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -45.060
Date:                Tue, 30 Jul 2024   Deviance:                       28.610
Time:                        19:31:23   Pearson chi2:                     28.2
No. Iterations:                     4   Pseudo R-squ. (CS):            0.05540
Covariance Type:            nonrobust                                         
                        coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------------
const                 1.0498      0.22

In [110]:
import pandas as pd
import statsmodels.api as sm

# Track labels that are pooled into a single 'Other' level before encoding.
# The keys are matched verbatim against the 'Track' column (note the irregular
# spacing), so they must stay exactly as written.
tracks_pooled_as_other = {
    'LLM  to RWP': 'Other',
    'PSC -TXLC': 'Other',
    'QTA- BLN': 'Other',
    'QTA- DLH-ZHN': 'Other',
    'SRQ to LLM': 'Other',
    'TXLC to HVN': 'Other'
}

# Load the data, drop rows missing either response, and pool the listed
# tracks. Built as one non-mutating chain so re-running the cell always starts
# from the workbook and no column is assigned onto a filtered frame.
accident_line = (
    pd.read_excel('accident_line.xlsx')
    .dropna(subset=['BranchLineAccidents', 'MainLineAccidents'])
    .assign(Track=lambda frame: frame['Track'].replace(tracks_pooled_as_other))
)

# Dependent variables (endog): one response per fitted model.
endog_branch = accident_line['BranchLineAccidents']
endog_main = accident_line['MainLineAccidents']

# One-hot encode 'Track'; drop_first=True leaves the first category out of the
# dummy columns so they are not collinear with the intercept added below.
# dtype=float is explicit because pandas >= 2.0 returns bool dummies by
# default, and a frame mixing bool columns with the float constant is cast to
# object dtype, which statsmodels rejects.
exog = pd.get_dummies(accident_line['Track'], drop_first=True, dtype=float)

# Add an intercept column; statsmodels does not add one automatically.
exog = sm.add_constant(exog)

# NOTE(review): the recorded output shows 9 observations, "Df Model: 6" and 22
# IRLS iterations for the branch-line fit -- the model is close to saturated,
# so treat the estimates with caution and confirm convergence.

# Fit the Poisson GLM for BranchLineAccidents.
model_branch = sm.GLM(endog_branch, exog, family=sm.families.Poisson()).fit()
print("BranchLineAccidents Model Summary")
print(model_branch.summary())

# Fit the Poisson GLM for MainLineAccidents on the same design matrix.
model_main = sm.GLM(endog_main, exog, family=sm.families.Poisson()).fit()
print("\nMainLineAccidents Model Summary")
print(model_main.summary())

# Coefficient tables of both models as DataFrames.
summary_branch = model_branch.summary2().tables[1]
summary_main = model_main.summary2().tables[1]

# Save both tables to one workbook, one sheet per model.
with pd.ExcelWriter('model_summaries.xlsx') as writer:
    summary_branch.to_excel(writer, sheet_name='BranchLineAccidents')
    summary_main.to_excel(writer, sheet_name='MainLineAccidents')

BranchLineAccidents Model Summary
                  Generalized Linear Model Regression Results                  
Dep. Variable:     BranchLineAccidents   No. Observations:                    9
Model:                             GLM   Df Residuals:                        2
Model Family:                  Poisson   Df Model:                            6
Link Function:                     Log   Scale:                          1.0000
Method:                           IRLS   Log-Likelihood:                -20.443
Date:                 Tue, 30 Jul 2024   Deviance:                       20.481
Time:                         19:32:35   Pearson chi2:                     17.3
No. Iterations:                     22   Pseudo R-squ. (CS):             0.9983
Covariance Type:             nonrobust                                         
                       coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------

In [109]:
import pandas as pd
import statsmodels.api as sm

# Load the accident counts broken down by season.
accident_seasons = pd.read_excel('accident_seasons.xlsx')

# Dependent variable (endog): number of accidents per row.
endog = accident_seasons['NoOfAccidents']

# One-hot encode 'Season'; drop_first=True leaves the first category out of
# the dummy columns so they are not collinear with the intercept added below.
# dtype=float is explicit because pandas >= 2.0 returns bool dummies by
# default, and a frame mixing bool columns with the float constant is cast to
# object dtype, which statsmodels rejects.
exog = pd.get_dummies(accident_seasons['Season'], drop_first=True, dtype=float)

# Add an intercept column; statsmodels does not add one automatically.
exog = sm.add_constant(exog)

# Fit a Poisson GLM (log link by default).
model = sm.GLM(endog, exog, family=sm.families.Poisson()).fit()

# Print the full text summary of the fitted model.
print(model.summary())

# Extract the coefficient table of THIS cell's model as a DataFrame.
# (Previously this read `model_branch`, a variable defined in a different
# cell, so the exported file did not belong to this model and the cell failed
# with NameError on a fresh kernel.)
season_summary = model.summary2().tables[1]

# Save the coefficient table to an Excel file.
season_summary.to_excel('season_summary.xlsx')
    

                 Generalized Linear Model Regression Results                  
Dep. Variable:          NoOfAccidents   No. Observations:                   12
Model:                            GLM   Df Residuals:                        8
Model Family:                 Poisson   Df Model:                            3
Link Function:                    Log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -41.811
Date:                Tue, 30 Jul 2024   Deviance:                       35.145
Time:                        19:32:18   Pearson chi2:                     35.6
No. Iterations:                     4   Pseudo R-squ. (CS):             0.8858
Covariance Type:            nonrobust                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          1.9459      0.218      8.917      0.0