In [1]:
import pandas as pd
pd.set_option('display.max_columns', None)

# Load data
df_oct = pd.read_csv('weather(Oct).csv')
df_nov = pd.read_csv('weather(Nov).csv')
df_deer = pd.read_csv('weather(Deer).csv')

# Remove '%' and commas from deer data and convert to float
df_deer['SUCCESS PERCENTAGE'] = df_deer['SUCCESS PERCENTAGE'].str.replace('%', '').astype(float)
df_deer['TOTAL LICESNSE SALES'] = df_deer['TOTAL LICESNSE SALES'].str.replace(',', '').astype(float)
df_deer['REGISTERED TOTAL HARVEST'] = df_deer['REGISTERED TOTAL HARVEST'].str.replace(',', '').astype(float)

df_oct = df_oct.dropna(axis=1, how='all')
df_nov = df_nov.dropna(axis=1, how='all')
df_deer = df_deer.dropna(axis=1, how='all')

# Aggregate data by YEAR (calculate mean, max, and min for each metric)
oct_aggregated = df_oct.groupby('YEAR').agg({
    'TempMAX': 'mean', 'TempAVG': 'mean', 'TempMIN': 'mean',
    'DewMAX': 'mean', 'DewAVG': 'mean', 'DewMIN': 'mean',
    'HumidMAX': 'mean', 'HumidAVG': 'mean', 'HumidMIN': 'mean',
    'WindMAX': 'mean', 'WindAVG': 'mean', 'WindMIN': 'mean',
    'PressureMAX': 'mean', 'PressureAVG': 'mean', 'PressureMIN': 'mean',
    'PrecipTOTAL': 'sum'  
}).reset_index()

nov_aggregated = df_nov.groupby('YEAR').agg({
    'TempMAX': 'mean', 'TempAVG': 'mean', 'TempMIN': 'mean',
    'DewMAX': 'mean', 'DewAVG': 'mean', 'DewMIN': 'mean',
    'HumidMAX': 'mean', 'HumidAVG': 'mean', 'HumidMIN': 'mean',
    'WindMAX': 'mean', 'WindAVG': 'mean', 'WindMIN': 'mean',
    'PressureMAX': 'mean', 'PressureAVG': 'mean', 'PressureMIN': 'mean',
    'PrecipTOTAL': 'sum'
}).reset_index()

# Merge aggregated weather data with deer data
oct_deer_combined = pd.merge(oct_aggregated, df_deer, on='YEAR', how='inner')
nov_deer_combined = pd.merge(nov_aggregated, df_deer, on='YEAR', how='inner')



In [133]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Fit the model
X_oct = oct_deer_combined.drop(columns=['TOTAL LICESNSE SALES','REGISTERED TOTAL HARVEST', 'SUCCESS PERCENTAGE'])
y_oct = oct_deer_combined['SUCCESS PERCENTAGE']

# Prepare the features and target for November
X_nov = nov_deer_combined.drop(columns=['TOTAL LICESNSE SALES','REGISTERED TOTAL HARVEST', 'SUCCESS PERCENTAGE'])
y_nov = nov_deer_combined['SUCCESS PERCENTAGE']

# Initialize the model
model = LinearRegression()

# Fit and evaluate the model for October
model.fit(X_oct, y_oct)
y_oct_pred = model.predict(X_oct)
mse_oct = mean_squared_error(y_oct, y_oct_pred)
r2_oct = r2_score(y_oct, y_oct_pred)

# Print results for October
print("October Data:")
print(f"Mean Squared Error: {mse_oct:.2f}")
print(f"R² Score: {r2_oct:.2f}")
print("Coefficients:")
for feature, coef in zip(X_oct.columns, model.coef_):
    print(f"{feature}: {coef:.2f}")

# Fit and evaluate the model for November
model.fit(X_nov, y_nov)
y_nov_pred = model.predict(X_nov)
mse_nov = mean_squared_error(y_nov, y_nov_pred)
r2_nov = r2_score(y_nov, y_nov_pred)
print('-----------------------------')
# Print results for November
print("November Data:")
print(f"Mean Squared Error: {mse_nov:.2f}")
print(f"R² Score: {r2_nov:.2f}")
print("Coefficients:")
for feature, coef in zip(X_nov.columns, model.coef_):
    print(f"{feature}: {coef:.2f}")

October Data:
Mean Squared Error: 1.63
R² Score: 0.94
Coefficients:
YEAR: -1.26
TempMAX: 1.21
TempAVG: -3.38
TempMIN: 0.56
DewMAX: 1.32
DewAVG: -0.68
DewMIN: 0.68
HumidMAX: -1.21
HumidAVG: 0.49
HumidMIN: -0.16
WindMAX: 0.18
WindAVG: -0.88
WindMIN: -1.36
PressureMAX: -46.26
PressureAVG: 8.33
PressureMIN: 10.68
PrecipTOTAL: 1.61
-----------------------------
November Data:
Mean Squared Error: 3.63
R² Score: 0.87
Coefficients:
YEAR: -1.05
TempMAX: -2.67
TempAVG: 5.32
TempMIN: -2.39
DewMAX: 3.06
DewAVG: -3.92
DewMIN: 1.11
HumidMAX: -0.22
HumidAVG: -1.06
HumidMIN: 1.21
WindMAX: -0.59
WindAVG: -0.30
WindMIN: -1.82
PressureMAX: 42.97
PressureAVG: -26.26
PressureMIN: 5.03
PrecipTOTAL: 0.62


In [9]:
import statsmodels.api as sm

# Define a function to perform regression and get p-values
def regression_analysis(df, dependent_var):
    X = df.drop(columns=['YEAR', dependent_var])
    X = sm.add_constant(X)  # Add constant term for intercept
    y = df[dependent_var]
    
    model = sm.OLS(y, X)
    results = model.fit()
    
    print(f"P-values for {dependent_var}:")
    print(results.pvalues)
    print("\n")

# Perform regression analysis for October data
print("October Data:")
regression_analysis(oct_deer_combined, 'SUCCESS PERCENTAGE')

# Perform regression analysis for November data
print("November Data:")
regression_analysis(nov_deer_combined, 'SUCCESS PERCENTAGE')

October Data:
P-values for SUCCESS PERCENTAGE:
const                       6.115967e-02
TempMAX                     8.786440e-01
TempAVG                     2.510987e-01
TempMIN                     3.121147e-02
DewMAX                      2.187888e-02
DewAVG                      9.789224e-04
DewMIN                      1.217817e-03
HumidMAX                    5.852091e-04
HumidAVG                    1.107489e-04
HumidMIN                    1.011905e-04
WindMAX                     4.369480e-03
WindAVG                     1.540384e-03
WindMIN                     9.983955e-04
PressureMAX                 1.095986e-01
PressureAVG                 3.671145e-02
PressureMIN                 2.133200e-03
PrecipTOTAL                 1.174253e-02
TOTAL LICESNSE SALES        9.598734e-09
REGISTERED TOTAL HARVEST    3.405901e-11
dtype: float64


November Data:
P-values for SUCCESS PERCENTAGE:
const                       9.145382e-01
TempMAX                     7.640454e-01
TempAVG                    