In [2]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from scipy.stats import spearmanr
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import numpy as np

In [3]:
# Columns rescaled by the CPI ratio (base CPI / row CPI) below.
# NOTE(review): 'US_MXN_RATES' looks like an exchange rate rather than a USD price;
# deflating it by US CPI alone may not be intended - confirm.
CPI_ADJUSTED_COLUMNS = [
    'AVG_PRICE_AUCTION',
    'CORN_FUTURES_TYPICAL_PRICE',
    'FEEDER_CATTLE_FUTURES_TYPICAL_PRICE',
    'US_MXN_RATES',
    'DIESEL_RETAIL_PRICE',
]

# Columns removed before modelling. 'CPI_US' is not in this list, so it stays in
# `data` as a regular column. An earlier variant of this cell also dropped
# 'DIESEL_RETAIL_PRICE'; it is currently kept.
DROPPED_COLUMNS = [
    'avg_price_steers_ML1',
    'avg_price_steers_ML1_2',
    'avg_price_heifers_ML1',
    'avg_price_heifers_ML1_2',
    'NATIONAL_AUCTION_SALES',
    'NATIONAL_TOTAL_SALES',
    'TEXAS_AUCTION_SALES',
    'CORN_FUTURES_TYPICAL_PRICE',
]

data = pd.read_csv('aggregated_data.csv')

# Parse and order by date BEFORE choosing the base CPI: `.iloc[-1]` is positional,
# so on an unsorted file it would pick whichever row happens to be last in the CSV
# instead of the most recent observation.
data['DATE'] = pd.to_datetime(data['DATE'])
data = data.sort_values('DATE', ignore_index=True)

# INFLATION ADJUSTMENT
# Every adjusted column is multiplied by base_cpi / CPI_US, i.e. re-expressed
# relative to the CPI of the chronologically last row.
base_cpi = data['CPI_US'].iloc[-1]
cpi_deflator = base_cpi / data['CPI_US']
data = data.assign(**{col: data[col] * cpi_deflator for col in CPI_ADJUSTED_COLUMNS})

# Index by date and discard the columns that are not used downstream.
data = data.set_index('DATE').drop(columns=DROPPED_COLUMNS)


In [None]:
# Full target vector and feature matrix: every column except the target is a feature.
y = data['AVG_PRICE_AUCTION']
X = data.drop(columns=['AVG_PRICE_AUCTION'])

# Positional 80/20 split. A random `train_test_split(..., test_size=0.2,
# random_state=42)` was tried here previously; slicing by position instead keeps
# the first 80% of rows for training and the remaining rows for testing.
train_size = int(0.8 * len(data))
train_data, test_data = data.iloc[:train_size], data.iloc[train_size:]

# Separate features from the target within each partition.
X_train, y_train = (
    train_data.drop(columns='AVG_PRICE_AUCTION'),
    train_data['AVG_PRICE_AUCTION'],
)
X_test, y_test = (
    test_data.drop(columns='AVG_PRICE_AUCTION'),
    test_data['AVG_PRICE_AUCTION'],
)

In [5]:
def print_metrics(X_test, y_test, y_pred):
    """Compute evaluation metrics for a set of predictions.

    Despite its name, this helper prints nothing; it returns the metrics so the
    caller (or the notebook cell output) can display them.

    Parameters
    ----------
    X_test
        Not used by the current implementation; kept so existing calls that
        pass the feature matrix keep working.
    y_test
        Observed target values.
    y_pred
        Predicted target values, in the same order as ``y_test``.

    Returns
    -------
    tuple
        ``(r2, pearson, spearman, rmse, mae)`` where ``r2`` is the coefficient
        of determination, ``pearson`` the Pearson correlation coefficient,
        ``spearman`` the Spearman rank correlation, ``rmse`` the root mean
        squared error and ``mae`` the mean absolute error.
    """
    r_squared = r2_score(y_test, y_pred)

    # Off-diagonal entry of the 2x2 correlation matrix is the Pearson coefficient.
    pearson_r = np.corrcoef(y_test, y_pred)[0, 1]

    # spearmanr also yields a p-value, which is not reported.
    spearman_rho, _p_value = spearmanr(y_test, y_pred)

    root_mse = np.sqrt(mean_squared_error(y_test, y_pred))
    mean_abs_err = mean_absolute_error(y_test, y_pred)

    return (r_squared, pearson_r, spearman_rho, root_mse, mean_abs_err)

In [6]:
# LINEAR REGRESSION: fit on the training partition, predict the test partition
lin_reg = LinearRegression().fit(X_train, y_train)
lrg_pred = lin_reg.predict(X_test)

# Prediction errors on the test partition (actual minus predicted).
residuals = y_test - lrg_pred

# MODEL PERFORMANCE: the returned tuple (r2, pearson, spearman, rmse, mae)
# is the last expression of the cell and is shown as its output.
print_metrics(X_test, y_test, lrg_pred)

(-0.08182700729641712,
 np.float64(0.7177897830693776),
 np.float64(0.7230951499444649),
 np.float64(15.365231300351272),
 11.466680879106535)

In [7]:
# VISUALIZATION OF DATA
# Image-export options for the figure's download button.
# NOTE(review): `config` is not referenced again in the lines shown here;
# presumably it is meant to be passed along as `fig.show(config=config)` - confirm.
config = dict(
    toImageButtonOptions=dict(
        format='svg',  # one of png, svg, jpeg, webp
        filename='lrg_plot',
        height=1800,
        width=2800,
        scale=1,  # Multiply title/legend/axis/canvas sizes by this factor
    )
)

fig = go.Figure()

# One line+marker trace per series, added in this order:
# training history, actual test values, model forecast.
fig.add_traces([
    go.Scatter(
        x=dates,
        y=prices,
        mode='lines+markers',
        name=label,
        line={'color': colour},
    )
    for dates, prices, label, colour in (
        (X_train.index, y_train, 'Historical (Train)', 'blue'),
        (X_test.index, y_test, 'Actual (Test)', 'black'),
        (X_test.index, lrg_pred, 'Forecast (Predicted)', 'red'),
    )
])

# Titles, axis labels and theme.
fig.update_layout(
    template='plotly_white',
    title='Model Fit: Historical Data and Forecast',
    xaxis_title='Date',
    yaxis_title='AVG_PRICE_AUCTION (Inflation Adjusted)',
)