In [10]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import optuna as ot

In [21]:
# LOAD DATA
# Parse the dates and sort chronologically BEFORE choosing the base CPI, so
# that `.iloc[-1]` below is the most recent observation regardless of the
# order in which rows happen to be stored in the CSV.
data = pd.read_csv('aggregated_data.csv')
data['DATE'] = pd.to_datetime(data['DATE'])
data = data.sort_values('DATE', ignore_index=True)

# INFLATION ADJUSTMENT
# Re-express each nominal series in terms of the latest period's price level:
#     real_t = nominal_t * (CPI_latest / CPI_t)
base_cpi = data['CPI_US'].iloc[-1]
cpi_factor = base_cpi / data['CPI_US']

# NOTE(review): US_MXN_RATES is an exchange rate, yet it is scaled by the US
# CPI factor exactly like the price columns -- confirm this is intended.
nominal_cols = [
    'AVG_PRICE_AUCTION',
    'CORN_FUTURES_TYPICAL_PRICE',
    'FEEDER_CATTLE_FUTURES_TYPICAL_PRICE',
    'US_MXN_RATES',
    'DIESEL_RETAIL_PRICE',
]
for col in nominal_cols:
    data[col] = data[col] * cpi_factor

# Index by date and drop the columns that are not used downstream
# (per-class average prices, and CPI now that it has been applied).
data = data.set_index('DATE').drop(
    columns=[
        'avg_price_steers_ML1',
        'avg_price_steers_ML1_2',
        'avg_price_heifers_ML1',
        'avg_price_heifers_ML1_2',
        'CPI_US',
    ]
)
# print(data.head())


In [35]:
# RANDOM FOREST MODEL
# Target is the auction price; every remaining column of `data` is a feature.
target_col = 'AVG_PRICE_AUCTION'
y = data[target_col]
X = data.drop(columns=[target_col])

# 80/20 TRAIN/TEST SPLIT (fixed seed for reproducibility)
# NOTE(review): train_test_split shuffles rows by default and `data` is
# indexed by DATE -- confirm a random split (rather than a chronological
# hold-out) is appropriate for how this score will be interpreted.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# FIT THE REGRESSOR (fit() returns the fitted estimator itself)
rf_regressor = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# PREDICT ON THE HELD-OUT SET
rf_pred = rf_regressor.predict(X_test)

# MODEL PERFORMANCE
rf_r2 = r2_score(y_test, rf_pred)
print(f'R² Score: {rf_r2}')

# Adjusted R² penalises R² for the number of predictors; here n and k are
# the row and column counts of the TEST feature matrix.
n, k = X_test.shape
unexplained = 1 - rf_r2
penalty = unexplained * (n - 1) / (n - k - 1)
rf_adj_r2 = 1 - penalty
print(f'Adjusted-R² Score: {rf_adj_r2}')

R² Score: 0.9644705539882531
Adjusted-R² Score: 0.9600976990944996


In [37]:
# VISUALIZATION OF DATA
# Opens a new matplotlib figure with figsize=(10, 5). Nothing is plotted onto
# it in this cell, so the notebook renders it as an empty figure (0 Axes).
# NOTE(review): looks like a placeholder -- the plotting calls are missing.
plt.figure(figsize=(10,5))

<Figure size 1000x500 with 0 Axes>

<Figure size 1000x500 with 0 Axes>

In [31]:
# # CREATE A CORRELATION MATRIX
# correlation_matrix = data.corr()

# # CREATE HEATMAP
# plt.figure(figsize=(6,6))
# sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0, fmt='.2f', square=True, linewidths=0.5)