In [6]:
import numpy as np
import matplotlib.pyplot as plt

# Observations: month index, temperature, and yield, aligned by position
months = np.arange(1, 13)
temps = np.array([66, 64, 64, 66, 67, 70, 74, 75, 75, 73, 69, 65])
yields = np.array([18, 22, 19, 17, 14, 10, 6, 2, 3, 6, 10, 18])

# Sample means of the two predictors and of the response
x1_bar, x2_bar, y_bar = months.mean(), temps.mean(), yields.mean()

# Deviations from the mean, shared by each slope's numerator and denominator
months_centered = months - x1_bar
temps_centered = temps - x2_bar
yields_centered = yields - y_bar

# Slope pieces for the month model: sum of cross-deviations over sum of squares
num1 = np.sum(months_centered * yields_centered)
den1 = np.sum(months_centered**2)

# Same pieces for the temperature model
num2 = np.sum(temps_centered * yields_centered)
den2 = np.sum(temps_centered**2)

# Least-squares slopes
b1, b2 = num1 / den1, num2 / den2

# Intercepts: each fitted line passes through the point of means
a1, a2 = y_bar - b1 * x1_bar, y_bar - b2 * x2_bar

print(f"For H(x1) = a + bx1, a = {a1}, b = {b1}")
print(f"For H(x2) = c + dx2, c = {a2}, d = {b2}")




For H(x1) = a + bx1, a = 19.106060606060606, b = -1.0804195804195804
For H(x2) = c + dx2, c = 120.02392739273927, d = -1.5643564356435644


In [None]:
# Residual = observed yield minus the value predicted by each fitted line
residuals1 = yields - (a1 + b1 * months)
residuals2 = yields - (a2 + b2 * temps)

# One figure with two side-by-side panels, one per model
fig, (ax_month, ax_temp) = plt.subplots(1, 2, figsize=(12, 6))

panels = [
    (ax_month, months, residuals1, "Residual Plot for H(x1)", "Month"),
    (ax_temp, temps, residuals2, "Residual Plot for H(x2)", "Average High Temperature (◦F)"),
]
for ax, x_values, resid, title, x_label in panels:
    ax.scatter(x_values, resid)
    # Horizontal reference line at zero residual
    ax.axhline(y=0, color='r', linestyle='-')
    ax.set_title(title)
    ax.set_xlabel(x_label)
    ax.set_ylabel("Residuals")

plt.show()


In [4]:
# Drop one observation from both series before refitting the temperature model.
# NOTE(review): position 0 is hard-coded as the outlier — confirm this is the
# intended observation.
outlier_index = 0
temps_no_outlier = np.delete(temps, outlier_index)
yields_no_outlier = np.delete(yields, outlier_index)

# Means of the reduced predictor and response
x2_bar_no_outlier = temps_no_outlier.mean()
y_bar_no_outlier = yields_no_outlier.mean()

# Deviations from the mean, reused for the slope's numerator and denominator
temps_dev = temps_no_outlier - x2_bar_no_outlier
yields_dev = yields_no_outlier - y_bar_no_outlier
num2_no_outlier = np.sum(temps_dev * yields_dev)
den2_no_outlier = np.sum(temps_dev**2)

# Slope first, then the intercept from the point of means
b2_no_outlier = num2_no_outlier / den2_no_outlier
a2_no_outlier = y_bar_no_outlier - b2_no_outlier * x2_bar_no_outlier

print(f"For y = c + dx2 without the outlier, c = {a2_no_outlier}, d = {b2_no_outlier}")

# Absolute shift in intercept and slope relative to the full-data fit
intercept_shift = abs(a2 - a2_no_outlier)
slope_shift = abs(b2 - b2_no_outlier)
print(f"The parameters changed by {intercept_shift} and {slope_shift}")


For y = c + dx2 without the outlier, c = 118.46925260170293, d = -1.543519394512772
The parameters changed by 1.5546747910363337 and 0.020837041130792322
