In [153]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Q1

In [154]:
df = pd.read_csv('gold_silver_price.csv')
df.head()

Unnamed: 0,Year,Gold,Silver
0,1965,72,
1,1966,84,
2,1967,103,
3,1968,162,
4,1969,176,


## Manual

In [155]:
df['X - X_mean'] = df['Year'] - df['Year'].mean()
df['y - y_mean'] = df['Gold'] - df['Gold'].mean()

sum_xx = sum(df['X - X_mean']**2)
sum_xy = sum(df['X - X_mean']*df['y - y_mean'])

In [156]:
B1 = sum_xy/sum_xx

B0 = df['Gold'].mean() - B1*df['Year'].mean()

In [157]:
df['Pred_Price_Gold'] = B0 + B1*df['Year']

In [158]:
mse = sum((df['Gold']-df['Pred_Price_Gold'])**2)/len(df['Year'])
rmse = mse**0.5

print(f'B0: {B0}')
print(f'B1: {B1}')
print(f'MSE: {mse}')
print(f'RMSE: {rmse}')

B0: -1336226.7042665107
B1: 675.3436586791349
MSE: 62644360.72375501
RMSE: 7914.819058181622


## Predicting

In [159]:
pred_2025 = B0 + B1*2025
print(f'Predicted gold value in 2025 for 1 gram: {pred_2025/10}')

Predicted gold value in 2025 for 1 gram: 3134.4204558737574


## Using SKLearn

In [160]:
X = df['Year'].to_numpy()
y = df['Gold'].to_numpy()

lr = LinearRegression()
lr.fit(X.reshape(-1,1), y)
predictions = lr.predict(X.reshape(-1,1))

mse_skl = mean_squared_error(y, predictions)
rmse_skl = mse_skl**0.5
B0_skl = lr.intercept_
B1_skl = lr.coef_[0]

print(f'B0_skl: {B0_skl}')
print(f'B1_skl: {B1_skl}')
print(f'MSE_skl: {mse_skl}')
print(f'RMSE_skl: {rmse_skl}')

B0_skl: -1336226.704266511
B1_skl: 675.343658679135
MSE_skl: 62644360.72375505
RMSE_skl: 7914.819058181623


# Q2

In [161]:
df.dropna(inplace=True)

df['x1_2'] = df['Year']**2
df['x2_2'] = df['Gold']**2
df['x1y'] = df['Year']*df['Silver']
df['x2y'] = df['Gold']*df['Silver']
df['x1x2'] = df['Year']*df['Gold']

sum_x1_2 = sum(df['x1_2']) - (sum(df['Year'])**2)/len(df['Year'])
sum_x2_2 = sum(df['x2_2']) - (sum(df['Gold'])**2)/len(df['Year'])
sum_x1y = sum(df['x1y']) - (sum(df['Year'])*sum(df['Silver']))/len(df['Year'])
sum_x2y = sum(df['x2y']) - (sum(df['Gold'])*sum(df['Silver']))/len(df['Year'])
sum_x1x2 = sum(df['x1x2']) - (sum(df['Year'])*sum(df['Gold']))/len(df['Year'])

In [162]:
denom = sum_x1_2*sum_x2_2 - sum_x1x2**2

B1 = (sum_x2_2*sum_x1y - sum_x1x2*sum_x2y)/denom
B2 = (sum_x1_2*sum_x2y - sum_x1x2*sum_x1y)/denom
B0 = df['Silver'].mean() - B1*df['Year'].mean() - B2*df['Gold'].mean()

In [164]:
X = df[['Year', 'Gold']]
y = df['Silver'].to_numpy()

lr = LinearRegression()
lr.fit(X, y)
predictions = lr.predict(X)

mse_skl = mean_squared_error(y, predictions)
rmse_skl = mse_skl**0.5
B0_skl = lr.intercept_
B1_B2 = lr.coef_

print(f'B0_skl: {B0_skl}')
print(f'B1_skl: {B1_B2}')
print(f'MSE_skl: {mse_skl}')
print(f'RMSE_skl: {rmse_skl}')

B0_skl: -406636.4260883493
B1_skl: [205.61188432   1.12883488]
MSE_skl: 27970705.328672245
RMSE_skl: 5288.733811478154
