# Libraries

In [221]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns 
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# Data Collection

In [222]:
df = pd.read_csv('Gold vs USDINR.csv')

In [223]:
df.head()

Unnamed: 0,Date,USD_INR,Goldrate
0,2024-01-01,83.240601,"₹5,066.31"
1,2024-01-08,83.076103,"₹4,966.31"
2,2024-01-15,83.160599,"₹5,015.33"
3,2024-01-22,83.146103,"₹4,950.84"
4,2024-01-29,82.927597,"₹4,976.77"


In [224]:
print(f"Rows: {df.shape[0]}\nCols: {df.shape[1]}")

Rows: 53
Cols: 3


# EDA

### Data Cleaning

In [225]:
df.drop(columns=['Date'], inplace=True)

In [226]:
df['Goldrate'] = df['Goldrate'].replace('₹','',regex=True).replace(',','', regex=True).astype(float)

In [227]:
df.head()

Unnamed: 0,USD_INR,Goldrate
0,83.240601,5066.31
1,83.076103,4966.31
2,83.160599,5015.33
3,83.146103,4950.84
4,82.927597,4976.77


### Handle Null Values

In [228]:
df.isnull().sum()

USD_INR     0
Goldrate    0
dtype: int64

### Handle Outliers

In [229]:
import scipy.stats as stats

z_score = stats.zscore(df)
threshold = 3
outlier_df = df[(abs(z_score)>threshold).any(axis=1)]

In [230]:
outlier_df.shape

(1, 2)

In [231]:
outlier_df

Unnamed: 0,USD_INR,Goldrate
52,85.786598,6585.39


In [232]:
df.USD_INR.min(), df.USD_INR.max()

(np.float64(82.75229645), np.float64(85.78659821))

Since the range is between 52 to 86 it is not an Outlier

# Model Training

In [233]:
X = df[['USD_INR']]
y = df['Goldrate']

In [234]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

In [235]:
scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)

In [236]:
model = LinearRegression()

In [237]:
model.fit(X_train, y_train)

In [238]:
model.score(X_test, y_test)

0.7578780702410666

In [239]:
slope = model.coef_[0]
intercept = model.intercept_

In [240]:
print(f"Slope: {slope}\nIntercept: {intercept}")

Slope: 469.2075576153299
Intercept: 5878.573404255313


In [241]:
y_pred = model.predict(X_test)

# Model Evaluvation

In [242]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [243]:
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

In [245]:
print(f"Mean Absolute Error (MAE): {mae}")
print(f"Mean Squared Error (MSE): {mse}")
print(f"R² Score: {r2} (~75.78%)")

Mean Absolute Error (MAE): 280.90563855649253
Mean Squared Error (MSE): 113492.62498473369
R² Score: 0.7578780702410666 (~75.78%)
