### **IMPORTING ALL THE LIBRARIES**


In [None]:
import numpy as ny
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sbn
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn import metrics

### **COLLECTION OF DATA AND ITS PROCESSING**

In [None]:
# Load the gold price dataset into a DataFrame. The path points at a .zip
# archive, so this relies on read_csv's default compression inference.
# NOTE(review): confirm the archive holds exactly one CSV file.
gold_data_set = pd.read_csv(filepath_or_buffer='/content/archive (4).zip')

**BASIC INFORMATION**

In [None]:
# Print the frame's column names, non-null counts and dtypes.
gold_data_set.info()


**CHECKING NUMBER OF MISSING VALUES**

In [None]:
# Per-column count of missing entries (isna is the same check as isnull).
gold_data_set.isna().sum()


**STATISTICAL MEASURES OF THE DATA**

In [None]:
# Summary statistics of the frame; displayed as the cell's output.
gold_data_set.describe()

**CORRELATION**

In [None]:
# Pairwise correlation between the numeric columns only.
# The frame still contains the 'Date' column here (it is dropped before
# plotting and modelling elsewhere in this notebook); presumably it is read as
# strings, and DataFrame.corr() raises on non-numeric columns in pandas >= 2.0
# instead of silently skipping them. Selecting numeric dtypes first gives the
# same matrix on every pandas version.
correlation=gold_data_set.select_dtypes(include='number').corr()

**HEATMAP CONSTRUCTION**

In [None]:
# Annotated heatmap of the correlation matrix on a square 8x8 canvas.
plt.figure(figsize=(8, 8))
sbn.heatmap(
    correlation,
    annot=True,              # write each coefficient inside its cell
    fmt='.1f',               # one decimal place per annotation
    annot_kws={'size': 8},
    square=True,
    cbar=True,
    cmap='Blues',
)
plt.title('CORRELATION MATRIX HEATMAP')
plt.xlabel('FEATURES')
plt.ylabel('FEATURES')

**GLD CORRELATION**

In [None]:
# Show the 'GLD' column of the correlation matrix, i.e. how each column
# correlates with GLD.
print(correlation.loc[:, 'GLD'])

**GLD DISTRIBUTION**

In [None]:
# Distribution of the GLD column: density-normalised histogram with a KDE
# overlay. sbn.distplot is deprecated in seaborn; histplot (already used for
# the per-column histograms in this notebook) is its documented replacement.
sbn.histplot(gold_data_set['GLD'], kde=True, stat='density', color='green')

**HISTOGRAM OF EACH COLUMN**

In [None]:
# One 8x8 figure holding a histogram with KDE overlay for every column
# except "Date", laid out on a 2x3 grid of panels.
fig = plt.figure(figsize=(8, 8))
fig.suptitle('Distribution of data across column')

temp = gold_data_set.drop("Date", axis=1).columns.tolist()
for panel_number, column_name in enumerate(temp, start=1):
    # Subplot positions are 1-based, hence enumerate(..., start=1).
    plt.subplot(2, 3, panel_number)
    sbn.histplot(data=gold_data_set, x=column_name, kde=True)

plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=2.0)
plt.show()


**CHECKING SKEWNESS OF EACH COLUMN**

In [None]:
# Column-wise skewness, ignoring missing values.
# Only numeric columns are passed to skew(): the frame still carries the
# 'Date' column at this point (presumably strings), and DataFrame.skew()
# raises on non-numeric data in pandas >= 2.0 rather than skipping it.
print(gold_data_set.select_dtypes(include='number').skew(axis=0,skipna=True))

**REDUCING SKEWNESS (SQUARE-ROOT TRANSFORM)**

In [None]:
# Replace the USO and SLV columns with their element-wise square roots.
# NOTE: the columns are overwritten in place, so running this cell a second
# time takes the square root again.
gold_data_set["USO"] = ny.sqrt(gold_data_set["USO"])
gold_data_set["SLV"] = ny.sqrt(gold_data_set["SLV"])


**SPLITTING FEATURES AND TARGET**


In [None]:

# Target is the GLD column; features are every other column except 'Date'.
y = gold_data_set['GLD']
x = gold_data_set.drop(columns=['Date', 'GLD'])

# Echo both so the split can be inspected.
print(x)
print(y)


## **SPLITTING INTO TRAINING AND TESTING DATA**

In [None]:

# Hold out 20% of the rows for testing; random_state fixes the shuffle so the
# same split is produced on every run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=2,
)


## ***MODEL TRAINING USING RANDOM FOREST***

In [None]:
# Fit a 100-tree random forest on the training split.
# random_state seeds the forest's bootstrap sampling and feature selection so
# the fitted model (and every score derived from it) is the same on each run;
# 2 matches the seed used for the train/test split in this notebook.
regressor=RandomForestRegressor(n_estimators=100, random_state=2)
regressor.fit(x_train,y_train)

## **MODEL EVALUATION**

In [None]:
# Predict the target for the held-out feature rows and echo the raw values.
test_data_prediction = regressor.predict(X=x_test)
print(test_data_prediction)


In [None]:
# r2_score is the coefficient of determination: a goodness-of-fit score where
# 1.0 is a perfect fit and higher is better, not an error measure. The printed
# label says so; the variable name is kept in case other cells refer to it.
error_score= metrics.r2_score(y_test, test_data_prediction)
print("R squared score:",error_score)

In [None]:
# Overlay actual and predicted GLD values for the test rows.
# y_test is rebound to a plain list first, presumably so it is plotted against
# positions 0..n-1 like the prediction array rather than against its own index.
y_test = list(y_test)

plt.plot(y_test, color='black', label='Actual Value')
plt.plot(test_data_prediction, color='blue', label='Predicted Value')

# Labelling and legend.
plt.xlabel('Number of values')
plt.ylabel('GLD Price')
plt.title('Actual V/S Predicted ')
plt.legend()
plt.show()
