In [65]:
import pandas as pd
import numpy as np
from datetime import datetime
from google.cloud import storage # save the model to GCS

In [59]:
# The raw data is stored in a Google Cloud Storage bucket named
# 'gold-price-prediction' (belonging to TawabG).
# NOTE(review): reading a gs:// URL through pandas presumably relies on the
# optional gcsfs package and on valid GCP credentials — confirm both are
# available in the environment that runs this notebook.
gold_dataset = pd.read_csv("gs://gold-price-prediction/gold_dataset.csv") 

In [60]:
def pre_processing(gold_dataset):
    """Return a copy of the raw data indexed by calendar date.

    The 'date' column is parsed with ``pd.to_datetime(errors='coerce')``, so
    values that cannot be parsed become ``NaT`` instead of raising. The time
    component is then dropped (``.dt.date``) and the column is moved into the
    index.

    The input frame is NOT modified: the original implementation assigned to
    ``gold_dataset['date']`` in place, which silently rewrote the caller's raw
    DataFrame every time the cell was run.

    Parameters
    ----------
    gold_dataset : pandas.DataFrame
        Raw data containing a 'date' column.

    Returns
    -------
    pandas.DataFrame
        A new frame with every column except 'date', indexed by 'date'.

    Raises
    ------
    KeyError
        If ``gold_dataset`` has no 'date' column.
    """
    parsed_dates = pd.to_datetime(gold_dataset['date'], errors='coerce')

    # assign() returns a new frame, so the caller's data stays untouched.
    return gold_dataset.assign(date=parsed_dates.dt.date).set_index('date')

In [98]:
# Index the raw data by calendar date, then show the result: the bare last
# expression of the cell is what gets rendered as the cell output.
preprocessed_gold_dataset = pre_processing(gold_dataset)
preprocessed_gold_dataset

Unnamed: 0_level_0,timestamp,metal,exchange,currency,price,prev_close_price,ch,chp
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-01-01,1.577875e+12,XAU,LBMA,EUR,1355.7059,1355.7059,0.0000,0.0000
2020-01-02,1.577961e+12,XAU,LBMA,EUR,1358.4830,1360.6718,-2.1889,-0.1611
2020-01-03,1.578047e+12,XAU,LBMA,EUR,1388.1762,1364.0890,24.0872,1.7352
2020-01-04,1.578134e+12,XAU,LBMA,EUR,1388.1762,1388.1762,0.0000,0.0000
2020-01-05,1.578220e+12,XAU,LBMA,EUR,1388.1762,1388.1762,0.0000,0.0000
...,...,...,...,...,...,...,...,...
2020-11-15,1.605436e+12,XAU,LBMA,EUR,1589.6741,1589.6741,0.0000,0.0000
2020-11-16,1.605523e+12,XAU,LBMA,EUR,1599.8309,1587.6585,12.1724,0.7609
2020-11-17,1.605609e+12,XAU,LBMA,EUR,1586.7699,1592.8295,-6.0596,-0.3819
2020-11-18,1.605695e+12,XAU,LBMA,EUR,1581.7324,1588.6417,-6.9093,-0.4368


In [95]:
def create_features_dataframe(dataset, windows=(7, 30), price_column='price'):
    """Build lagged rolling-window statistics of a price column.

    For every window size the rolling mean, standard deviation, maximum and
    minimum of ``dataset[price_column]`` are computed and then shifted down by
    one row, so the value on a given row is derived only from the rows before
    it (the row's own price is never included).

    Columns are named ``'<Stat>__<window>_days'`` with ``<Stat>`` in
    ``Mean, Std, Max, Min``, grouped per window in the order given by
    ``windows``. Rows that do not yet have a full window of earlier prices
    hold NaN.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Frame holding the price series; its index is reused for the result.
    windows : iterable of int, optional
        Rolling window sizes in rows. Defaults to ``(7, 30)``, the values that
        were previously hard-coded.
    price_column : str, optional
        Name of the column to compute the statistics on. Defaults to
        ``'price'``.

    Returns
    -------
    pandas.DataFrame
        One column per (statistic, window) pair, aligned to ``dataset.index``.

    Raises
    ------
    KeyError
        If ``price_column`` is not a column of ``dataset``.
    """
    prices = dataset[price_column]
    gold_features_df = pd.DataFrame(index=dataset.index)

    for window in windows:
        # Build the rolling object once per window and reuse it for all stats.
        rolling_prices = prices.rolling(window)
        statistics = {
            'Mean': rolling_prices.mean,
            'Std': rolling_prices.std,
            'Max': rolling_prices.max,
            'Min': rolling_prices.min,
        }
        for label, compute in statistics.items():
            column_name = label + '__' + str(window) + '_days'
            # shift(1) lags the statistic so it only reflects earlier rows.
            gold_features_df[column_name] = compute().shift(periods=1)

    return gold_features_df

In [96]:
# Derive the rolling-window statistics from the date-indexed price data.
features_df = create_features_dataframe(preprocessed_gold_dataset)

In [97]:
# The first rows are all NaN: every feature needs a full window of earlier
# prices (at least 7 rows) before it has a value.
features_df.head()

Unnamed: 0_level_0,Mean__7_days,Std__7_days,Max__7_days,Min__7_days,Mean__30_days,Std__30_days,Max__30_days,Min__30_days
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-01-01,,,,,,,,
2020-01-02,,,,,,,,
2020-01-03,,,,,,,,
2020-01-04,,,,,,,,
2020-01-05,,,,,,,,
