In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import random

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_curve, auc
from math import isnan

In [3]:
# Read the CSV, keep its raw 'ts' values under the name 'unixtime', and expose
# the parsed timestamps as the new 'ts' column. Rows are then restricted to
# '2018-02-10' <= ts <= '2018-02-22' (both comparisons are inclusive).
data = (
    pd.read_csv('gold_clean.csv')
    .rename(columns={'ts': 'unixtime'})
    .assign(ts=lambda frame: pd.to_datetime(frame['unixtime']))
    .loc[lambda frame: (frame['ts'] >= '2018-02-10') & (frame['ts'] <= '2018-02-22')]
)

# Report how many rows survived the date filter, then preview the first rows.
print(f'len(data) = {len(data)}')
data.head()

len(data) = 10792


Unnamed: 0,unixtime,open,high,low,close,vol,ts
39280,1518220800000000000,1316.7,1317.1,1316.7,1317.0,259,2018-02-10 00:00:00
39281,1518220860000000000,1317.0,1317.0,1316.6,1316.7,103,2018-02-10 00:01:00
39282,1518220920000000000,1316.8,1317.1,1316.7,1317.1,139,2018-02-10 00:02:00
39283,1518220980000000000,1317.0,1317.6,1317.0,1317.5,198,2018-02-10 00:03:00
39284,1518221040000000000,1317.6,1318.3,1317.6,1318.1,218,2018-02-10 00:04:00


In [5]:
# Cut `data` at the 60% row mark and show both pieces: the first 60% of rows,
# then the remainder. Positional .iloc slices are used instead of np.split
# because NumPy's splitters can route a DataFrame through DataFrame.swapaxes,
# which pandas has deprecated. The displayed value is still a two-frame list.
split_at = int(0.6 * len(data))
[data.iloc[:split_at], data.iloc[split_at:]]

[                  unixtime    open    high     low   close  vol  \
 39280  1518220800000000000  1316.7  1317.1  1316.7  1317.0  259   
 39281  1518220860000000000  1317.0  1317.0  1316.6  1316.7  103   
 39282  1518220920000000000  1316.8  1317.1  1316.7  1317.1  139   
 39283  1518220980000000000  1317.0  1317.6  1317.0  1317.5  198   
 39284  1518221040000000000  1317.6  1318.3  1317.6  1318.1  218   
 ...                    ...     ...     ...     ...     ...  ...   
 45750  1518800160000000000  1358.3  1358.9  1358.2  1358.7  344   
 45751  1518800220000000000  1358.7  1358.7  1358.2  1358.2  244   
 45752  1518800280000000000  1358.2  1358.4  1358.2  1358.2  100   
 45753  1518800340000000000  1358.2  1358.4  1358.0  1358.2  293   
 45754  1518800400000000000  1358.3  1358.3  1357.8  1358.0  143   
 
                        ts  
 39280 2018-02-10 00:00:00  
 39281 2018-02-10 00:01:00  
 39282 2018-02-10 00:02:00  
 39283 2018-02-10 00:03:00  
 39284 2018-02-10 00:04:00  
 ...    

In [9]:
# Partition `data` into ten consecutive row chunks, d1 (first rows) .. d10.
# np.array_split is applied to the row positions (a plain ndarray) rather than
# to the DataFrame itself, because splitting a DataFrame directly can go
# through the deprecated DataFrame.swapaxes. Chunk sizes are unchanged: they
# still follow np.array_split's rule for uneven splits.
row_positions = np.array_split(np.arange(len(data)), 10)
d1, d2, d3, d4, d5, d6, d7, d8, d9, d10 = (data.iloc[pos] for pos in row_positions)

In [10]:
# Display the third of the ten consecutive row chunks of `data` (d1..d10).
d3

Unnamed: 0,unixtime,open,high,low,close,vol,ts
41440,1518530520000000000,1331.9,1332.0,1331.8,1331.9,75,2018-02-13 14:02:00
41441,1518530580000000000,1331.8,1331.9,1331.4,1331.4,102,2018-02-13 14:03:00
41442,1518530640000000000,1331.5,1331.7,1331.4,1331.5,101,2018-02-13 14:04:00
41443,1518530700000000000,1331.5,1331.6,1331.5,1331.6,27,2018-02-13 14:05:00
41444,1518530760000000000,1331.6,1331.6,1331.5,1331.6,14,2018-02-13 14:06:00
...,...,...,...,...,...,...,...
42514,1518598620000000000,1337.7,1337.7,1337.3,1337.3,43,2018-02-14 08:57:00
42515,1518598680000000000,1337.3,1337.5,1337.2,1337.4,49,2018-02-14 08:58:00
42516,1518598740000000000,1337.4,1337.4,1337.3,1337.4,19,2018-02-14 08:59:00
42517,1518598800000000000,1337.3,1337.3,1337.1,1337.2,63,2018-02-14 09:00:00


In [11]:
# Display the fourth of the ten consecutive row chunks of `data` (d1..d10).
d4

Unnamed: 0,unixtime,open,high,low,close,vol,ts
42519,1518598920000000000,1337.3,1337.3,1336.9,1337.0,70,2018-02-14 09:02:00
42520,1518598980000000000,1337.0,1337.1,1337.0,1337.0,23,2018-02-14 09:03:00
42521,1518599040000000000,1337.0,1337.1,1337.0,1337.1,30,2018-02-14 09:04:00
42522,1518599100000000000,1337.1,1337.2,1336.9,1336.9,29,2018-02-14 09:05:00
42523,1518599160000000000,1336.9,1337.0,1336.8,1337.0,57,2018-02-14 09:06:00
...,...,...,...,...,...,...,...
43593,1518666960000000000,1353.6,1353.8,1353.4,1353.6,239,2018-02-15 03:56:00
43594,1518667020000000000,1353.6,1353.8,1353.5,1353.8,54,2018-02-15 03:57:00
43595,1518667080000000000,1353.7,1353.8,1353.6,1353.8,44,2018-02-15 03:58:00
43596,1518667140000000000,1353.8,1354.3,1353.8,1354.2,81,2018-02-15 03:59:00
