In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import time
import os
from blockchain_parser.blockchain import Blockchain

# Read the raw Texas weather observations (CSV despite the .txt extension).
weather_csv_path = "./TX_weather/TME_202109010000_202203310000.txt"
data = pd.read_csv(weather_csv_path)

In [7]:
# Which sky-condition codes occur in the first reported cloud layer (skyc1)?
#
# Legend:
#   CLR: Clear (0 oktas)
#   FEW: Few (1-2 oktas)
#   SCT: Scattered (3-4 oktas)
#   BKN: Broken (5-7 oktas)
#   OVC: Overcast (8 oktas)
#   M:   Missing
# NOTE(review): the data also contains 'VV ', which is not in this legend --
# presumably the METAR "vertical visibility" (sky obscured) code; confirm.
np.unique(data["skyc1"])

array(['BKN', 'CLR', 'FEW', 'M', 'OVC', 'SCT', 'VV '], dtype=object)

In [8]:
# Data exploration: how often does each sky-condition code occur?
# np.unique returns the sorted distinct codes together with their counts in one
# vectorized pass, so the number of categories no longer has to be hard-coded.
types, type_counts = np.unique(data.skyc1, return_counts=True)
# Kept as floats so the printed summary matches the previous format (e.g. "1321.0").
n_types = type_counts.astype(float)
n_records = len(data.skyc1)

# NOTE(review): the label says "days", but each row appears to be a single
# observation rather than a day -- confirm before quoting these numbers.
for code, count in zip(types, n_types):
    print(count, code, " days out of ", n_records)

print(n_types, n_records)

1321.0 BKN  days out of  14865
6628.0 CLR  days out of  14865
1471.0 FEW  days out of  14865
2303.0 M  days out of  14865
1968.0 OVC  days out of  14865
1102.0 SCT  days out of  14865
72.0 VV   days out of  14865
[1321. 6628. 1471. 2303. 1968. 1102.   72.] 14865


In [2]:
# Locations of the raw block files and of the block index.
# NOTE(review): absolute, machine-specific paths; expanduser leaves them
# unchanged because they contain no "~".
blocks_dir = os.path.expanduser('/media/sam/Seagate1TB/data/bitcoin/blocks')
index_dir = os.path.expanduser('/media/sam/Seagate1TB/data/bitcoin/blocks/index')

blockchain = Blockchain(blocks_dir)

# Collect the header timestamp of every block from height 675000 onwards,
# in chain order. (For debugging, print block.height next to the timestamp.)
stamps = [
    block.header.timestamp
    for block in blockchain.get_ordered_blocks(index_dir, start=675000, cache='index-cache.pickle')
]

In [3]:
# Checkpoint: persist the timestamps, one per line, so the slow blockchain scan
# above does not have to be repeated; later cells read this file back instead.
with open("stamps_blk675000+.txt", "w") as f:
    f.writelines("%s\n" % item for item in stamps)

In [4]:
# Read the checkpoint back; every entry keeps its trailing newline.
with open("stamps_blk675000+.txt", "r") as f:
    lines = list(f)

In [2]:
# We now have block timestamps and weather observations. Remaining steps:
# 1. Turn the weather types into integers.
# 2. Compute delta(stamps): differences between consecutive block timestamps,
#    i.e. the actual block times.
# 3. Link the two so that they align in time.

# --- Step 1: integer-encode the sky condition -------------------------------
with open("weather_skyc1_march15_april30.txt", "r") as f:
    weather = f.readlines()

# Cloud cover on an ordinal scale. The keys include the trailing newline
# because readlines() keeps it.
#   CLR: Clear (0 oktas)
#   FEW: Few (1-2 oktas)
#   SCT: Scattered (3-4 oktas)
#   BKN: Broken (5-7 oktas)
#   OVC: Overcast (8 oktas)
#   M:   Missing
sky_codes = {"CLR\n": 0, "FEW\n": 1, "SCT\n": 2, "BKN\n": 3, "OVC\n": 4}

wi = np.zeros(len(weather))
VVs = []  # row indices whose sky condition is "VV "
for row, label in enumerate(weather):
    if label in sky_codes:
        wi[row] = sky_codes[label]
    elif label == "VV \n":
        VVs.append(row)
    else:
        # Anything unrecognised (e.g. missing "M") is only reported.
        print(row, label)

# NOTE(review): rows that are "VV " or unrecognised keep the initial value 0 in
# wi, which is the same code as CLR -- confirm this is intended before relying
# on the "sunny" classification downstream.

# --- Step 2: time between consecutive blocks --------------------------------
with open("stamps_blk675000+.txt", "r") as f:
    blocktimes = f.readlines()

FMT = "%Y-%m-%d %H:%M:%S"

# Parse every timestamp exactly once. rstrip("\n") (rather than [:-1]) keeps the
# value intact even if the final line of the file has no trailing newline.
parsed_stamps = [datetime.strptime(stamp.rstrip("\n"), FMT) for stamp in blocktimes]

tdeltas = []          # timedelta between block i and block i+1
tdeltas_seconds = []  # the same deltas as whole seconds
invalid_i = []        # indices i whose successor carries an earlier timestamp
for i in range(len(parsed_stamps) - 1):
    dt = parsed_stamps[i + 1] - parsed_stamps[i]

    if dt.days < 0:
        # Negative delta: block i+1 is stamped before block i. Record the index
        # so the matching blocktime can be dropped later, and skip the delta.
        invalid_i.append(i)
    else:
        tdeltas.append(dt)
        tdeltas_seconds.append(int(dt.total_seconds()))

In [3]:
# 3
# Functions to find the nearest item/index in terms of time:
def nearest(items, pivot):
    """Return the element of ``items`` that lies closest to ``pivot``.

    Closeness is ``abs(item - pivot)``, so this works for numbers as well as
    for datetimes. On a tie the earliest element wins. An empty ``items``
    raises ``ValueError`` (from ``min``).
    """
    def distance(candidate):
        return abs(candidate - pivot)

    return min(items, key=distance)

def nearest_ind(items, pivot):
    """Return the position in ``items`` of the element closest to ``pivot``.

    Closeness is the absolute difference ``|item - pivot|``; on a tie the
    first position wins. An empty ``items`` raises ``ValueError`` (from
    ``argmin``).
    """
    offsets = [moment - pivot for moment in items]
    return np.argmin(np.abs(offsets), axis=0)

# Align blocktimes 1:1 with tdeltas: the last blocktime has no delta, and the
# indices in invalid_i were skipped when the deltas were computed.
# The guard keeps this cell idempotent -- once trimmed, the lengths already
# match, so re-running the cell does not drop further entries.
if len(blocktimes) != len(tdeltas):
    blocktimes = np.delete(blocktimes[:-1], invalid_i)
assert len(blocktimes) == len(tdeltas), "blocktimes and tdeltas are misaligned"

print("size check:", len(blocktimes), len(tdeltas), len(weather))

# We now need to go through the blocktimes and find the closest weather TIME
with open("weather_times_march15_april30.txt", "r") as f:
    weather_time = f.readlines()

FMT_blk = "%Y-%m-%d %H:%M:%S"
FMT_wtr = "%Y-%m-%d %H:%M"

# Convert both series to datetime objects (lines still carry their newline).
blocktimes_dt = [datetime.strptime(stamp.rstrip("\n"), FMT_blk) for stamp in blocktimes]
weathertimes_dt = [datetime.strptime(stamp.rstrip("\n"), FMT_wtr) for stamp in weather_time]

# Perform the actual match.
# Instead of scanning all weather times for every block, only a window of
# +/- radius entries around the previous match is searched.
wtr_t_match = np.zeros(len(blocktimes_dt))
# The window indexes into weathertimes_dt, so its bound must come from the
# weather series (it was previously taken from the block series).
upper = len(weathertimes_dt)
indexo = 0
radius = 50 # lower=faster but more errorprone (anything above ~50 is very unlikely to yield errors)
# Texas is UTC-5, while bitcoin headers are UTC
# NOTE(review): a fixed 5h offset ignores daylight-saving changes and assumes
# the weather times are local -- confirm for the period covered.
utc_offset = timedelta(hours=5)
for i, blk_time in enumerate(blocktimes_dt):
    # Clamp the search window to the valid index range of the weather series.
    L = max(indexo - radius, 0)
    U = min(indexo + radius, upper - 1)

    index = nearest_ind(weathertimes_dt[L:U+1], blk_time - utc_offset)
    indexo = index + L # offset index by L ('raising the floor')

    wtr_t_match[i] = indexo

# OR: Load it
# with open("wtr_t_match.txt", "r") as f:
#     wtr_t_match = f.readlines()

# Note: Ensure dates from weather data extend beyond block-dates in both direction, otherwise you need to account for this.

# Now we can fetch the weather for each blocktime!
# wtr_t_match holds the indices as floats, hence the cast before indexing;
# fancy indexing returns a new float array of the integer weather codes.
wtr_at_blk = wi[wtr_t_match.astype(int)]

size check: 48350 48350 47475


In [4]:
# Classify every block by local time of day and cloud cover
# (based on blocktimes_dt):
#   0 SUNNY:  strictly between 08:00 and 16:00, CLR+FEW+SCT (weather code < 2.5)
#   1 CLOUDY: strictly between 08:00 and 16:00, BKN+OVC
#   2 REST:   everything else
day_start = datetime.strptime('08:00', '%H:%M').time()
day_end = datetime.strptime('16:00', '%H:%M').time()

# Start with every block flagged as REST and overwrite the daytime ones.
weather_type = np.full(len(blocktimes_dt), 2.0)

for idx, stamp_utc in enumerate(blocktimes_dt):
    # Shift the UTC block time by the fixed 5h offset used for Texas.
    # NOTE(review): a constant offset ignores daylight-saving changes -- confirm.
    local_clock = (stamp_utc - timedelta(hours=5)).time()
    if day_start < local_clock < day_end:
        weather_type[idx] = 0 if wtr_at_blk[idx] < 2.5 else 1

In [5]:
# Collect the aligned per-block series, one entry per future DataFrame column.
# NOTE(review): this rebinds `data`, which earlier held the raw weather
# DataFrame; the name is kept because the next cell reads it.
data = dict(
    blocktimes_dt=blocktimes_dt,
    weather_blk=wtr_at_blk,
    tdeltas=tdeltas_seconds,
    weather_type=weather_type,
)

In [6]:
# One row per block; columns follow the keys of the `data` dict.
df = pd.DataFrame(data)

<class 'numpy.float64'>


In [7]:
# Persist the combined block/weather table for later analysis.
output_pickle = "weathertexasBTC.pickle"
df.to_pickle(output_pickle)