In [8]:
import numpy as np
import pandas as pd
import torch,sklearn
from sklearn import model_selection
from torch import nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torcheval import metrics
import time
import uuid
import kalshi_python
from kalshi_python.models import *
import pickle
import requests
import ast
import json
    
    



In [54]:
class NN(nn.Module):
    def __init__(self,input_size,h1,h2):
        super().__init__()
        self.input_size = input_size
        self.h1 = h1
        self.h2 = h2
        self.layers = self.get_fc_layers()

    def get_fc_layers(self):
        layers = nn.Sequential(
            nn.Linear(self.input_size, self.h1,dtype = torch.float64),
            nn.ReLU(),
            nn.Linear(self.h1, self.h2,dtype = torch.float64),
            nn.ReLU(),
            nn.Linear(self.h2, 1,dtype = torch.float64),
           )
        return layers
        
    def forward(self, input):
        x = self.layers(input)
        return x

    def month_fun(self,m):
            d = {1:0,2:0,3:0,4:0,5:0,6:0,7:0,8:0,9:0,10:0,11:0,12:0}
            d[m] += 1.0
            return d

    def stat_fun(self,s):
            d = {'USW00012839' : 0, 'USW00014819' :0, 'USW00013904' :0, 'USW00094728': 0}
            d[s] += 1.0
            return d

    def load_data(self,datastr : str,features,target):
        dataset = pd.read_csv(datastr)
        dataset = dataset[features+[target,"STATION","Month"]]
        dataset.dropna(inplace=True)
        dataset[target] = dataset[target].astype("float")
        for f in features:
            dataset[f] = dataset[f].astype("float")
        dataset[["Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"]] = \
            dataset.apply(lambda x : self.month_fun(x["Month"]),axis = 1,result_type="expand")
        dataset[['USW00012839', 'USW00014819', 'USW00013904', 'USW00094728']] =\
                dataset.apply(lambda x : self.stat_fun(x["STATION"]),axis = 1, result_type="expand")
        dataset.dropna(inplace=True)
        dataset.drop(columns = ["Month","STATION"],inplace = True)
        train,test = model_selection.train_test_split(dataset,test_size = 0.1,random_state = 3)
        train_x = train.drop(columns = [target])
        train_y = train[target]
        test_x = test.drop(columns = [target])
        test_y = test[target]
        self.train_x,self.train_y,self.test_x,self.test_y = train_x,train_y,test_x,test_y
        
    def train(self):
        criterion = nn.MSELoss()
        optimizer = optim.Adam(self.parameters(), lr=0.003)
        train_loader = DataLoader(list(zip(torch.tensor(self.train_x.values),torch.tensor(self.train_y.values))),batch_size  = 64)
        
        metric = metrics.R2Score()
        epoch = 0
        loss_prev = np.inf
        while True:
            epoch += 1
            start_time = time.time()
            total_correct = 0
            total_loss = 0
            for i, data in enumerate(train_loader, 0):
                inputs, labels = data
                optimizer.zero_grad() #Set all graidents to zero for each step as they accumulate over backprop
                inputs = inputs.view(inputs.shape[0], -1)                
                outputs = self.forward(inputs).view(inputs.shape[0])
                loss = criterion(outputs,labels)
                loss.backward() #computes dloss/dx for every parameter x which has requires_grad=True
                optimizer.step() # x += -lr * x.grad ie updates the weights of the parameters
                total_loss += loss.item()
            end_time = time.time() - start_time
            total_loss /= len(self.train_x)
            print("Epoch no.",epoch ,"|total_loss: ", total_loss, "| epoch_duration: ", round(end_time,2),"sec")
            # Printing out statistics
            if abs(total_loss-loss_prev) < 0.0001:
                break
            else:
                loss_prev = total_loss
            
        inputs = torch.tensor(self.test_x.values)
        labels = torch.tensor(self.test_y.values)
        outputs = self.forward(inputs)
        outputs = outputs.view(inputs.shape[0])
        metricR2 = metrics.R2Score()
        metricR2.update(outputs,labels)
        metricR2.compute()
        print("R2",metricR2.compute())

    def predict(self,inputs):
        for f in inputs.columns:
            inputs[f] = inputs[f].astype("float")
        return model.forward(torch.tensor(inputs.values)).view(inputs.shape[0])
        
def make_orders(predictions,date):
    config = kalshi_python.Configuration()
    config.host = 'https://demo-api.kalshi.co/trade-api/v2'
    client = kalshi_python.ApiInstance(
    email="mlevine6@bu.edu",
    password= "in5u>e@t}MaE:mM",
    configuration=config,
)
    tickers = ["HIGHNY","HIGHCHI","HIGHMIA","HIGHAUS"]
    for ticker in tickers:
        pred = predictions[ticker]
        ticker = ticker + "-" + date
        response = client.get_event(ticker)
        temps = [float(o.ticker.split("-")[-1][1:]) for o in response.markets]
        diffs = [abs(pred-t) for t in temps]
        pick = [o.ticker for o in response.markets][np.argmin(diffs)]
        exchangeStatus = client.get_exchange_status()
        if exchangeStatus.trading_active:
            orderUuid = str(uuid.uuid4())
            orderResponse = client.create_order(CreateOrderRequest(
                ticker=pick,
                action='buy',
                type='limit',
                yes_price=50,
                count=1,
                client_order_id=orderUuid,
                side='yes',
            ))
            print(orderResponse)
            
def update(day):
    nyurl = "https://mesonet.agron.iastate.edu/cgi-bin/request/daily.py?network=NY_ASOS&stations=NYC"
    miaurl = "https://mesonet.agron.iastate.edu/cgi-bin/request/daily.py?network=FL_ASOS&stations=MIA"
    chiurl = "https://mesonet.agron.iastate.edu/cgi-bin/request/daily.py?network=IL_ASOS&stations=MDW"
    ausurl = "https://mesonet.agron.iastate.edu/cgi-bin/request/daily.py?network=TX_ASOS&stations=AUS"
    with open('vars.json', 'r') as f:
        data = json.load(f)
        pday = data["prev_day"]
        diff = day - pday
        if diff <= 0:
            return
        url = "&year1=2024&month1=3&day1=" + str(pday + 1) + "&year2=2024&month2=3&day2=" + str(day) + "&var=max_temp_f&na=blank&format=csv"
        for city in ["NY","M","C","A"]:
            u = {"NY" : nyurl,"M" : miaurl, "C" : chiurl,"A" : ausurl}[city] + url
            r = requests.get(u).text
            lines = r.split("\n")[1:-1]
            newlist = data[city]
            newlist = newlist[diff:]
            for line in lines:
                print(line)
                newlist.append(float(line.split(",")[-1]))
            data[city] = newlist
        data["prev_day"] = day
    with open('vars.json', 'w') as f:
        json.dump(data,f)
        
def load_era5_data(day):
    key = "a5c8c0da0c5f449fb21e280d3b6dbd3a"
    url = 'https://api.oikolab.com/weather'
    r = requests.get(url,
                     params={'param': ['temperature'],
                             'lat': [40.78,41.7,30.2,25.8],
                             'lon': [-74,-87.7,-97.7,80.3],
                             'location_id': ['NY','C','A','M'],
                             'freq' : 'D',
                             'start': '2024-03-' + str(day),
                             'end': '2024-03-' + str(day)},
                     headers={'api-key': key}
                     )
    a = ast.literal_eval(ast.literal_eval(r.text)["data"])
    a = pd.DataFrame(columns = a["columns"], data = a["data"])


def load_daily_data(day):
    update(day)
    a = load_era5_data(day)
    with open('vars.json', 'r') as f:
        data = json.load(f)
    M_daily,C_daily,A_daily,NY_daily = data["M"],data["C"],data["A"],data["NY"]
    data = pd.DataFrame({"STATION":['USW00012839', 'USW00014819', 'USW00013904', 'USW00094728'],\
                  "Month":[3,3,3,3],"days" : [M_daily,C_daily,A_daily,NY_daily]})
    data[["Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"]] = \
                data.apply(lambda x : model.month_fun(x["Month"]),axis = 1,result_type="expand")
    data[['USW00012839', 'USW00014819', 'USW00013904', 'USW00094728']] =\
                  data.apply(lambda x : model.stat_fun(x["STATION"]),axis = 1, result_type="expand")
    gfs = {'USW00012839' : a.loc[0]["temperature (degC)"], 'USW00014819' : a.loc[3]["temperature (degC)"], 'USW00013904' : a.loc[1]["temperature (degC)"] , 'USW00094728' : a.loc[2]["temperature (degC)"]}
    # data[["temp_forecast"]] =  data.apply((lambda x : gfs[x["STATION"]]* 1.8 + 32),axis = 1, result_type="expand")
    for i,row in data.iterrows():
        data.at[i,"Week_av"] = np.sum(row["days"][-7:])/7
        data.at[i,"2Week_av"] = np.sum(row["days"][-14:])/14
        data.at[i,"Month_av"] = np.sum(row["days"])/30
        data.at[i,"d1"],data.at[i,"d2"],data.at[i,"d3"],data.at[i,"d4"],data.at[i,"d5"],data.at[i,"d6"],data.at[i,"d7"] = row["days"][-7:][::-1]
        url = "&year1=2024&month1=3&day1=" + str(day) + "&year2=2024&month2=3&day2=" + str(day) + "&var=min_temp_f&var=precip_in&var=max_rh&na=blank&format=csv"
        url2 = "&year1=2024&month1=3&day1=" + str(day-1) + "&year2=2024&month2=3&day2=" + str(day-1) + "&var=min_temp_f&var=precip_in&var=max_rh&na=blank&format=csv"
        ud = { 'USW00094728' : "https://mesonet.agron.iastate.edu/cgi-bin/request/daily.py?network=NY_ASOS&stations=NYC",\
         'USW00012839' : "https://mesonet.agron.iastate.edu/cgi-bin/request/daily.py?network=FL_ASOS&stations=MIA",\
          'USW00014819' : "https://mesonet.agron.iastate.edu/cgi-bin/request/daily.py?network=IL_ASOS&stations=MDW",\
         'USW00013904' : "https://mesonet.agron.iastate.edu/cgi-bin/request/daily.py?network=TX_ASOS&stations=AUS"}
        url = ud[row["STATION"]] + url
        url2 = ud[row["STATION"]] + url2
        r = requests.get(url).text
        r2 = requests.get(url2).text
        data.at[i,"TMIN"] = float(r.split("\n")[1].split(",")[-3])
        data.at[i,"max_rh"] = float(r.split("\n")[1].split(",")[-1])
        p = r2.split("\n")[1].split(",")[-2]
        p = 0 if p == '' else float(p)
        data.at[i,"ry"] = p
        tick = {3:'NY',1:'C',2:'A',0:'M'}[i]
        data.at[i,"temp_forecast"] = gfs[row["STATION"]] * 1.8 + 32
    data.drop(["Month","STATION","days"],axis = 1,inplace = True)
    data = data[['TMIN', "ry","max_rh",'temp_forecast', 'Week_av', '2Week_av',
           'Month_av', 'd1', 'd2', 'd3', 'd4', 'd5', 'd6', 'd7', 'Jan', 'Feb',
           'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
           'USW00012839', 'USW00014819', 'USW00013904', 'USW00094728']]
    return data

In [92]:
features = ["TMIN","ry","max_rh","temp_forecast","Week_av","2Week_av","Month_av"]
for i in range(1,8):
    features.append("d" + str(i))
model = NN(30,48,24)
model.load_data("./data/for_1965_2024.csv",features,"TMAX")
model.train()
torch.save(model.state_dict(),"NNModel")

Epoch no. 1 |total_loss:  1.6713355010915119 | epoch_duration:  2.55 sec
Epoch no. 2 |total_loss:  0.4815139829405727 | epoch_duration:  2.64 sec
Epoch no. 3 |total_loss:  0.4521487478029229 | epoch_duration:  2.56 sec
Epoch no. 4 |total_loss:  0.43700898922858356 | epoch_duration:  2.54 sec
Epoch no. 5 |total_loss:  0.41139763005811936 | epoch_duration:  2.65 sec
Epoch no. 6 |total_loss:  0.37677811586586274 | epoch_duration:  2.61 sec
Epoch no. 7 |total_loss:  0.35082945595523524 | epoch_duration:  2.61 sec
Epoch no. 8 |total_loss:  0.3170277943899437 | epoch_duration:  2.63 sec
Epoch no. 9 |total_loss:  0.29274628828729443 | epoch_duration:  2.7 sec
Epoch no. 10 |total_loss:  0.2777702021348956 | epoch_duration:  2.63 sec
Epoch no. 11 |total_loss:  0.2711875294852956 | epoch_duration:  2.65 sec
Epoch no. 12 |total_loss:  0.2651355466134342 | epoch_duration:  2.69 sec
Epoch no. 13 |total_loss:  0.2619869564961213 | epoch_duration:  2.57 sec
Epoch no. 14 |total_loss:  0.25844224649239

In [8]:
model = NN()
model.load_state_dict(torch.load("NNModel"))
data = load_daily_data(24)
print(data)
preds = model.predict(data)
preds = {"HIGHNY" : preds[3].item(),"HIGHCHI": preds[1].item(),"HIGHMIA" : preds[0].item(),"HIGHAUS" : preds[2].item()}
print(preds)

   TMIN      ry     max_rh  temp_forecast    Week_av   2Week_av   Month_av  \
0  64.0  1.5500  83.781500         87.476  77.142857  80.214286  80.633333   
1  33.0  0.0001  58.558560         37.436  38.714286  50.571429  53.166667   
2  56.0  0.0000  96.463680         63.554  60.428571  68.214286  72.633333   
3  31.0  3.6600  56.270382         37.400  43.857143  53.285714  52.833333   

     d1    d2    d3  ...  Jul  Aug  Sep  Oct  Nov  Dec  USW00012839  \
0  76.0  74.0  75.0  ...  0.0  0.0  0.0  0.0  0.0  0.0          1.0   
1  44.0  37.0  37.0  ...  0.0  0.0  0.0  0.0  0.0  0.0          0.0   
2  64.0  65.0  59.0  ...  0.0  0.0  0.0  0.0  0.0  0.0          0.0   
3  44.0  49.0  36.0  ...  0.0  0.0  0.0  0.0  0.0  0.0          0.0   

   USW00014819  USW00013904  USW00094728  
0          0.0          0.0          0.0  
1          1.0          0.0          0.0  
2          0.0          1.0          0.0  
3          0.0          0.0          1.0  

[4 rows x 30 columns]
{'HIGHNY': 44.8

In [9]:
make_orders(preds,"24MAR24")

{'order': {'action': 'buy',
           'client_order_id': '91bc8572-4d0e-49cf-83be-ca17a5b2c68e',
           'created_time': '2024-03-24T17:32:25.856246Z',
           'expiration_time': None,
           'no_price': 50,
           'order_id': '565b7b48-05a3-4841-8fc1-7cc8dc22ce28',
           'side': 'yes',
           'status': 'executed',
           'ticker': 'HIGHNY-24MAR24-B45.5',
           'type': 'limit',
           'user_id': '9ac6156d-6869-47cf-ba13-74df1b5e51f8',
           'yes_price': 50}}
{'order': {'action': 'buy',
           'client_order_id': '3f79099d-d9de-4c5f-bd66-25c6f76f623d',
           'created_time': '2024-03-24T17:32:26.152716Z',
           'expiration_time': None,
           'no_price': 50,
           'order_id': '958b94fb-ee57-4a1f-bf1e-4107f0c0ea29',
           'side': 'yes',
           'status': 'executed',
           'ticker': 'HIGHCHI-24MAR24-T49',
           'type': 'limit',
           'user_id': '9ac6156d-6869-47cf-ba13-74df1b5e51f8',
           'yes_pric

In [102]:
data = pd.read_csv("for_1965_2024.csv")
model = NN(30,48,24)
model.load_state_dict(torch.load("NNModel"))

<All keys matched successfully>