In [15]:
import numpy as np
import pandas as pd
import torch,sklearn
from sklearn import model_selection
from torch import nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torcheval import metrics
import time
import uuid
import kalshi_python
from kalshi_python.models import *
import pickle

    
    



In [17]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cpu


In [147]:
class NN(nn.Module):
    def __init__(self,input_size,h1,h2):
        super().__init__()
        # define layers
        self.input_size = input_size
        self.h1 = h1
        self.h2 = h2
        self.layers = self.get_fc_layers()

    def get_fc_layers(self):
        layers = nn.Sequential(
            nn.Linear(self.input_size, self.h1,dtype = torch.float64),
            nn.ReLU(),
            nn.Linear(self.h1, self.h2,dtype = torch.float64),
            nn.ReLU(),
            nn.Linear(self.h2, 6,dtype = torch.float64),
            nn.ReLU(),
            nn.Linear(6, 1,dtype = torch.float64),
           )
        return layers
    # define forward function
    def forward(self, input):
        x = self.layers(input)
        return x

    def month_fun(self,m):
            d = {1:0,2:0,3:0,4:0,5:0,6:0,7:0,8:0,9:0,10:0,11:0,12:0}
            d[m] += 1.0
            return d

    def stat_fun(self,s):
            d = {'USW00012839' : 0, 'USW00014819' :0, 'USW00013904' :0, 'USW00094728': 0}
            d[s] += 1.0
            return d

    def load_data(self,datastr : str,features,target):
    
        dataset = pd.read_csv(datastr)
        dataset = dataset[features+[target,"STATION","Month"]]
        dataset.dropna(inplace=True)
        dataset[target] = dataset[target].astype("float")
        for f in features:
            dataset[f] = dataset[f].astype("float")
        dataset[["Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"]] = \
            dataset.apply(lambda x : self.month_fun(x["Month"]),axis = 1,result_type="expand")
        dataset[['USW00012839', 'USW00014819', 'USW00013904', 'USW00094728']] =\
                dataset.apply(lambda x : self.stat_fun(x["STATION"]),axis = 1, result_type="expand")
        dataset.dropna(inplace=True)
        dataset.drop(columns = ["Month","STATION"],inplace = True)
        train,test = model_selection.train_test_split(dataset,test_size = 0.1,random_state = 3)
        train_x = train.drop(columns = [target])
        train_y = train[target]
        test_x = test.drop(columns = [target])
        test_y = test[target]
        self.train_x,self.train_y,self.test_x,self.test_y = train_x,train_y,test_x,test_y
    def train(self):
        criterion = nn.MSELoss()
        optimizer = optim.Adam(self.parameters(), lr=0.003)
        train_loader = DataLoader(list(zip(torch.tensor(self.train_x.values),torch.tensor(self.train_y.values))),batch_size  = 64)
        
        metric = metrics.R2Score()
        epoch = 0
        loss_prev = np.inf
        while True:
            epoch += 1
        
            # Initialising statistics that we will be tracking across epochs
            start_time = time.time()
            total_correct = 0
            total_loss = 0
        
            for i, data in enumerate(train_loader, 0):
                # get the inputs; data is a list of [inputs, labels]
                inputs, labels = data
                # zero the parameter gradients: Clean the gradient caclulated in the previous iteration
                optimizer.zero_grad() #Set all graidents to zero for each step as they accumulate over backprop
        
                # forward + backward + optimize
                inputs = inputs.view(inputs.shape[0], -1)
                
                outputs = self.forward(inputs).view(inputs.shape[0])
                loss = criterion(outputs,labels)
        
                # Calculate gradient of matrix with requires_grad = True
                loss.backward() #computes dloss/dx for every parameter x which has requires_grad=True
                optimizer.step() # x += -lr * x.grad ie updates the weights of the parameters
        
                # Adding loss to total loss
                total_loss += loss.item()
            end_time = time.time() - start_time
            total_loss /= len(self.train_x)
            print("Epoch no.",epoch ,"|total_loss: ", total_loss, "| epoch_duration: ", round(end_time,2),"sec")
            # Printing out statistics
            if abs(total_loss-loss_prev) < 0.0001:
                break
            else:
                loss_prev = total_loss
            
        
        inputs = torch.tensor(self.test_x.values)
        labels = torch.tensor(self.test_y.values)
        outputs = model.forward(inputs)
        outputs = outputs.view(inputs.shape[0])
        metricR2 = metrics.R2Score()
        metricR2.update(outputs,labels)
        metricR2.compute()
        print("R2",metricR2.compute())

    def predict(self,inputs):
        inputs = inputs[features+[target,"STATION","Month"]]
        inputs.dropna(inplace=True)
        inputs[target] = inputs[target].astype("float")
        for f in features:
            inputs[f] = inputs[f].astype("float")
        inputs[["Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"]] = \
            dataset.apply(lambda x : self.month_fun(x["Month"]),axis = 1,result_type="expand")
        inputs[['USW00012839', 'USW00014819', 'USW00013904', 'USW00094728']] =\
                inputs.apply(lambda x : stat_fun(x["STATION"]),axis = 1, result_type="expand")
        inputs.dropna(inplace=True)
        inputs.drop(columns = ["Month","STATION"],inplace = True)
        return model.forward(inputs).view(inputs.shape[0])

def make_orders(self,predictions,date):
    config = kalshi_python.Configuration()
    config.host = 'https://demo-api.kalshi.co/trade-api/v2'
    client = kalshi_python.ApiInstance(
    email="mlevine6@bu.edu",
    password= "in5u>e@t}MaE:mM",
    configuration=config,
)
    tickers = ["HIGHNY","HIGHCHI","HIGHMIA","HIGHAUS"]
    tickers = [ticker + "-" + date for ticker in tickers]
    for ticker in tickers:
        pred = predictions[ticker]
        response = kalshi_api.get_event(tickers[0])
        temps = [float(o.ticker.split("-")[-1][1:]) for o in response.markets]
        diffs = [abs(pred-t) for t in temps]
        pick = [o.ticker for o in response.markets][np.argmin(diffs)]
        if exchangeStatus.trading_active:
            orderUuid = str(uuid.uuid4())
            orderResponse = kalshi_api.create_order(CreateOrderRequest(
                ticker=pick,
                action='buy',
                type='limit',
                yes_price=50,
                count=1,
                client_order_id=orderUuid,
                side='yes',
            ))
            print(orderresponse)
            
            
        
                



In [149]:
features = ["TMIN","temp_forecast","ry","max_rh","Week_av","2Week_av","Month_av"]
for i in range(1,7):
    features.append("d" + str(i))
model = NN(29,24,12)
model.load_data("for_1965_2024.csv",features,"TMAX")
model.train()

    


Epoch no. 1 |total_loss:  1.8121912037580779 | epoch_duration:  0.8 sec
Epoch no. 2 |total_loss:  0.5195702076735446 | epoch_duration:  0.79 sec
Epoch no. 3 |total_loss:  0.4921518197052369 | epoch_duration:  0.75 sec
Epoch no. 4 |total_loss:  0.4866019884995567 | epoch_duration:  0.78 sec
Epoch no. 5 |total_loss:  0.4825170120010136 | epoch_duration:  0.75 sec
Epoch no. 6 |total_loss:  0.4790328329913065 | epoch_duration:  0.82 sec
Epoch no. 7 |total_loss:  0.4710233281964587 | epoch_duration:  0.77 sec
Epoch no. 8 |total_loss:  0.4551418400930584 | epoch_duration:  0.75 sec
Epoch no. 9 |total_loss:  0.437743013809906 | epoch_duration:  0.76 sec
Epoch no. 10 |total_loss:  0.4131635042417949 | epoch_duration:  0.76 sec
Epoch no. 11 |total_loss:  0.3873030022759302 | epoch_duration:  0.83 sec
Epoch no. 12 |total_loss:  0.3608156859319652 | epoch_duration:  0.82 sec
Epoch no. 13 |total_loss:  0.3325535435857284 | epoch_duration:  0.79 sec
Epoch no. 14 |total_loss:  0.3078587706232149 | e

In [153]:
import json
def update(day):
    nyurl = "https://mesonet.agron.iastate.edu/cgi-bin/request/daily.py?network=NY_ASOS&stations=NYC"
    miaurl = "https://mesonet.agron.iastate.edu/cgi-bin/request/daily.py?network=FL_ASOS&stations=MIA"
    chiurl = "https://mesonet.agron.iastate.edu/cgi-bin/request/daily.py?network=IL_ASOS&stations=MDW"
    ausurl = "https://mesonet.agron.iastate.edu/cgi-bin/request/daily.py?network=TX_ASOS&stations=AUS"
    with open('vars.json', 'r') as f:
        data = json.load(f)
        pday = data["prev_day"]
        diff = day - pday
        if diff <= 0:
            return
        url = "&year1=2024&month1=3&day1=" + str(pday + 1) + "&year2=2024&month2=3&day2=" + str(day) + "&var=max_temp_f&na=blank&format=csv"
        for city in ["NY","M","C","A"]:
            u = {"NY" : nyurl,"M" : miaurl, "C" : chiurl,"A" : ausurl}[city] + url
            r = requests.get(u).text
            lines = r.split("\n")[1:-1]
            newlist = data[city]
            newlist = newlist[diff:]
            for line in lines:
                print(line)
                newlist.append(float(line.split(",")[-1]))
            data[city] = newlist
        data["prev_day"] = day
    with open('vars.json', 'w') as f:
        json.dump(data,f)
day = 18
update(day)
with open('vars.json', 'r') as f:
    data = json.load(f)
M_daily,C_daily,A_daily,NY_daily = data["M"],data["C"],data["A"],data["NY"]
data = pd.DataFrame({"STATION":['USW00012839', 'USW00014819', 'USW00013904', 'USW00094728'],\
              "Month":[3,3,3,3],"days" : [M_daily,C_daily,A_daily,NY_daily]})
key = "ade9eb48979c4310ae7b3ba195cfd999"
r = requests.get(url,
                 params={'param': ['temperature'],
                         'lat': [40.78,41.7,30.2,25.8],
                         'lon': [-74,-87.7,-97.7,80.3],
                         'location_id': ['NY','C','A','M'],
                         'freq' : 'D',
                         'start': '2024-03-0' + str(day),
                         'end': '2024-03-0' + str(day)},
                 headers={'api-key': key3}
                 )
a = ast.literal_eval(ast.literal_eval(r.text)["data"])
a = pd.DataFrame(columns = a["columns"], data = a["data"])

for i,row in data.iterrows():
    data.at[i,"d1"],data.at[i,"d2"],data.at[i,"d3"],data.at[i,"d4"],data.at[i,"d5"],data.at[i,"d6"],data.at[i,"d7"] = track[-7:]
    data[["Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"]] = \
            data.apply(lambda x : self.month_fun(x["Month"]),axis = 1,result_type="expand")
    data[['USW00012839', 'USW00014819', 'USW00013904', 'USW00094728']] =\
              data.apply(lambda x : self.stat_fun(x["STATION"]),axis = 1, result_type="expand")
    data.drop(["Month","STATION","days"],axis = 1,inplace = True)
    url = "&year1=2024&month1=3&day1=" + str(day) + "&year2=2024&month2=3&day2=" + str(day) + "&var=min_temp_f&var=precip_in&var=max_rh&na=blank&format=csv"
    ud = { 'USW00012839' : "https://mesonet.agron.iastate.edu/cgi-bin/request/daily.py?network=NY_ASOS&stations=NYC",\
     'USW00014819' : "https://mesonet.agron.iastate.edu/cgi-bin/request/daily.py?network=FL_ASOS&stations=MIA",\
      'USW00013904' : "https://mesonet.agron.iastate.edu/cgi-bin/request/daily.py?network=IL_ASOS&stations=MDW",\
     'USW00094728' : "https://mesonet.agron.iastate.edu/cgi-bin/request/daily.py?network=TX_ASOS&stations=AUS"}
    url = ud[row["STATION"]] + url
    r = requests.get(url).text
    data.at[i,"TMIN"] = float(r.split("\n")[1].split(",")[-3])
    data.at[i,"max_rh"] = float(r.split("\n")[1].split(",")[-1])
    data.at[i,"ry"] = float(r.split("\n")[1].split(",")[-2])
    tick = {3:'NY',1:'C',2:'A',0:'M'}[i]
    data.at[i,"temp_forecast"] =(a[a["location_id (id)"] == tick].loc[0]["temperature (degC)"] * 1.8) + 32
    


NYC,2024-03-18,48.0
MIA,2024-03-18,74.0
MDW,2024-03-18,


ValueError: could not convert string to float: ''

In [210]:
model.train()

Epoch no. 1 |total_loss:  94325.65456734126 | epoch_duration:  1.79 sec
Epoch no. 2 |total_loss:  33262.54662403601 | epoch_duration:  1.81 sec
Epoch no. 3 |total_loss:  31194.10636482297 | epoch_duration:  1.74 sec
Epoch no. 4 |total_loss:  29860.688218314775 | epoch_duration:  1.81 sec
Epoch no. 5 |total_loss:  28860.173423635784 | epoch_duration:  1.72 sec
Epoch no. 6 |total_loss:  27886.6012325522 | epoch_duration:  1.78 sec
Epoch no. 7 |total_loss:  27251.314880143695 | epoch_duration:  1.83 sec
Epoch no. 8 |total_loss:  26081.98154727582 | epoch_duration:  1.82 sec
Epoch no. 9 |total_loss:  25185.2185729402 | epoch_duration:  1.85 sec
Epoch no. 10 |total_loss:  24476.975446796307 | epoch_duration:  1.89 sec
Epoch no. 11 |total_loss:  24198.89677720521 | epoch_duration:  1.85 sec
Epoch no. 12 |total_loss:  23747.9936821425 | epoch_duration:  1.88 sec
Epoch no. 13 |total_loss:  23339.136313121744 | epoch_duration:  1.85 sec
Epoch no. 14 |total_loss:  23149.65516880386 | epoch_durat

In [136]:
#client = ExchangeClient("https://demo.kalshi.co/","mlevine6@bu.edu","in5u>e@t}MaE:mM")
config = kalshi_python.Configuration()
# Comment the line below to use production
config.host = 'https://demo-api.kalshi.co/trade-api/v2'

# Create an API configuration passing your credentials.
# Use this if you want the kalshi_python sdk to manage the authentication for you.
kalshi_api = kalshi_python.ApiInstance(
    email="mlevine6@bu.edu",
    password= "in5u>e@t}MaE:mM",
    configuration=config,
)
balanceResponse = kalshi_api.get_balance()
exchangeStatus = kalshi_api.get_exchange_status()
print(balanceResponse)
print(exchangeStatus)
tickers = ["HIGHNY-24MAR16","highchi","highmia","highaus"]
orderbookResponse = kalshi_api.get_event(tickers[0])
# print(orderbookResponse)
markets = orderbookResponse.markets
for o in orderbookResponse.markets:
    if o.result == "yes":
        print(o.ticker)
temps = [float(o.ticker.split("-")[-1][1:]) for o in markets]
# diffs = [abs(pred-t) for t in temps]
# if exchangeStatus.trading_active:
#     # Submit an order for 10 yes contracts at 50cents on 'FED-23DEC-T3.00'.
#     orderUuid = str(uuid.uuid4())
#     orderResponse = kalshi_api.create_order(CreateOrderRequest(
#         ticker=tickers[0],
#         action='buy',
#         type='limit',
#         yes_price=50,
#         count=1,
#         client_order_id=orderUuid,
#         side='yes',
#     ))

{'balance': 9389}
{'exchange_active': True, 'trading_active': True}
HIGHNY-24MAR16-B61.5


In [148]:
from bs4 import BeautifulSoup
import requests
nyurl = "https://mesonet.agron.iastate.edu/cgi-bin/request/daily.py?network=NY_ASOS&stations=NYC"
miaurl = "https://mesonet.agron.iastate.edu/cgi-bin/request/daily.py?network=FL_ASOS&stations=MIA"
chiurl = "https://mesonet.agron.iastate.edu/cgi-bin/request/daily.py?network=IL_ASOS&stations=MDW"
ausurl = "https://mesonet.agron.iastate.edu/cgi-bin/request/daily.py?network=TX_ASOS&stations=AUS"
day = 17

# r = requests.get("https://forecast.weather.gov/product.php?site=NWS&issuedby=NYC&product=CLI&format=CI&version=1&glossary=1&highlight=off").text
# table = r.split("...................................................................")[1]
# maximum = int(table.split("MAXIMUM")[1].split()[0])
# minimum = table.split("MINIMUM")[1].split()[0])

# parser= BeautifulSoup(r,"html.parser")
r = requests.get("https://mesonet.agron.iastate.edu/cgi-bin/request/daily.py?network=NY_ASOS&stations=NYC&year1=2024&month1=3&day1=" + str(day) + "&year2=2024&month2=3&day2=" + str(day) + "&var=max_temp_f&na=blank&format=csv").text
print(r.split(",")[-1])
print(len(r.split("\n")))
print(r.split("\n"))


63.0

3
['station,day,max_temp_f', 'NYC,2024-03-17,63.0', '']


In [143]:
import json
def update(day):
    nyurl = "https://mesonet.agron.iastate.edu/cgi-bin/request/daily.py?network=NY_ASOS&stations=NYC"
    miaurl = "https://mesonet.agron.iastate.edu/cgi-bin/request/daily.py?network=FL_ASOS&stations=MIA"
    chiurl = "https://mesonet.agron.iastate.edu/cgi-bin/request/daily.py?network=IL_ASOS&stations=MDW"
    ausurl = "https://mesonet.agron.iastate.edu/cgi-bin/request/daily.py?network=TX_ASOS&stations=AUS"
    with open('vars.json', 'r') as f:
        data = json.load(f)
        pday = data["prev_day"]
        diff = day - pday
        if diff <= 0:
            return
        url = "&year1=2024&month1=3&day1=" + str(pday + 1) + "&year2=2024&month2=3&day2=" + str(day) + "&var=max_temp_f&na=blank&format=csv"
        for city in ["NY","M","C","A"]:
            u = {"NY" : nyurl,"M" : miaurl, "C" : chiurl,"A" : ausurl}[city] + url
            r = requests.get(u).text
            lines = r.split("\n")[1:-1]
            newlist = data[city]
            newlist = newlist[diff:]
            for line in lines:
                newlist.append(float(line.split(",")[-1]))
            data[city] = newlist
        data["prev_day"] = day
    with open('vars.json', 'w') as f:
        json.dump(data,f)
            
# NY_daily = [43,36,40,41,40,43,45,47,44,41,55,55,62,43,48,55,68,59,49,53,54,57,49,51,52,66,62,74,73,61]
# M_daily =  [79,84,76,70,70,74,77,79,76,77,76,78,80,81,79,80,83,85,83,82,86,85,86,88,80,80,81,84,84,86]
# C_daily =  [33,30,47,52,58,68,61,52,36,62,72,74,53,46,47,60,74,72,45,50,50,47,47,49,68,72,69,67,52,60]
# A_daily =  [73,52,54,68,79,77,87,75,79,83,82,80,70,49,68,81,80,83,91,83,75,83,62,67,73,77,84,80,76,75]

In [35]:
# import matplotlib.pyplot as plt
# df = pd.read_csv("1965_2024.csv")
# for at in ["TMIN","ry","max_rh","Week_av","2Week_av","Month_av","d1","d2","d3","d4","d5","d6","d7"]:
#     plt.figure(figsize=(9,6))
#     plt.subplot(1,2,1)
#     plt.scatter(df[at],df["TMAX"])
    

In [131]:
import requests
key = "ade9eb48979c4310ae7b3ba195cfd999"
key2 = "2b13ca426b96408d9d0c1f3814d15390"
key0 = "a5c8c0da0c5f449fb21e280d3b6dbd3a"
key3 = "bde9418ce268472293a3d35791409fbc"
url = 'https://api.oikolab.com/weather'

r = requests.get(url,
                 params={'param': ['temperature'],
                         'lat': [40.78,41.7,30.2,25.8],
                         'lon': [-74,-87.7,-97.7,80.3],
                         'location_id': ['NY','C','A','M'],
                         'freq' : 'D',
                         'start': '2000-01-01',
                         'end': '2023-12-31'},
                 headers={'api-key': key3}
                 )

In [132]:
import datetime as dt
import ast
a = ast.literal_eval(ast.literal_eval(r.text)["data"])
# print(ast.literal_eval(a["data"])["data"])
a = pd.DataFrame(columns = a["columns"], data = a["data"])
# ny = a[a["location_id (id)"] == "NY"]
# au = a[a["location_id (id)"] == "A"]
# m  = a[a["location_id (id)"] == "M"]
# c  = a[a["location_id (id)"] == "C"]
# dfs = []
# for df in [ny,au,m,c]:
for i, row in a.iterrows():
    a.at[i,"datetime"] = dt.datetime(2000,1,1) + dt.timedelta(days = i%(len(a)/4))
#     dfs.append(df)
# dfs = pd.concat(dfs)
print(a)

      coordinates (lat,lon) location_id (id) model (name)  \
0              (25.8, 80.3)                M         era5   
1              (25.8, 80.3)                M         era5   
2              (25.8, 80.3)                M         era5   
3              (25.8, 80.3)                M         era5   
4              (25.8, 80.3)                M         era5   
...                     ...              ...          ...   
35059         (41.7, -87.7)                C         era5   
35060         (41.7, -87.7)                C         era5   
35061         (41.7, -87.7)                C         era5   
35062         (41.7, -87.7)                C         era5   
35063         (41.7, -87.7)                C         era5   

       model elevation (surface)  utc_offset (hrs)  temperature (degC)  \
0                         117.83               5.5               14.03   
1                         117.83               5.5               12.91   
2                         117.83             

In [133]:
a.to_csv("forecasts2000_2023.csv",index = False)