# Model training

In [1]:
import pandas as pd

# Loading in the combined weather data of all the cities
combined_data = pd.read_excel('data/all_cities_weather_filtered.xlsx')
combined_data

Unnamed: 0,Date,maxtemp_sc,maxtemp_noaa,maxtemp_vc,maxtemp_mt,maxtemp_om,max_temp,City
0,2020-01-01,82,79.0,,82.0,76.811905,80,Miami
1,2020-02-01,81,77.0,,77.0,79.511900,79,Miami
2,2020-03-01,84,78.0,,75.9,81.491900,80,Miami
3,2020-04-01,87,,,88.0,84.731895,87,Miami
4,2020-05-01,72,83.0,,84.9,67.091900,77,Miami
...,...,...,...,...,...,...,...,...
5839,2023-27-12,44,44.0,43.4,44.1,41.383400,43,Chicago
5840,2023-28-12,44,44.0,42.8,44.1,39.673400,43,Chicago
5841,2023-29-12,44,44.0,42.8,44.1,40.123398,43,Chicago
5842,2023-30-12,39,39.0,37.5,39.0,42.553400,39,Chicago


In [2]:
import pandas as pd

# Correcting the date parsing
combined_data['Date'] = pd.to_datetime(combined_data['Date'], format='%Y-%d-%m')

combined_data['Year'] = combined_data['Date'].dt.year
combined_data['Month'] = combined_data['Date'].dt.month
combined_data['Day'] = combined_data['Date'].dt.day


In [3]:
# Encode the city names
combined_data = pd.get_dummies(combined_data, columns=['City'])
combined_data

Unnamed: 0,Date,maxtemp_sc,maxtemp_noaa,maxtemp_vc,maxtemp_mt,maxtemp_om,max_temp,Year,Month,Day,City_Austin,City_Central Park,City_Chicago,City_Miami
0,2020-01-01,82,79.0,,82.0,76.811905,80,2020,1,1,0,0,0,1
1,2020-01-02,81,77.0,,77.0,79.511900,79,2020,1,2,0,0,0,1
2,2020-01-03,84,78.0,,75.9,81.491900,80,2020,1,3,0,0,0,1
3,2020-01-04,87,,,88.0,84.731895,87,2020,1,4,0,0,0,1
4,2020-01-05,72,83.0,,84.9,67.091900,77,2020,1,5,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5839,2023-12-27,44,44.0,43.4,44.1,41.383400,43,2023,12,27,0,0,1,0
5840,2023-12-28,44,44.0,42.8,44.1,39.673400,43,2023,12,28,0,0,1,0
5841,2023-12-29,44,44.0,42.8,44.1,40.123398,43,2023,12,29,0,0,1,0
5842,2023-12-30,39,39.0,37.5,39.0,42.553400,39,2023,12,30,0,0,1,0


In [4]:
# obtaining the features and target of the data for training
features = combined_data[['Year', 'Month', 'Day'] + [col for col in combined_data.columns if 'City_' in col]]
target = combined_data['max_temp']


In [5]:
# Import necessary libraries and functions

from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
from math import sqrt

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

# Initialize the Gradient Boosting Regressor model

model = GradientBoostingRegressor()

# Fit the model on the training data

model.fit(X_train, y_train)


In [6]:
from sklearn.metrics import mean_squared_error
from math import sqrt

# Predictions on training set
y_train_pred = model.predict(X_train)

# Calculating MSE and RMSE for the training set
mse_train = mean_squared_error(y_train, y_train_pred)
rmse_train = sqrt(mse_train)

# Predictions on test set
y_test_pred = model.predict(X_test)

# Calculating MSE and RMSE for the test set
mse_test = mean_squared_error(y_test, y_test_pred)
rmse_test = sqrt(mse_test)

# Printing the results
print(f"Training MSE: {mse_train}, Training RMSE: {rmse_train}")
print(f"Test MSE: {mse_test}, Test RMSE: {rmse_test}")


Training MSE: 46.85289177383847, Training RMSE: 6.844917221839755
Test MSE: 53.912886564779136, Test RMSE: 7.342539517413518


In [7]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import GradientBoostingRegressor

# Define the parameter grid to search over
param_grid = {
    'n_estimators': [100, 200, 300], 
    'learning_rate': [0.01],
    'max_depth': [3, 4, 5],  
    'min_samples_split': [2, 4],
    'min_samples_leaf': [1, 2]
}

# Initialize the Gradient Boosting Regressor
gb_regressor = GradientBoostingRegressor()

# Setup GridSearchCV
grid_search = GridSearchCV(estimator=gb_regressor, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1, verbose=2)

# Fit the GridSearch to the data
grid_search.fit(X_train, y_train)


Fitting 5 folds for each of 36 candidates, totalling 180 fits


In [8]:
# Print the best parameters
print("Best parameters found: ", grid_search.best_params_)

# Get the best model
model = grid_search.best_estimator_

# Predict and evaluate
y_pred_train = model.predict(X_train)
y_pred_test = model.predict(X_test)

mse_train = mean_squared_error(y_train, y_pred_train)
mse_test = mean_squared_error(y_test, y_pred_test)
rmse_train = sqrt(mse_train)
rmse_test = sqrt(mse_test)

print(f"Training MSE: {mse_train}, Training RMSE: {rmse_train}")
print(f"Test MSE: {mse_test}, Test RMSE: {rmse_test}")


Best parameters found:  {'learning_rate': 0.01, 'max_depth': 5, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 300}
Training MSE: 47.30506022479826, Training RMSE: 6.877867418378917
Test MSE: 55.24241446506992, Test RMSE: 7.432524097846566


In [9]:
from sklearn.ensemble import GradientBoostingRegressor

# Adjusted model parameters for regularization
model_params = {
    'learning_rate': 0.1,  
    'max_depth': 3,  
    'min_samples_leaf': 4,  
    'min_samples_split': 4,  
    'n_estimators': 300,
    'subsample': 0.8  
}

# Initialize the model with the adjusted parameters
ad_model = GradientBoostingRegressor(**model_params)

# Re-train the model
ad_model.fit(X_train, y_train)

# Evaluate the model's performance
y_train_pred = ad_model.predict(X_train)
y_test_pred = ad_model.predict(X_test)

mse_train = mean_squared_error(y_train, y_train_pred)
mse_test = mean_squared_error(y_test, y_test_pred)
rmse_train = sqrt(mse_train)
rmse_test = sqrt(mse_test)

print(f"Adjusted Model - Training MSE: {mse_train}, Training RMSE: {rmse_train}")
print(f"Adjusted Model - Test MSE: {mse_test}, Test RMSE: {rmse_test}")


Adjusted Model - Training MSE: 40.81010534932405, Training RMSE: 6.388278746996256
Adjusted Model - Test MSE: 48.912979055795354, Test RMSE: 6.993781456107658


In [10]:
from sklearn.model_selection import cross_val_score, KFold
from sklearn.ensemble import GradientBoostingRegressor
import numpy as np

# Define the KFold parameters
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Perform cross-validation
cv_scores = cross_val_score(ad_model, X_train, y_train, cv=kf, scoring='neg_mean_squared_error')


In [11]:
# Calculate the mean and standard deviation of the cross-validation scores
mean_cv_score = np.mean(cv_scores)
std_cv_score = np.std(cv_scores)

# Convert the scores to positive values (since they are negative MSE)
mean_cv_score = -mean_cv_score
std_cv_score = std_cv_score

print(f"Mean CV MSE: {mean_cv_score}, Std CV MSE: {std_cv_score}")
print(f"Mean CV RMSE: {np.sqrt(mean_cv_score)}, Std CV RMSE: {np.sqrt(mean_cv_score + std_cv_score) - np.sqrt(mean_cv_score)}")


Mean CV MSE: 46.430263501566316, Std CV MSE: 3.846362337824184
Mean CV RMSE: 6.813975601773631, Std CV RMSE: 0.2766256308011146


In [12]:
# the following code allows using the model to get predicted max temp of any city for any date

import numpy as np
import pandas as pd

# List of cities based on your DataFrame's encoded city columns
cities = ['City_Austin', 'City_Central Park', 'City_Chicago', 'City_Miami']



# Initialize a dictionary to store the predicted temperatures
predicted_temperatures = {}

# Prepare and make predictions for each city
for city in cities:
    # Create a DataFrame for example_features with all city columns set to 0
    example_features = pd.DataFrame({
        'Year': [2024],
        'Month': [3],
        'Day': [23],
        'City_Austin': [0],
        'City_Central Park': [0],
        'City_Chicago': [0],
        'City_Miami': [0],
    }, columns=['Year', 'Month', 'Day', 'City_Austin', 'City_Central Park', 'City_Chicago', 'City_Miami'])
    
    # Set the current city column to 1
    example_features[city] = 1
    
    # Make the prediction using the trained model
    predicted_temp = ad_model.predict(example_features).round()
    
    # Store the predicted temperature for the city
    predicted_temperatures[city] = predicted_temp[0]

# Now, predicted_temperatures dictionary holds the predicted values for each city
print(f"Predicted max temperature for Cities: {predicted_temperatures}")


Predicted max temperature for Cities: {'City_Austin': 77.0, 'City_Central Park': 54.0, 'City_Chicago': 52.0, 'City_Miami': 83.0}


# API and Automation

In [13]:
import uuid
import datetime
import kalshi_python
from kalshi_python.models import *

# Your predicted temperatures for each city
predicted_temperatures 

# City codes for constructing event tickers
city_codes = {
    'City_Austin': 'HIGHAUS', 
    'City_Central Park': 'HIGHNY', 
    'City_Chicago': 'HIGHCHI', 
    'City_Miami': 'HIGHMIA'
}

config = kalshi_python.Configuration()
# Comment the line below to use production
config.host = 'https://demo-api.kalshi.co/trade-api/v2'

# Initialize Kalshi API with your credentials
# I have removed my credentials but this code works establishing a connection to kalshi with the right credentials
kalshi_api = kalshi_python.ApiInstance(
    email='user',
    password='password',
    configuration=config
)

# Check exchange status
exchangeStatus = kalshi_api.get_exchange_status()
if exchangeStatus.trading_active:
    print("The exchange is trading active")
    exit()
    



The exchange is trading active


In [14]:
# Current date formatted to match the event ticker format (e.g., "24MAR23")
current_date_formatted = datetime.datetime.now().strftime('%y%b%d').upper()
current_date_formatted

'24MAR29'

In [15]:

# Initialize a dictionary to store the predicted temperatures
event_tickers = {}

# Iterate through each city and its predicted temperature
for city, predicted_temp in predicted_temperatures.items():
    # Construct the event ticker for the current date based on the city
    event_tickers[city] = f"{city_codes[city]}-{current_date_formatted}"

event_tickers

{'City_Austin': 'HIGHAUS-24MAR29',
 'City_Central Park': 'HIGHNY-24MAR29',
 'City_Chicago': 'HIGHCHI-24MAR29',
 'City_Miami': 'HIGHMIA-24MAR29'}

In [16]:
# Assuming kalshi_api is your configured API instance and event_tickers is already defined

# Dictionary to store options for each city
market_options_by_city = {}

for city, event_ticker in event_tickers.items():
    try:
        # Get the event details
        eventResponse = kalshi_api.get_event(event_ticker)
        
        # Assuming 'markets' is the correct attribute name for accessing the markets in the event response
        # Extract only subtitle and ticker for each market option
        market_options = [{'subtitle': market.subtitle, 'ticker': market.ticker} for market in eventResponse.markets]
        
        # Store the extracted options in the dictionary
        market_options_by_city[city] = market_options
        
    except Exception as e:
        print(f"Error fetching market options for {city} ({event_ticker}): {e}")
        market_options_by_city[city] = []  # Store an empty list if there was an error

# Print the market options for each city
for city, options in market_options_by_city.items():
    print(f"{city}: {options}")


City_Austin: [{'subtitle': '73° or below', 'ticker': 'HIGHAUS-24MAR29-T74'}, {'subtitle': '74° to 75°', 'ticker': 'HIGHAUS-24MAR29-B74.5'}, {'subtitle': '76° to 77°', 'ticker': 'HIGHAUS-24MAR29-B76.5'}, {'subtitle': '78° to 79°', 'ticker': 'HIGHAUS-24MAR29-B78.5'}, {'subtitle': '80° to 81°', 'ticker': 'HIGHAUS-24MAR29-B80.5'}, {'subtitle': '82° or above', 'ticker': 'HIGHAUS-24MAR29-T81'}]
City_Central Park: [{'subtitle': '50° or below', 'ticker': 'HIGHNY-24MAR29-T51'}, {'subtitle': '51° to 52°', 'ticker': 'HIGHNY-24MAR29-B51.5'}, {'subtitle': '53° to 54°', 'ticker': 'HIGHNY-24MAR29-B53.5'}, {'subtitle': '55° to 56°', 'ticker': 'HIGHNY-24MAR29-B55.5'}, {'subtitle': '57° to 58°', 'ticker': 'HIGHNY-24MAR29-B57.5'}, {'subtitle': '59° or above', 'ticker': 'HIGHNY-24MAR29-T58'}]
City_Chicago: [{'subtitle': '47° or below', 'ticker': 'HIGHCHI-24MAR29-T48'}, {'subtitle': '48° to 49°', 'ticker': 'HIGHCHI-24MAR29-B48.5'}, {'subtitle': '50° to 51°', 'ticker': 'HIGHCHI-24MAR29-B50.5'}, {'subtitle':

In [17]:
market_options_by_city

{'City_Austin': [{'subtitle': '73° or below', 'ticker': 'HIGHAUS-24MAR29-T74'},
  {'subtitle': '74° to 75°', 'ticker': 'HIGHAUS-24MAR29-B74.5'},
  {'subtitle': '76° to 77°', 'ticker': 'HIGHAUS-24MAR29-B76.5'},
  {'subtitle': '78° to 79°', 'ticker': 'HIGHAUS-24MAR29-B78.5'},
  {'subtitle': '80° to 81°', 'ticker': 'HIGHAUS-24MAR29-B80.5'},
  {'subtitle': '82° or above', 'ticker': 'HIGHAUS-24MAR29-T81'}],
 'City_Central Park': [{'subtitle': '50° or below',
   'ticker': 'HIGHNY-24MAR29-T51'},
  {'subtitle': '51° to 52°', 'ticker': 'HIGHNY-24MAR29-B51.5'},
  {'subtitle': '53° to 54°', 'ticker': 'HIGHNY-24MAR29-B53.5'},
  {'subtitle': '55° to 56°', 'ticker': 'HIGHNY-24MAR29-B55.5'},
  {'subtitle': '57° to 58°', 'ticker': 'HIGHNY-24MAR29-B57.5'},
  {'subtitle': '59° or above', 'ticker': 'HIGHNY-24MAR29-T58'}],
 'City_Chicago': [{'subtitle': '47° or below',
   'ticker': 'HIGHCHI-24MAR29-T48'},
  {'subtitle': '48° to 49°', 'ticker': 'HIGHCHI-24MAR29-B48.5'},
  {'subtitle': '50° to 51°', 'ticker

In [18]:
 predicted_temperatures

{'City_Austin': 77.0,
 'City_Central Park': 54.0,
 'City_Chicago': 52.0,
 'City_Miami': 83.0}

In [19]:
import re

def find_closest_market_option(predicted_temp, market_options):
    closest_option = None
    closest_temp_diff = float('inf')

    for option in market_options:
        subtitle = option['subtitle']
        
        if 'or below' in subtitle:
            max_temp = int(re.findall(r'\d+', subtitle)[0])
            if predicted_temp <= max_temp:
                temp_diff = abs(predicted_temp - max_temp)
                if temp_diff < closest_temp_diff:
                    closest_temp_diff = temp_diff
                    closest_option = option
        elif 'or above' in subtitle:
            min_temp = int(re.findall(r'\d+', subtitle)[0])
            if predicted_temp >= min_temp:
                temp_diff = abs(predicted_temp - min_temp)
                if temp_diff < closest_temp_diff:
                    closest_temp_diff = temp_diff
                    closest_option = option
        else:
            temps = re.findall(r'\d+', subtitle)
            if len(temps) == 2:
                range_min, range_max = map(int, temps)
                if range_min <= predicted_temp <= range_max:
                    return option
                else:
                    temp_diff = min(abs(predicted_temp - range_min), abs(predicted_temp - range_max))
                    if temp_diff < closest_temp_diff:
                        closest_temp_diff = temp_diff
                        closest_option = option
    
    return closest_option


In [20]:
for city, predicted_temp in predicted_temperatures.items():
    market_options = market_options_by_city.get(city, [])
    closest_option = find_closest_market_option(predicted_temp, market_options)
    
    if closest_option:
        ticker = closest_option['ticker']
        print(f"Attempting to place order for {city} with ticker {ticker} based on predicted temperature of {predicted_temp}°")
        
        # Place the actual order
        orderUuid = str(uuid.uuid4())
        try:
            orderResponse = kalshi_api.create_order(kalshi_python.CreateOrderRequest(
                ticker=ticker,
                action='buy',
                type='limit',
                yes_price=10,  # Assuming $10 as the price per contract
                count=10,  # Number of contracts to buy
                client_order_id=orderUuid,
                side='yes',  # Assuming you're buying 'yes' contracts
            ))
            print(f"Order submitted for {city} with ticker {ticker}: {orderResponse}")
        except Exception as e:
            print(f"Error submitting order for {city} with ticker {ticker}: {str(e)}")
    else:
        print(f"No suitable market option found for {city} with predicted temperature of {predicted_temp}°")


Attempting to place order for City_Austin with ticker HIGHAUS-24MAR29-B76.5 based on predicted temperature of 77.0°
Order submitted for City_Austin with ticker HIGHAUS-24MAR29-B76.5: {'order': {'action': 'buy',
           'client_order_id': 'a905f283-4415-4ad6-817f-f4766724b1a6',
           'created_time': '2024-03-29T20:49:25.910331Z',
           'expiration_time': None,
           'no_price': 90,
           'order_id': 'febe84fc-370c-45ae-b1b6-4614092828d9',
           'side': 'yes',
           'status': 'resting',
           'ticker': 'HIGHAUS-24MAR29-B76.5',
           'type': 'limit',
           'user_id': 'a6a4f5c5-8d6f-4b2f-bda5-3a7c41b97c67',
           'yes_price': 10}}
Attempting to place order for City_Central Park with ticker HIGHNY-24MAR29-B53.5 based on predicted temperature of 54.0°
Order submitted for City_Central Park with ticker HIGHNY-24MAR29-B53.5: {'order': {'action': 'buy',
           'client_order_id': 'de07fd53-7f4e-48cc-b031-be50f9c0e373',
           'created_t