In [1]:
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

import pandas as pd
import numpy as np
import time

In [6]:
class FareForecastingModel(nn.Module):
    def __init__(self, num_locations, embedding_dim, num_numeric_features, lstm_hidden_dim, lstm_layers):
        super(FareForecastingModel, self).__init__()
        
        # Embedding layers for pick-up and drop-off
        self.pickup_embedding = nn.Embedding(num_locations, embedding_dim)
        self.dropoff_embedding = nn.Embedding(num_locations, embedding_dim)
        
        # LSTM for modeling temporal sequence
        # Suppose our input for LSTM is the concatenation of embeddings + numeric features at each time step
        # The input dimension for LSTM: 2*embedding_dim + num_numeric_features
        lstm_input_dim = 2 * embedding_dim + num_numeric_features
        self.lstm = nn.LSTM(input_size=lstm_input_dim, hidden_size=lstm_hidden_dim, 
                            num_layers=lstm_layers, batch_first=True)
        
        # Fully connected layer to produce the forecast
        self.fc = nn.Linear(lstm_hidden_dim, 1)
        
    def forward(self, pickup_ids, dropoff_ids, numeric_seq):
        # pickup_ids, dropoff_ids are assumed to have shape (batch_size, seq_length)
        # numeric_seq has shape (batch_size, seq_length, num_numeric_features)
        
        # Get embeddings (result shape: (batch_size, seq_length, embedding_dim))
        pickup_emb = self.pickup_embedding(pickup_ids)
        dropoff_emb = self.dropoff_embedding(dropoff_ids)
        
        # Concatenate embeddings with numeric features along last dimension
        # New shape: (batch_size, seq_length, 2*embedding_dim + num_numeric_features)
        lstm_input = torch.cat((pickup_emb, dropoff_emb, numeric_seq), dim=-1)
        
        # Pass through LSTM
        lstm_out, _ = self.lstm(lstm_input)
        # For simplicity, predict using the output at the final time step
        final_output = lstm_out[:, -1, :]
        
        # Forecast output
        forecast = self.fc(final_output)
        return forecast

In [2]:
# Encoding Functions

def circular_encoder(df):
    # Encode dow and hour as sin and cos
    
    df['dow_sin'] = np.sin(2 * np.pi * df['day_of_week'] / 7.0)
    df['dow_cos'] = np.cos(2 * np.pi * df['day_of_week'] / 7.0)
    df['hour_sin'] = np.sin(2 * np.pi * df['hour'] / 24.0)
    df['hour_cos'] = np.cos(2 * np.pi * df['hour'] / 24.0)
    df.drop(['day_of_week', 'hour'], axis=1, inplace=True)
    
    return df

In [7]:
def total_amount_base_model(X_train, y_train, X_test, y_test):
    # 1) remap your IDs exactly as before
    unique_ids = sorted(set(X_train['PULocationID']).union(X_train['DOLocationID']))
    id_to_index = {loc_id: idx for idx, loc_id in enumerate(unique_ids)}
    for df in (X_train, X_test):
        df['PULocationID'] = df['PULocationID'].map(id_to_index)
        df['DOLocationID'] = df['DOLocationID'].map(id_to_index)

    num_locations = len(unique_ids)
    num_numeric  = X_train.shape[1] - 2

    # 2) pick your device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # 3) build & move model + loss
    model     = FareForecastingModel(
        num_locations=num_locations,
        embedding_dim=8,
        num_numeric_features=num_numeric,
        lstm_hidden_dim=64,
        lstm_layers=2
    ).to(device)
    criterion = nn.MSELoss().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

    # 4) create four big tensors on GPU once
    pu_train  = torch.tensor(X_train['PULocationID'].values).long().to(device)
    do_train  = torch.tensor(X_train['DOLocationID'].values).long().to(device)
    num_train = torch.tensor(
        X_train.drop(['PULocationID','DOLocationID'], axis=1).values
    ).float().to(device)
    y_train_t = torch.tensor(y_train.values).float().to(device)

    # for eval later
    pu_test   = torch.tensor(X_test['PULocationID'].values).long().to(device)
    do_test   = torch.tensor(X_test['DOLocationID'].values).long().to(device)
    num_test  = torch.tensor(
        X_test.drop(['PULocationID','DOLocationID'], axis=1).values
    ).float().to(device)

    # 5) training loop slices those GPU tensors directly
    batch_size  = 32
    num_samples = pu_train.size(0)
    num_batches = num_samples // batch_size

    start = time.time()
    for epoch in range(10):
        print(f"Starting epoch: {epoch}")
        model.train()
        running_loss = 0.0

        for i in range(num_batches):
            s = i * batch_size
            e = s + batch_size

            batch_pu   = pu_train[s:e].unsqueeze(1)
            batch_do   = do_train[s:e].unsqueeze(1)
            batch_nums = num_train[s:e].unsqueeze(1)
            batch_y    = y_train_t[s:e]

            optimizer.zero_grad()
            preds = model(batch_pu, batch_do, batch_nums)
            loss  = criterion(preds.squeeze(), batch_y)
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * batch_size

        print(f"Epoch {epoch+1:02d}  avg loss: {running_loss/num_samples:.4f}")

    print("Training Time:", time.time() - start)
    return model, (pu_test, do_test, num_test, y_test.values)

def evaluate(model, test_tensors):
    pu_test, do_test, num_test, y_test_vals = test_tensors
    device = next(model.parameters()).device

    model.eval()
    with torch.no_grad():
        preds = model(
            pu_test.unsqueeze(1),
            do_test.unsqueeze(1),
            num_test.unsqueeze(1)
        ).squeeze().cpu().numpy()

    rmse = ((preds - y_test_vals)**2).mean()**0.5
    print(f"RMSE: {rmse:.4f}")
    return rmse


In [3]:
# Loading the data
df_train_base = pd.read_csv('data/train.csv')
df_test_base = pd.read_csv('data/test.csv')

df_train = df_train_base.copy()
df_test = df_test_base.copy()

df_train = circular_encoder(df_train)
df_test = circular_encoder(df_test)

X_train= df_train.drop(['travel_time', 'total_amount'], axis=1)
y_train = df_train['total_amount']

X_test = df_test.drop(['travel_time', 'total_amount'], axis=1)
y_test = df_test['total_amount']

In [None]:
import numpy as np
import statsmodels.api as sm
from sklearn.metrics import mean_squared_error

def train_arimax(X_train, y_train, X_test, y_test, order=(1, 1, 1)):
    """
    Fits an ARIMA(p,d,q) with exogenous regressors:
       y_t = ARIMA-errors + β · X_t
    Returns the fitted model and out‑of‑sample forecasts.
    """
    # 1) drop your PU/DO ID columns (or one‑hot / embed them if you really want to use them)
    exog_train = X_train.drop(['PULocationID','DOLocationID'], axis=1)
    exog_test  = X_test .drop(['PULocationID','DOLocationID'], axis=1)

    # 2) build & fit
    model = sm.tsa.statespace.SARIMAX(
        endog=y_train,
        exog=exog_train,
        order=order,
        enforce_stationarity=False,
        enforce_invertibility=False
    )
    res = model.fit(disp=False)
    print(res.summary())

    # 3) forecast the next len(y_test) points
    forecast = res.get_forecast(
        steps=len(y_test),
        exog=exog_test
    ).predicted_mean

    # 4) evaluate
    rmse = np.sqrt(mean_squared_error(y_test, forecast))
    print(f"\nARIMAX{order} RMSE: {rmse:.4f}")
    return res, forecast

# --- how you’d call it:
# choose (p,d,q) to your taste (you can also use pmdarima.auto_arima to select them)
order = (1, 1, 1)

arimax_model, arimax_preds = train_arimax(
    X_train,       # your DataFrame of regressors
    y_train,       # your Series of target fares
    X_test,
    y_test,
    order=order
)

# ‘arimax_preds’ is a pandas Series aligned 0…len(y_test)-1
# if you want an array:
arimax_preds = arimax_preds.values


In [9]:
min(df_test_base['PULocationID'].unique())

np.int64(1)

In [12]:
model = total_amount_base_model(X_train, y_train, X_test, y_test)

Starting epoch: 0
Epoch 01  avg loss: 9.5922
Starting epoch: 1
Epoch 02  avg loss: 9.2465
Starting epoch: 2
Epoch 03  avg loss: 9.2077
Starting epoch: 3
Epoch 04  avg loss: 9.1859
Starting epoch: 4
Epoch 05  avg loss: 9.2075
Starting epoch: 5
Epoch 06  avg loss: 9.1916
Starting epoch: 6
Epoch 07  avg loss: 9.1921
Starting epoch: 7
Epoch 08  avg loss: 9.1967
Starting epoch: 8
Epoch 09  avg loss: 9.1914
Starting epoch: 9
Epoch 10  avg loss: 9.1970
Training Time: 14530.419337272644


In [16]:
type(model)

__main__.FareForecastingModel

In [None]:
torch.save(model.state_dict(), 'fare_forecasting_model.pth')

In [9]:
unique_ids = sorted(set(X_train['PULocationID']).union(X_train['DOLocationID']))

# Load the model
model = FareForecastingModel(
    num_locations=len(unique_ids),
    embedding_dim=8,
    num_numeric_features=X_train.shape[1] - 2,
    lstm_hidden_dim=64,
    lstm_layers=2
)
model.load_state_dict(torch.load('fare_forecasting_model.pth',map_location=torch.device('cpu')))
model.eval()

device = 'cpu'
unique_ids = sorted(set(X_train['PULocationID']).union(X_train['DOLocationID']))
id_to_index = {loc_id: idx for idx, loc_id in enumerate(unique_ids)}
for df in (X_train, X_test):
    df['PULocationID'] = df['PULocationID'].map(id_to_index)
    df['DOLocationID'] = df['DOLocationID'].map(id_to_index)
pu_test   = torch.tensor(X_test['PULocationID'].values).long().to(device)
do_test   = torch.tensor(X_test['DOLocationID'].values).long().to(device)
num_test  = torch.tensor(
    X_test.drop(['PULocationID','DOLocationID'], axis=1).values
).float().to(device)
model.to(device)


  model.load_state_dict(torch.load('fare_forecasting_model.pth',map_location=torch.device('cpu')))


FareForecastingModel(
  (pickup_embedding): Embedding(262, 8)
  (dropoff_embedding): Embedding(262, 8)
  (lstm): LSTM(24, 64, num_layers=2, batch_first=True)
  (fc): Linear(in_features=64, out_features=1, bias=True)
)

In [None]:
evaluate(model, (pu_test, do_test, num_test, y_test.values))

In [None]:
# Example instantiation:
num_locations = 265
embedding_dim = 8
num_numeric_features = 5
lstm_hidden_dim = 64
lstm_layers = 2

model = FareForecastingModel(num_locations, embedding_dim, num_numeric_features, lstm_hidden_dim, lstm_layers)
print(model)