## Introduction

This is a kernel with starter code demonstrating how to read in the data and begin exploring. Click the blue "Edit Notebook" or "Fork Notebook" button at the top of this kernel to begin editing.

## Exploratory Analysis

To begin this exploratory analysis, first import the required libraries (such as `matplotlib`) and define functions for plotting the data. Depending on the data, not all plots will be made. (Hey, I'm just a kerneling bot, not a Kaggle Competitions Grandmaster!)

In [None]:
%pip install matplotlib numpy pandas scikit-learn dask "dask[dataframe]" seaborn

In [None]:
from mpl_toolkits.mplot3d import Axes3D
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt # plotting
import numpy as np # linear algebra
import os # accessing directory structure
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)


Check the data in the 2 created datasets (2019newBig.csv: 12M rows, 2019new.csv: 1.2M rows)

Load the dataset and clean it up

In [None]:
import dask.dataframe as dd
import matplotlib.pyplot as plt
import pandas as pd
import os

dataset = 'Datasets/2019new.csv'

if os.path.exists(dataset):
    # Parse the CSV with dask, then materialise a single pandas DataFrame.
    df = dd.read_csv(dataset).compute()

    # A missing congestion surcharge is treated as 0; a missing value in any
    # other column (including ones dropped below) removes the whole row.
    df['congestion_surcharge'] = df['congestion_surcharge'].fillna(0)
    df = df.dropna()

    # Keep trips with a non-zero distance and a fare in (0, 50].
    # A boolean mask is used instead of df.drop(<index labels>): dask's
    # read_csv restarts the index at 0 for every partition, so after
    # compute() labels can repeat and dropping by label would also delete
    # unrelated rows that merely share a label with a zero-distance trip.
    valid_trip = (
        (df['trip_distance'] != 0)
        & (df['fare_amount'] > 0)
        & (df['fare_amount'] <= 50)
    )
    df = df[valid_trip]

    # Remove the columns that are not used further on.
    df = df.drop(
        columns=[
            'total_amount',
            'extra',
            'mta_tax',
            'tip_amount',
            'tolls_amount',
            'improvement_surcharge',
            'congestion_surcharge',
            'store_and_fwd_flag',
            'payment_type',
        ]
    )

    # Convert 'tpep_pickup_datetime' to datetime and extract date and hour
    df['tpep_pickup_datetime'] = pd.to_datetime(df['tpep_pickup_datetime'])
    df['pickup_date'] = df['tpep_pickup_datetime'].dt.date
    df['pickup_hour'] = df['tpep_pickup_datetime'].dt.hour

    # The raw timestamps are no longer needed once date and hour are extracted.
    df = df.drop(['tpep_pickup_datetime', 'tpep_dropoff_datetime'], axis=1)

    # Plotting distribution of rides over date (x axis pinned to calendar year 2019)
    plt.figure(figsize=(10, 6))
    df['pickup_date'].value_counts().sort_index().plot().set_xlim(pd.Timestamp('2019-01-01'), pd.Timestamp('2019-12-31'))
    plt.title('Distribution of rides over date')
    plt.show()

    # Plotting distribution of rides over hour of the day
    plt.figure(figsize=(10, 6))
    df['pickup_hour'].value_counts().sort_index().plot()
    plt.title('Distribution of rides over hour of the day')
    plt.show()
else:
    print("Dataset not found")


Set up the weather and holiday datasets and clean them up

In [None]:

# Weather table: keep only the columns used below and parse the date.
weather = pd.read_csv('Datasets/weather.csv').drop(
    columns=['tmax', 'tmin', 'departure', 'HDD', 'CDD']
)
weather['date'] = pd.to_datetime(weather['date'])

# Holiday table restricted to 2019.
holidays = pd.read_csv('Datasets/USHoliday.csv')
holidays['Date'] = pd.to_datetime(holidays['Date'])
holidays = holidays.loc[holidays['Date'].dt.year == 2019]

# These columns may contain the marker "T" (presumably a trace amount -
# TODO confirm against the data source); it is replaced by 0 so the column
# can be cast to float.
for trace_col in ('precipitation', 'new_snow', 'snow_depth'):
    weather[trace_col] = weather[trace_col].replace(to_replace="T", value=0).astype(float)

# pickup_date must be datetime64 to join against weather['date'].
df['pickup_date'] = pd.to_datetime(df['pickup_date'])

new_df = df.merge(weather, how='left', left_on='pickup_date', right_on='date')
new_df = new_df.drop(columns=['date'])

# day_type: 1 = weekday (Mon-Fri), 2 = weekend, 3 = holiday (holiday wins).
new_df['holiday'] = new_df['pickup_date'].isin(holidays['Date']).astype(int)
new_df['day_of_week'] = new_df['pickup_date'].dt.dayofweek
new_df['day_type'] = np.where(
    new_df['holiday'] == 1,
    3,
    np.where(new_df['day_of_week'] < 5, 1, 2),
)

# Drop the helper columns, then every row that still has a missing value
# (e.g. trips whose date had no weather record).
new_df = new_df.drop(columns=['pickup_date', 'day_of_week', 'holiday'])
new_df = new_df.dropna()

Merging with the zones dataset and cleaning up

In [None]:
# Zone lookup table: only LocationID and service_zone are needed.
zones = pd.read_csv('Datasets/taxi_zone_lookup.csv')
zones = zones.drop(columns=['Borough', 'Zone'])

# Discard zones without a usable service zone. pandas.read_csv turns the text
# "N/A" into NaN by default, so comparing against 'N/A' alone never matches;
# both representations are filtered here.
has_service_zone = zones['service_zone'].notna() & (zones['service_zone'] != 'N/A')
zones = zones[has_service_zone].copy()

# Replace 'EWR' with 'Airports' in the 'service_zone' column
zones['service_zone'] = zones['service_zone'].replace('EWR', 'Airports')

# Attach the service zone of the pickup location, then of the drop-off
# location. The second merge collides on LocationID/service_zone, which is
# what produces the *_pulocation / *_dolocation suffixed columns.
pulocation = new_df.merge(zones[['LocationID', 'service_zone']], left_on='pulocationid', right_on='LocationID', how='left')
dolocation = pulocation.merge(zones[['LocationID', 'service_zone']], left_on='dolocationid', right_on='LocationID', how='left', suffixes=('_pulocation', '_dolocation'))

# Create a new column 'zone_type' based on the conditions
def get_zone_type(row):
    """Return the zone-type code of a trip from its two service zones.

    ``row`` must support lookup of 'service_zone_pulocation' and
    'service_zone_dolocation'. The first label below found at either end of
    the trip decides the code:

        1 - 'Airports'
        2 - 'Boro Zone'
        3 - 'Yellow Zone'

    Returns None when neither end carries one of these labels.
    """
    endpoints = (row['service_zone_pulocation'], row['service_zone_dolocation'])

    # Labels are listed in priority order; their 1-based position is the code.
    for code, label in enumerate(('Airports', 'Boro Zone', 'Yellow Zone'), start=1):
        if label in endpoints:
            return code
    return None

# Classify every trip row by row with get_zone_type.
dolocation['zone_type'] = dolocation.apply(get_zone_type, axis=1)

# Trips that matched none of the zone rules have no zone_type; discard them.
new_df = dolocation.dropna(subset=['zone_type'])

# The location ids and the merged lookup columns are no longer needed once
# zone_type has been derived.
new_df = new_df.drop(
    columns=[
        'pulocationid',
        'dolocationid',
        'LocationID_pulocation',
        'service_zone_pulocation',
        'LocationID_dolocation',
        'service_zone_dolocation',
    ]
)

print(new_df.head(1))
print(new_df.shape)

In [None]:
# Remove the weather measurements from the feature set in a single call.
new_df = new_df.drop(columns=['tavg', 'precipitation', 'new_snow', 'snow_depth'])

print(new_df.head(1))
print(new_df.shape)

Start training models with different neural-network architectures and parameters to find the best ones

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import torch.optim as optim
from tqdm import tqdm

# Check if GPU is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Define the neural network model
def NN(input_size):
    """Build a fully connected regressor: input_size -> 128 -> 64 -> 1.

    ReLU follows the two hidden layers; the single output is left linear.
    A fresh, untrained ``Net`` instance is returned on every call.
    """
    class Net(nn.Module):
        def __init__(self):
            super().__init__()
            self.fc1 = nn.Linear(input_size, 128)
            self.fc2 = nn.Linear(128, 64)
            self.fc3 = nn.Linear(64, 1)

        def forward(self, x):
            hidden = x
            for layer in (self.fc1, self.fc2):
                hidden = F.relu(layer(hidden))
            return self.fc3(hidden)

    network = Net()
    return network

# Custom dataset class
class NYCTaxiDataset(Dataset):
    """Map-style dataset that pairs each feature row with its target.

    ``X`` and ``y`` are stored as given (no copy) and must both support
    ``len()`` and integer indexing. The dataset length is ``len(X)``.

    Raises:
        ValueError: if ``X`` and ``y`` do not hold the same number of samples.
    """

    def __init__(self, X, y):
        # Fail early: a mismatch would otherwise surface mid-epoch as an
        # IndexError, or silently ignore the surplus targets.
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same number of samples, got {len(X)} and {len(y)}"
            )
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair stored at position ``idx``."""
        return self.X[idx], self.y[idx]

# Split the frame into features / target; every column except fare_amount is a feature.
input_size = new_df.shape[1] - 1
X = new_df.drop(columns=['fare_amount']).to_numpy()
y = new_df['fare_amount'].to_numpy()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)

# Standardise features with statistics learned from the training split only.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# float32 tensors; targets become column vectors to match the model output shape (N, 1).
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).reshape(-1, 1)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32).reshape(-1, 1)

train_dataset = NYCTaxiDataset(X_train, y_train)
test_dataset = NYCTaxiDataset(X_test, y_test)

batch_size = 1024
loader_options = dict(batch_size=batch_size, num_workers=24, pin_memory=True)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_options)

# Build the model on the selected device; replicate across GPUs when more than one is visible.
model = NN(input_size).to(device)
gpu_count = torch.cuda.device_count()
if gpu_count > 1:
    print(f"Using {gpu_count} GPUs with DataParallel")
    model = nn.DataParallel(model)

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Training loop with progress bar
def train_model(model, train_loader, num_epochs=100):
    """Train ``model`` in place for ``num_epochs`` passes over ``train_loader``.

    Uses the module-level ``device``, ``optimizer`` and ``criterion``. A tqdm
    bar tracks the batches of each epoch and the mean batch loss is printed
    once the epoch is finished. Nothing is returned.
    """
    model.train()
    for epoch in range(num_epochs):
        header = f'Epoch [{epoch+1}/{num_epochs}]'
        loss_total = 0.0
        with tqdm(total=len(train_loader), desc=header, unit='batch') as progress:
            for batch_x, batch_y in train_loader:
                batch_x = batch_x.to(device, non_blocking=True)
                batch_y = batch_y.to(device, non_blocking=True)

                optimizer.zero_grad()
                batch_loss = criterion(model(batch_x), batch_y)
                batch_loss.backward()
                optimizer.step()

                loss_total += batch_loss.item()
                progress.update(1)

        mean_loss = loss_total / len(train_loader)
        print(f"{header}, Loss: {mean_loss:.4f}")

# Evaluation function for regression
def evaluate_model(model, test_loader):
    """Print the MSE and RMSE of ``model`` over every batch of ``test_loader``.

    The model is switched to eval mode (and left in it). Batches are moved to
    the module-level ``device``; gradients are disabled. Nothing is returned.
    """
    model.eval()
    predicted, expected = [], []
    with torch.no_grad():
        for batch_x, batch_y in test_loader:
            batch_x = batch_x.to(device, non_blocking=True)
            batch_y = batch_y.to(device, non_blocking=True)
            batch_pred = model(batch_x)
            # Collect per-sample rows on the host for the sklearn metric.
            predicted.extend(batch_pred.cpu().numpy())
            expected.extend(batch_y.cpu().numpy())

    mse = mean_squared_error(expected, predicted)
    rmse = mse ** 0.5
    print(f"Test MSE: {mse:.2f}")
    print(f"Test RMSE: {rmse:.2f}")

# Train the model
train_model(model, train_loader)

# Evaluate the model
evaluate_model(model, test_loader)

# Save the trained weights. A model wrapped in nn.DataParallel exposes the real
# network as `.module`, and its own state_dict keys carry a 'module.' prefix
# that a plain NN(...) instance cannot load; unwrap first so the checkpoint
# has the same keys whether or not several GPUs were used.
model_to_save = model.module if isinstance(model, nn.DataParallel) else model
torch.save(model_to_save.state_dict(), 'model_Bignew.pth')

print("Training and evaluation completed!")


In [None]:

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from tqdm import tqdm

# Check if GPU is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Define the neural network model with batch normalization and dropout
def NN(input_size):
    """Build a regressor input_size -> 128 -> 64 -> 1 with batch norm and dropout.

    Both hidden layers are followed by BatchNorm1d and ReLU; dropout (p=0.3)
    is applied after the first hidden block only. The output is linear.
    A fresh, untrained ``Net`` instance is returned on every call.
    """
    class Net(nn.Module):
        def __init__(self):
            super().__init__()
            self.fc1 = nn.Linear(input_size, 128)
            self.bn1 = nn.BatchNorm1d(128)
            self.fc2 = nn.Linear(128, 64)
            self.bn2 = nn.BatchNorm1d(64)
            self.fc3 = nn.Linear(64, 1)
            self.dropout = nn.Dropout(0.3)

        def forward(self, x):
            first = self.dropout(F.relu(self.bn1(self.fc1(x))))
            second = F.relu(self.bn2(self.fc2(first)))
            return self.fc3(second)

    network = Net()
    return network

# Custom dataset class
class NYCTaxiDataset(Dataset):
    """Map-style dataset that pairs each feature row with its target.

    ``X`` and ``y`` are stored as given (no copy) and must both support
    ``len()`` and integer indexing. The dataset length is ``len(X)``.

    Raises:
        ValueError: if ``X`` and ``y`` do not hold the same number of samples.
    """

    def __init__(self, X, y):
        # Fail early: a mismatch would otherwise surface mid-epoch as an
        # IndexError, or silently ignore the surplus targets.
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same number of samples, got {len(X)} and {len(y)}"
            )
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair stored at position ``idx``."""
        return self.X[idx], self.y[idx]

# Preprocess the data: every column except fare_amount is a feature.
input_size = new_df.shape[1] - 1
X = new_df.drop(['fare_amount'], axis=1).values
y = new_df['fare_amount'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)

# The fitted StandardScaler gets its own name: `scaler` is rebound to the AMP
# GradScaler further down, which previously discarded the feature statistics
# needed to preprocess new data for this model.
feature_scaler = StandardScaler()
X_train = feature_scaler.fit_transform(X_train)
X_test = feature_scaler.transform(X_test)

# float32 tensors; targets are reshaped to (N, 1) to match the model output.
X_train = torch.FloatTensor(X_train)
y_train = torch.FloatTensor(y_train).view(-1, 1)
X_test = torch.FloatTensor(X_test)
y_test = torch.FloatTensor(y_test).view(-1, 1)

train_dataset = NYCTaxiDataset(X_train, y_train)
test_dataset = NYCTaxiDataset(X_test, y_test)

batch_size = 1024
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=24, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=24, pin_memory=True)

# Initialize the model and wrap it with DataParallel when several GPUs are visible
model = NN(input_size).to(device)
if torch.cuda.device_count() > 1:
    print(f"Using {torch.cuda.device_count()} GPUs with DataParallel")
    model = nn.DataParallel(model)

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=1e-5)
# Halve the learning rate after 3 epochs without improvement of the monitored loss.
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3)

# Gradient scaler for mixed-precision training; train_model reads it under this name.
scaler = torch.amp.GradScaler('cuda')

def train_model(model, train_loader, num_epochs=10):
    """Train ``model`` in place with CUDA autocast and gradient scaling.

    Uses the module-level ``device``, ``optimizer``, ``criterion``,
    ``scaler`` (gradient scaler) and ``scheduler``. The forward pass and the
    loss run inside ``torch.amp.autocast('cuda')``; backward and the
    optimizer step go through the scaler. After each epoch the mean batch
    loss is printed and handed to ``scheduler.step``. Nothing is returned.
    """
    model.train()
    for epoch in range(num_epochs):
        header = f'Epoch [{epoch+1}/{num_epochs}]'
        loss_total = 0.0
        with tqdm(total=len(train_loader), desc=header, unit='batch') as progress:
            for batch_x, batch_y in train_loader:
                batch_x = batch_x.to(device, non_blocking=True)
                batch_y = batch_y.to(device, non_blocking=True)

                optimizer.zero_grad()
                with torch.amp.autocast('cuda'):
                    batch_loss = criterion(model(batch_x), batch_y)

                # Scaled backward pass, then an unscaled optimizer step.
                scaler.scale(batch_loss).backward()
                scaler.step(optimizer)
                scaler.update()

                loss_total += batch_loss.item()
                progress.update(1)

        epoch_loss = loss_total / len(train_loader)
        print(f"{header}, Loss: {epoch_loss:.4f}")
        # The plateau scheduler monitors the training loss.
        scheduler.step(epoch_loss)

# Evaluation function for regression
def evaluate_model(model, test_loader):
    """Print the MSE and RMSE of ``model`` over every batch of ``test_loader``.

    The model is switched to eval mode (and left in it). Batches are moved to
    the module-level ``device``; gradients are disabled. Nothing is returned.
    """
    model.eval()
    predicted, expected = [], []
    with torch.no_grad():
        for batch_x, batch_y in test_loader:
            batch_x = batch_x.to(device, non_blocking=True)
            batch_y = batch_y.to(device, non_blocking=True)
            batch_pred = model(batch_x)
            # Collect per-sample rows on the host for the sklearn metric.
            predicted.extend(batch_pred.cpu().numpy())
            expected.extend(batch_y.cpu().numpy())

    mse = mean_squared_error(expected, predicted)
    rmse = mse ** 0.5
    print(f"Test MSE: {mse:.2f}")
    print(f"Test RMSE: {rmse:.2f}")

# Train the model
train_model(model, train_loader)

# Evaluate the model
evaluate_model(model, test_loader)

# Save the trained weights. A model wrapped in nn.DataParallel exposes the real
# network as `.module`, and its own state_dict keys carry a 'module.' prefix
# that a plain NN(...) instance cannot load; unwrap first so the checkpoint
# has the same keys whether or not several GPUs were used.
# NOTE(review): the first training cell of this notebook also writes
# 'model_Bignew.pth', so this save overwrites that checkpoint - confirm that
# is intended.
model_to_save = model.module if isinstance(model, nn.DataParallel) else model
torch.save(model_to_save.state_dict(), 'model_Bignew.pth')

print("Training and evaluation completed!")


In [None]:

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from tqdm import tqdm

# Check if GPU is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Allow TF32 for CUDA matrix multiplications. NOTE: this flag controls TF32
# only; it does not switch training to BF16.
torch.backends.cuda.matmul.allow_tf32 = True  # Enable TF32

# Define the neural network model with batch normalization and dropout
def NN(input_size):
    """Build a regressor input_size -> 128 -> 64 -> 1 with batch norm and dropout.

    Both hidden layers are followed by BatchNorm1d and ReLU; dropout (p=0.3)
    is applied after the first hidden block only. The output is linear.
    A fresh, untrained ``Net`` instance is returned on every call.
    """
    class Net(nn.Module):
        def __init__(self):
            super().__init__()
            self.fc1 = nn.Linear(input_size, 128)
            self.bn1 = nn.BatchNorm1d(128)
            self.fc2 = nn.Linear(128, 64)
            self.bn2 = nn.BatchNorm1d(64)
            self.fc3 = nn.Linear(64, 1)
            self.dropout = nn.Dropout(0.3)

        def forward(self, x):
            first = self.dropout(F.relu(self.bn1(self.fc1(x))))
            second = F.relu(self.bn2(self.fc2(first)))
            return self.fc3(second)

    network = Net()
    return network

# Custom dataset class
class NYCTaxiDataset(Dataset):
    """Map-style dataset that pairs each feature row with its target.

    ``X`` and ``y`` are stored as given (no copy) and must both support
    ``len()`` and integer indexing. The dataset length is ``len(X)``.

    Raises:
        ValueError: if ``X`` and ``y`` do not hold the same number of samples.
    """

    def __init__(self, X, y):
        # Fail early: a mismatch would otherwise surface mid-epoch as an
        # IndexError, or silently ignore the surplus targets.
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same number of samples, got {len(X)} and {len(y)}"
            )
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair stored at position ``idx``."""
        return self.X[idx], self.y[idx]

# Preprocess the data: every column except fare_amount is a feature.
input_size = new_df.shape[1] - 1
X = new_df.drop(['fare_amount'], axis=1).values
y = new_df['fare_amount'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)

# The fitted StandardScaler gets its own name: `scaler` is rebound to the
# GradScaler further down, which previously discarded the feature statistics
# needed to preprocess new data for this model.
feature_scaler = StandardScaler()
X_train = feature_scaler.fit_transform(X_train)
X_test = feature_scaler.transform(X_test)

# float32 tensors; targets are reshaped to (N, 1) to match the model output.
X_train = torch.FloatTensor(X_train)
y_train = torch.FloatTensor(y_train).view(-1, 1)
X_test = torch.FloatTensor(X_test)
y_test = torch.FloatTensor(y_test).view(-1, 1)

train_dataset = NYCTaxiDataset(X_train, y_train)
test_dataset = NYCTaxiDataset(X_test, y_test)

batch_size = 1024
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=24, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=24, pin_memory=True)

# Initialize the model and wrap it with DataParallel when several GPUs are visible
model = NN(input_size).to(device)
if torch.cuda.device_count() > 1:
    print(f"Using {torch.cuda.device_count()} GPUs with DataParallel")
    model = nn.DataParallel(model)

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=1e-5)
# Halve the learning rate after 3 epochs without improvement of the monitored loss.
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3)

# Gradient scaler consumed by train_model under this name.
scaler = torch.amp.GradScaler('cuda')

def train_model(model, train_loader, num_epochs=10):
    """Train ``model`` in place, running the forward pass in BF16 where possible.

    Uses the module-level ``device``, ``optimizer``, ``criterion``,
    ``scaler`` (gradient scaler) and ``scheduler``.

    The forward pass and loss run under ``torch.autocast`` with
    ``torch.bfloat16`` when training on a CUDA device that reports BF16
    support; otherwise autocast is disabled and the step is computed in the
    tensors' own precision, as before. Previously no autocast context was
    entered at all, so this cell never actually trained in BF16.

    After each epoch the mean batch loss is printed and handed to
    ``scheduler.step``. Nothing is returned.
    """
    # Decide once: BF16 autocast only on CUDA hardware that supports it.
    use_bf16 = device.type == 'cuda' and torch.cuda.is_bf16_supported()

    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        with tqdm(total=len(train_loader), desc=f'Epoch [{epoch+1}/{num_epochs}]', unit='batch') as pbar:
            for inputs, labels in train_loader:
                inputs, labels = inputs.to(device, non_blocking=True), labels.to(device, non_blocking=True)

                optimizer.zero_grad()
                with torch.autocast(device_type=device.type, dtype=torch.bfloat16, enabled=use_bf16):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                # The scaler calls are kept so the update path (including the
                # skipping of steps with non-finite gradients) is unchanged.
                scaler.scale(loss).backward()
                scaler.step(optimizer)
                scaler.update()

                running_loss += loss.item()
                pbar.update(1)

        epoch_loss = running_loss / len(train_loader)
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}")
        # The plateau scheduler monitors the training loss.
        scheduler.step(epoch_loss)

# Evaluation function for regression
def evaluate_model(model, test_loader):
    """Print the MSE and RMSE of ``model`` over every batch of ``test_loader``.

    The model is switched to eval mode (and left in it). Batches are moved to
    the module-level ``device``; gradients are disabled. Nothing is returned.
    """
    model.eval()
    predicted, expected = [], []
    with torch.no_grad():
        for batch_x, batch_y in test_loader:
            batch_x = batch_x.to(device, non_blocking=True)
            batch_y = batch_y.to(device, non_blocking=True)
            batch_pred = model(batch_x)
            # Collect per-sample rows on the host for the sklearn metric.
            predicted.extend(batch_pred.cpu().numpy())
            expected.extend(batch_y.cpu().numpy())

    mse = mean_squared_error(expected, predicted)
    rmse = mse ** 0.5
    print(f"Test MSE: {mse:.2f}")
    print(f"Test RMSE: {rmse:.2f}")

# Train the model
train_model(model, train_loader)

# Evaluate the model
evaluate_model(model, test_loader)

# Save the trained weights. A model wrapped in nn.DataParallel exposes the real
# network as `.module`, and its own state_dict keys carry a 'module.' prefix
# that a plain NN(...) instance cannot load; unwrap first so the checkpoint
# has the same keys whether or not several GPUs were used.
model_to_save = model.module if isinstance(model, nn.DataParallel) else model
torch.save(model_to_save.state_dict(), 'model_BF16.pth')

print("Training and evaluation completed!")

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from tqdm import tqdm

# Check if GPU is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Define a larger neural network model
def LargeNN(input_size):
    """Build a deeper regressor: input_size -> 512 -> 256 -> 128 -> 64 -> 32 -> 1.

    Each of the five hidden layers (``fc1``..``fc5``) is followed by its
    BatchNorm1d (``bn1``..``bn5``) and ReLU. Dropout (p=0.4) is applied after
    the first four hidden blocks, not after the fifth. ``fc6`` is the linear
    output layer. A fresh, untrained ``Net`` instance is returned on every call.
    """
    hidden_widths = (512, 256, 128, 64, 32)

    class Net(nn.Module):
        def __init__(self):
            super().__init__()
            # Register fc<i>/bn<i> pairs in order so attribute names and
            # state_dict keys are fc1, bn1, fc2, bn2, ...
            fan_in = input_size
            for position, width in enumerate(hidden_widths, start=1):
                setattr(self, f'fc{position}', nn.Linear(fan_in, width))
                setattr(self, f'bn{position}', nn.BatchNorm1d(width))
                fan_in = width
            self.fc6 = nn.Linear(fan_in, 1)

            self.dropout = nn.Dropout(0.4)

        def forward(self, x):
            blocks = (
                (self.fc1, self.bn1),
                (self.fc2, self.bn2),
                (self.fc3, self.bn3),
                (self.fc4, self.bn4),
                (self.fc5, self.bn5),
            )
            last = len(blocks) - 1
            for position, (linear, norm) in enumerate(blocks):
                x = F.relu(norm(linear(x)))
                if position != last:
                    x = self.dropout(x)
            return self.fc6(x)

    network = Net()
    return network

# Custom dataset class
class NYCTaxiDataset(Dataset):
    """Map-style dataset that pairs each feature row with its target.

    ``X`` and ``y`` are stored as given (no copy) and must both support
    ``len()`` and integer indexing. The dataset length is ``len(X)``.

    Raises:
        ValueError: if ``X`` and ``y`` do not hold the same number of samples.
    """

    def __init__(self, X, y):
        # Fail early: a mismatch would otherwise surface mid-epoch as an
        # IndexError, or silently ignore the surplus targets.
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same number of samples, got {len(X)} and {len(y)}"
            )
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair stored at position ``idx``."""
        return self.X[idx], self.y[idx]

# Split the frame into features / target; every column except fare_amount is a feature.
input_size = new_df.shape[1] - 1
X = new_df.drop(columns=['fare_amount']).to_numpy()
y = new_df['fare_amount'].to_numpy()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)

# Standardise features with statistics learned from the training split only.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# float32 tensors; targets become column vectors to match the model output shape (N, 1).
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).reshape(-1, 1)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32).reshape(-1, 1)

train_dataset = NYCTaxiDataset(X_train, y_train)
test_dataset = NYCTaxiDataset(X_test, y_test)

batch_size = 1024
loader_options = dict(batch_size=batch_size, num_workers=24, pin_memory=True)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_options)

# Build the larger model on the selected device; replicate across GPUs when more than one is visible.
model = LargeNN(input_size).to(device)
gpu_count = torch.cuda.device_count()
if gpu_count > 1:
    print(f"Using {gpu_count} GPUs with DataParallel")
    model = nn.DataParallel(model)

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
# Halve the learning rate after 3 epochs without improvement of the monitored loss.
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3)

# Training loop (plain backward/step; this cell does not itself enable TF32 -
# it is only active if the flag was set earlier in the session)
def train_model(model, train_loader, num_epochs=10):
    """Train ``model`` in place for ``num_epochs`` passes over ``train_loader``.

    Uses the module-level ``device``, ``optimizer``, ``criterion`` and
    ``scheduler``. A tqdm bar tracks the batches of each epoch; afterwards the
    mean batch loss is printed and handed to ``scheduler.step``.
    Nothing is returned.
    """
    model.train()
    for epoch in range(num_epochs):
        header = f'Epoch [{epoch+1}/{num_epochs}]'
        loss_total = 0.0
        with tqdm(total=len(train_loader), desc=header, unit='batch') as progress:
            for batch_x, batch_y in train_loader:
                batch_x = batch_x.to(device, non_blocking=True)
                batch_y = batch_y.to(device, non_blocking=True)

                optimizer.zero_grad()
                batch_loss = criterion(model(batch_x), batch_y)
                batch_loss.backward()
                optimizer.step()

                loss_total += batch_loss.item()
                progress.update(1)

        epoch_loss = loss_total / len(train_loader)
        print(f"{header}, Loss: {epoch_loss:.4f}")
        # The plateau scheduler monitors the training loss.
        scheduler.step(epoch_loss)

# Evaluation function for regression
def evaluate_model(model, test_loader):
    """Print the MSE and RMSE of ``model`` over every batch of ``test_loader``.

    The model is switched to eval mode (and left in it). Batches are moved to
    the module-level ``device``; gradients are disabled. Nothing is returned.
    """
    model.eval()
    predicted, expected = [], []
    with torch.no_grad():
        for batch_x, batch_y in test_loader:
            batch_x = batch_x.to(device, non_blocking=True)
            batch_y = batch_y.to(device, non_blocking=True)
            batch_pred = model(batch_x)
            # Collect per-sample rows on the host for the sklearn metric.
            predicted.extend(batch_pred.cpu().numpy())
            expected.extend(batch_y.cpu().numpy())

    mse = mean_squared_error(expected, predicted)
    rmse = mse ** 0.5
    print(f"Test MSE: {mse:.2f}")
    print(f"Test RMSE: {rmse:.2f}")

# Train the model
train_model(model, train_loader)

# Evaluate the model
evaluate_model(model, test_loader)

# Save the trained weights. A model wrapped in nn.DataParallel exposes the real
# network as `.module`, and its own state_dict keys carry a 'module.' prefix
# that a plain LargeNN(...) instance cannot load; unwrap first so the
# checkpoint has the same keys whether or not several GPUs were used.
model_to_save = model.module if isinstance(model, nn.DataParallel) else model
torch.save(model_to_save.state_dict(), 'model_LargeNN.pth')

print("Training and evaluation completed!")
