In [2]:
import sys
import polars as pl
#from verstack import LGBMTuner
import lightgbm as lgb
from sklearn.model_selection import train_test_split

In [5]:
# Load the first 25M rows of the selected-feature table and split it 80/20.
# scan_parquet() returns a polars LazyFrame, which has no to_pandas(); the
# query has to be materialised with collect() before converting to pandas.
full_frame = (
    pl.scan_parquet("train_selected_3.parquet", n_rows=25_000_000)
    .collect()
    .to_pandas()
)
# 'state' is the target column; every other column is used as a feature.
# random_state pins the shuffle so the reported accuracies are reproducible.
train_x, test_x, train_y, test_y = train_test_split(
    full_frame.drop(columns='state'),
    full_frame['state'],
    test_size=0.2,
    random_state=42,
)
# The unsplit frame is not needed any more; drop the name so it can be reclaimed.
del full_frame

27


In [None]:
# NOTE(review): LGBMTuner is provided by verstack, whose import is commented
# out in the first cell -- re-enable it before running this cell.
tuner = LGBMTuner(
    metric='accuracy',
    trials=5,
    refit=True,
    verbosity=0,
    visualization=False,
    device_type='cpu',
)

# Override the tuner's search-grid entries for the hyperparameters of interest.
grid_overrides = {
    "bagging_fraction": (0.9, 1),
    "feature_fraction": (0.4, 0.7),
    "learning_rate": (0.05, 0.08),
    "num_leaves": (10, 25),
    'num_iterations': (100, 1000),
}
for grid_key, grid_bounds in grid_overrides.items():
    tuner.grid[grid_key] = grid_bounds

tuner.fit(train_x, train_y)
tuner.plot_importances(n_features=40, figsize=(10, 10))

# Append the importance values and their feature names to the running log file.
with open('todrop.txt', 'a') as log_file:
    log_file.write(
        f"Param importances for Final Tree: {tuner.feature_importances.to_list()}\n{tuner.feature_importances.index.to_list()}"
    )

In [6]:
# Imports needed by this cell (hoisted from the middle of the cell).
import numpy as np
from sklearn.metrics import accuracy_score

# Convert the training split to LightGBM's Dataset format.
# train_x / train_y are deliberately NOT deleted here: the LinearSVC cell
# further down the notebook fits on the same names, so deleting them made a
# top-to-bottom run fail with a NameError.
lgb_train = lgb.Dataset(train_x, label=train_y)

# Fixed hyperparameters for the 3-class booster.
params = {
    'learning_rate': 0.068,
    'num_leaves': 25,
    'feature_fraction': 0.5,
    'bagging_fraction': 0.9,
    'bagging_freq': 1,
    'max_depth': -1,
    'verbosity': -1,
    'lambda_l1': 1,
    'lambda_l2': 0.0,
    'min_split_gain': 0.0,
    'zero_as_missing': False,
    'max_bin': 255,
    'min_data_in_bin': 3,
    'random_state': 42,
    'device_type': 'cpu',  # Use 'gpu' if you want to use GPU
    'num_classes': 3,
    'objective': 'multiclass',
    'metric': 'multi_logloss',
    'num_threads': 128,
}
bst = lgb.train(params, lgb_train, num_boost_round=759)
#bst.save_model('boosters/25M.txt')

# predict() yields one score per class for every row; argmax over axis 1 picks
# the highest-scoring class index, which is compared with the held-out labels.
# NOTE(review): this assumes the 'state' labels are the integers 0..2 -- confirm.
preds = bst.predict(test_x)
accuracy = accuracy_score(test_y, np.argmax(preds, axis=1))
print(accuracy)

In [10]:
# Open the parquet file lazily and show its column names as the cell output.
test = pl.scan_parquet("train_selected_3.parquet")
test.columns

0.9205488


In [3]:
from sklearn.svm import LinearSVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
# Baseline: standardise the features, then fit a linear SVM on the train split
# and report plain accuracy on the held-out split.
feature_scaler = StandardScaler()
linear_svm = LinearSVC(dual=False, max_iter=1000)
clf = make_pipeline(feature_scaler, linear_svm)
clf.fit(train_x, train_y)
pred_y = clf.predict(test_x)
print(accuracy_score(test_y, pred_y))

0.8153216666666666


In [2]:
import sys
from DeepLabV3 import TimeFrameDataset
from CNN import Encoder
#from Mixer import MLPMixer
from torch.utils.data import random_split, DataLoader
import torch.nn as nn
import torch 
import numpy as np 
from ax.service.ax_client import AxClient, ObjectiveProperties
from ax.utils.notebook.plotting import init_notebook_plotting, render

# Build the full dataset, split it 80/20 into train/validation subsets, and
# wrap each subset in a DataLoader (only the training loader shuffles).
full_dataset = TimeFrameDataset('train_1440.parquet', 1440, 'target_1440.txt')
train_dataset, val_dataset = random_split(full_dataset, [0.8, 0.2])
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=128)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=64)



In [4]:

def train_evaluate(parameters):
    """Train an ``Encoder`` with the given hyperparameters and score it.

    Uses the notebook-level ``train_loader``, ``val_loader`` and
    ``val_dataset``. Trains with Adam and an L1 loss for at most 50 epochs and
    stops early once the moving average of the last four per-epoch validation
    losses gets worse than the best moving average seen so far.

    Parameters
    ----------
    parameters : dict
        Keyword arguments forwarded to ``Encoder`` (``in_channels=26`` is
        always passed in addition).

    Returns
    -------
    float
        The moving-average validation L1 loss at the point training stopped:
        the (worsened) current average on early stopping, otherwise the best
        average, which is also the most recent one.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    #model = DeepLabV3(in_channels = 26, encoder_depth = 4).float().to(device)
    model = Encoder(in_channels=26, **parameters).float().to(device)
    #model = lab = MLPMixer(**parameters).float().to(device)
    criterion = nn.L1Loss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0005)
    num_epochs = 50
    window_size = 4       # number of epochs in the early-stopping moving average
    target_offset = 720   # constant added to the raw model output before comparing with labels
    early_stopping = []   # most recent per-epoch validation losses (at most window_size)
    best_loss = 10000000
    for epoch in range(num_epochs):
        model.train()
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs.to(device))
            loss = criterion(outputs.squeeze() + target_offset, labels.to(device))
            loss.backward()
            optimizer.step()

        model.eval()
        running_dist = 0.0
        # No gradients are needed for validation; skipping autograd bookkeeping
        # avoids building a graph for every validation batch.
        with torch.no_grad():
            for inputs, labels in val_loader:
                preds = model(inputs.to(device))
                abs_err = torch.abs((preds.cpu().squeeze() + target_offset) - labels)
                # .item() keeps the history as plain Python floats.
                running_dist += torch.sum(abs_err).item()
        epoch_loss = running_dist / len(val_dataset)

        # During the first window_size epochs the window is still filling up,
        # so only track the best average; afterwards slide the window and
        # stop as soon as its average exceeds the best one.
        warming_up = len(early_stopping) < window_size
        if not warming_up:
            early_stopping.pop(0)
        early_stopping.append(epoch_loss)
        window_mean = float(np.mean(early_stopping))

        if warming_up:
            best_loss = min(best_loss, window_mean)
        elif best_loss < window_mean:
            print(f"Best Epoch: {epoch-1}")
            print(f"Best Loss: {best_loss}")
            return window_mean
        else:
            best_loss = window_mean
    print(f"Best Loss: {best_loss}")
    print(f"Best Epoch: {epoch}")
    return float(best_loss)

        
# Hyperparameter optimization
# Start a fresh Ax client for this tuning session.
ax_client = AxClient()
# The bare string literal below is disabled code kept for reference: it would
# reload a saved client from mixer_optimization.json and define the MLP-Mixer
# search space ("tune_mlp_mixer"). As a plain string expression in the middle
# of the cell it has no effect when the cell runs.
"""
ax_client = (
    AxClient.load_from_json_file("mixer_optimization.json")
)

ax_client.create_experiment(
    name="tune_mlp_mixer",  # The name of the experiment.
    parameters=[
        {
            "name": "timeframe",
            "type": "fixed",
            "value": 1440,
        },
        {
            "name": "channels",
            "type": "fixed",
            "value": 26,
        },
        {
            "name": "patch_size",
            "type": "choice",
            "values": [16,24,32]
        },
        {
            "name": "dim",
            "type": "choice",
            "values": [128,256,512]
        },
        {
            "name": "depth",
            "type": "choice",
            "values": [4,6]
        },
        {
            "name": "num_classes",
            "type": "fixed",
            "value": 1,
        },
        {
            "name": "dropout",
            "type": "choice",
            "values": [0.0,0.05]
        },
        {
            "name": "expansion_factor",
            "type": "range",
            "bounds": [1,6],
            "value_type": "int"
        },
        {
            "name": "expansion_factor_token",
            "type": "choice",
            "values": [0.5,0.7],
            "value_type": "float"
        },
    ],
    objectives={"L1 Loss": ObjectiveProperties(minimize=True)},
)    
"""
# Search space for the CNN encoder. 'depth' carries no explicit value_type;
# the Ax log printed below this cell shows it being inferred as an integer range.
encoder_search_space = [
    {"name": "depth", "type": "range", "bounds": [1, 5]},
    {"name": "dropout", "type": "range", "bounds": [0.0, 0.5], "value_type": "float"},
]

# Register the experiment; the single objective is the validation L1 loss (lower is better).
ax_client.create_experiment(
    name="tune_cnn_encoder",
    parameters=encoder_search_space,
    objectives={"L1 Loss": ObjectiveProperties(minimize=True)},
)

# Run 10 sequential trials: ask Ax for a configuration, train/evaluate it
# locally, and report the resulting loss back to the client.
for trial_number in range(10):
    parameters, trial_index = ax_client.get_next_trial()
    try:
        # Local evaluation here can be replaced with deployment to external system.
        trial_loss = train_evaluate(parameters)
    except Exception:
        # Record the trial as failed so it is not left unfinished in the
        # experiment state, then surface the original error unchanged.
        ax_client.log_trial_failure(trial_index=trial_index)
        raise
    ax_client.complete_trial(trial_index=trial_index, raw_data=trial_loss)
    

[INFO 11-21 12:45:20] ax.service.ax_client: Starting optimization with verbose logging. To disable logging, set the `verbose_logging` argument to `False`. Note that float values in the logs are rounded to 6 decimal points.
[INFO 11-21 12:45:20] ax.service.utils.instantiation: Inferred value type of ParameterType.INT for parameter depth. If that is not the expected value type, you can explicitly specify 'value_type' ('int', 'float', 'bool' or 'str') in parameter dict.
[INFO 11-21 12:45:20] ax.service.utils.instantiation: Created search space: SearchSpace(parameters=[RangeParameter(name='depth', parameter_type=INT, range=[1, 5]), RangeParameter(name='dropout', parameter_type=FLOAT, range=[0.0, 0.5])], parameter_constraints=[]).
[INFO 11-21 12:45:20] ax.modelbridge.dispatch_utils: Using Models.BOTORCH_MODULAR since there are more ordered parameters than there are categories for the unordered categorical parameters.
[INFO 11-21 12:45:20] ax.modelbridge.dispatch_utils: Calculating the numbe

Best Epoch: 10
Best Loss: 93.38676452636719


RuntimeError: mat1 and mat2 shapes cannot be multiplied (17280x180 and 45x1)

In [14]:
# NOTE(review): 'expansion_factor' and the output file name belong to the
# MLP-Mixer experiment, whose definition is currently disabled; the active
# "tune_cnn_encoder" experiment only has 'depth' and 'dropout'. Confirm which
# experiment this cell is meant to report on before re-running it.
contour_plot = ax_client.get_contour_plot(
    param_x="expansion_factor",
    param_y="dropout",
    metric_name="L1 Loss",
)
render(contour_plot)

# Show the best configuration found so far and persist the client state.
best_parameters, values = ax_client.get_best_parameters()
print(best_parameters)
ax_client.save_to_json_file("mixer_optimization.json")

[INFO 11-21 12:13:11] ax.service.ax_client: Retrieving contour plot with parameter 'expansion_factor' on X-axis and 'dropout' on Y-axis, for metric 'L1 Loss'. Remaining parameters are affixed to the middle of their range.


[INFO 11-21 12:13:13] ax.service.ax_client: Saved JSON-serialized state of optimization to `mixer_optimization.json`.


{'patch_size': 24, 'dim': 512, 'depth': 4, 'dropout': 0.05679812183116442, 'expansion_factor': 3, 'expansion_factor_token': 0.7, 'timeframe': 1440, 'channels': 26, 'num_classes': 1}
