In [1]:
import pandas as pd
import pytorch_tabular

In [2]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report, precision_score, recall_score, f1_score, roc_auc_score
from prettytable import PrettyTable

In [3]:
from pytorch_tabular.config import (
    DataConfig,
    OptimizerConfig,
    TrainerConfig,
)
from pytorch_tabular import TabularModel

In [4]:
from pytorch_tabular import available_models
available_models()

['AutoIntConfig',
 'CategoryEmbeddingModelConfig',
 'DANetConfig',
 'FTTransformerConfig',
 'GANDALFConfig',
 'GatedAdditiveTreeEnsembleConfig',
 'MDNConfig',
 'NodeConfig',
 'TabNetModelConfig',
 'TabTransformerConfig']

In [4]:
df=pd.read_csv("new_data.csv")

In [9]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10267 entries, 0 to 10266
Data columns (total 49 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   trans_id                    10267 non-null  float64
 1   trans_date_trans_time       10267 non-null  object 
 2   cc_num                      10267 non-null  float64
 3   merchant                    10267 non-null  object 
 4   category                    10267 non-null  object 
 5   amt                         10267 non-null  float64
 6   first                       10267 non-null  object 
 7   last                        10267 non-null  object 
 8   gender                      10267 non-null  object 
 9   street                      10267 non-null  object 
 10  city                        10267 non-null  object 
 11  state                       10267 non-null  object 
 12  lat                         10267 non-null  float64
 13  long                        102

In [5]:
numeric_columns = ['trans_id', 'cc_num', 'amt', 'lat', 'long', 'city_pop', 'merch_lat', 'merch_long', 'customer_id', 'distance', 'age', 'transaction_month', 'transaction_year', 'transaction_day', 'transaction_weekday', 'is_weekend', 'log_amt', 'amt_squared', 'transaction_count', 'average_transaction_amount', 'merchant_transaction_count', 'merchant_avg_amt', 'std_transaction_amount', 'is_senior', 'is_minor', 'amt_age_ratio', 'distance_amt_ratio', 'amt_per_merchant', 'monthly_spending', 'is_fraud_and_weekend', 'amt_variance', 'amt_diff_mean', 'total_transaction_amt']
df[numeric_columns] = df[numeric_columns].fillna(df[numeric_columns].mean())

# Categorical columns: Fill missing values with the mode (most frequent value) of each column
categorical_columns = ['merchant', 'category', 'first', 'last', 'gender', 'street', 'city', 'state', 'job', 'transaction_date', 'transaction_time', 'population_category', 'time_of_day']
df[categorical_columns] = df[categorical_columns].fillna(df[categorical_columns].mode().iloc[0])

# Verify if there are any remaining missing values
missing_values = df.isnull().sum()
print(missing_values)


trans_id                      0
trans_date_trans_time         0
cc_num                        0
merchant                      0
category                      0
amt                           0
first                         0
last                          0
gender                        0
street                        0
city                          0
state                         0
lat                           0
long                          0
city_pop                      0
job                           0
dob                           0
merch_lat                     0
merch_long                    0
is_fraud                      0
customer_id                   0
distance                      0
age                           0
transaction_date              0
transaction_month             0
transaction_year              0
transaction_time              0
transaction_day               0
transaction_weekday           0
is_weekend                    0
population_category           0
log_amt 

In [6]:
df["is_fraud"].fillna(0,inplace=True)

In [7]:
df_encoded = df.copy()

# Convert 'trans_date_trans_time' and 'dob' to datetime
df_encoded['trans_date_trans_time'] = pd.to_datetime(df_encoded['trans_date_trans_time'])
df_encoded['dob'] = pd.to_datetime(df_encoded['dob'])

# Extract useful features from datetime columns
df_encoded['trans_year'] = df_encoded['trans_date_trans_time'].dt.year
df_encoded['trans_month'] = df_encoded['trans_date_trans_time'].dt.month
df_encoded['trans_day'] = df_encoded['trans_date_trans_time'].dt.day
df_encoded['trans_hour'] = df_encoded['trans_date_trans_time'].dt.hour
df_encoded['trans_minute'] = df_encoded['trans_date_trans_time'].dt.minute
df_encoded['trans_second'] = df_encoded['trans_date_trans_time'].dt.second

df_encoded['dob_year'] = df_encoded['dob'].dt.year
df_encoded['dob_month'] = df_encoded['dob'].dt.month
df_encoded['dob_day'] = df_encoded['dob'].dt.day

# Drop the original datetime columns
df_encoded = df_encoded.drop(columns=['trans_date_trans_time', 'dob'])

In [8]:
df_encoded.to_csv("data_nocolinear.csv",index=False)

In [34]:
df_encoded.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10267 entries, 0 to 10266
Data columns (total 54 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   cc_num                      10267 non-null  float64
 1   merchant                    10267 non-null  object 
 2   category                    10267 non-null  object 
 3   amt                         10267 non-null  float64
 4   first                       10267 non-null  object 
 5   last                        10267 non-null  object 
 6   gender                      10267 non-null  object 
 7   street                      10267 non-null  object 
 8   city                        10267 non-null  object 
 9   state                       10267 non-null  object 
 10  lat                         10267 non-null  float64
 11  long                        10267 non-null  float64
 12  city_pop                    10267 non-null  float64
 13  job                         102

In [33]:
df_encoded=pd.read_csv("data_nocolinear.csv")

In [35]:
df_encoded=df_encoded.drop(columns=["trans_id","customer_id"])

KeyError: "['trans_id', 'customer_id'] not found in axis"

In [37]:
print(df_encoded["is_fraud"])

0        0.0
1        0.0
2        0.0
3        0.0
4        0.0
        ... 
10262    0.0
10263    0.0
10264    1.0
10265    0.0
10266    1.0
Name: is_fraud, Length: 10267, dtype: float64


In [38]:
numeric_columns = df_encoded.select_dtypes(include=['float64', 'int64', 'int32']).columns.tolist()

# List of categorical columns
categorical_columns = df_encoded.select_dtypes(include=['object']).columns.tolist()

print("Numeric Columns:", numeric_columns)
print("Categorical Columns:", categorical_columns)

Numeric Columns: ['cc_num', 'amt', 'lat', 'long', 'city_pop', 'merch_lat', 'merch_long', 'is_fraud', 'distance', 'age', 'transaction_month', 'transaction_year', 'transaction_day', 'transaction_weekday', 'is_weekend', 'log_amt', 'amt_squared', 'transaction_count', 'average_transaction_amount', 'merchant_transaction_count', 'merchant_avg_amt', 'std_transaction_amount', 'is_senior', 'is_minor', 'amt_age_ratio', 'distance_amt_ratio', 'amt_per_merchant', 'monthly_spending', 'is_fraud_and_weekend', 'amt_variance', 'amt_diff_mean', 'total_transaction_amt', 'trans_year', 'trans_month', 'trans_day', 'trans_hour', 'trans_minute', 'trans_second', 'dob_year', 'dob_month', 'dob_day']
Categorical Columns: ['merchant', 'category', 'first', 'last', 'gender', 'street', 'city', 'state', 'job', 'transaction_date', 'transaction_time', 'population_category', 'time_of_day']


In [39]:
numeric_columns.remove("is_fraud")


In [40]:
print(numeric_columns)

['cc_num', 'amt', 'lat', 'long', 'city_pop', 'merch_lat', 'merch_long', 'distance', 'age', 'transaction_month', 'transaction_year', 'transaction_day', 'transaction_weekday', 'is_weekend', 'log_amt', 'amt_squared', 'transaction_count', 'average_transaction_amount', 'merchant_transaction_count', 'merchant_avg_amt', 'std_transaction_amount', 'is_senior', 'is_minor', 'amt_age_ratio', 'distance_amt_ratio', 'amt_per_merchant', 'monthly_spending', 'is_fraud_and_weekend', 'amt_variance', 'amt_diff_mean', 'total_transaction_amt', 'trans_year', 'trans_month', 'trans_day', 'trans_hour', 'trans_minute', 'trans_second', 'dob_year', 'dob_month', 'dob_day']


In [41]:
df_encoded.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10267 entries, 0 to 10266
Data columns (total 54 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   cc_num                      10267 non-null  float64
 1   merchant                    10267 non-null  object 
 2   category                    10267 non-null  object 
 3   amt                         10267 non-null  float64
 4   first                       10267 non-null  object 
 5   last                        10267 non-null  object 
 6   gender                      10267 non-null  object 
 7   street                      10267 non-null  object 
 8   city                        10267 non-null  object 
 9   state                       10267 non-null  object 
 10  lat                         10267 non-null  float64
 11  long                        10267 non-null  float64
 12  city_pop                    10267 non-null  float64
 13  job                         102

In [42]:
import pandas as pd

# Fill missing values in numeric columns with the mean of each column
df_encoded[numeric_columns] = df_encoded[numeric_columns].fillna(df_encoded[numeric_columns].mean())

# Fill missing values in categorical columns with the mode (most frequent value) of each column
df_encoded[categorical_columns] = df_encoded[categorical_columns].fillna(df_encoded[categorical_columns].mode().iloc[0])

# Verify if there are any remaining missing values
missing_values = df_encoded.isnull().sum()
print(missing_values)


cc_num                        0
merchant                      0
category                      0
amt                           0
first                         0
last                          0
gender                        0
street                        0
city                          0
state                         0
lat                           0
long                          0
city_pop                      0
job                           0
merch_lat                     0
merch_long                    0
is_fraud                      0
distance                      0
age                           0
transaction_date              0
transaction_month             0
transaction_year              0
transaction_time              0
transaction_day               0
transaction_weekday           0
is_weekend                    0
population_category           0
log_amt                       0
amt_squared                   0
transaction_count             0
average_transaction_amount    0
merchant

In [43]:
df_encoded["is_fraud"].fillna(0,inplace=True)

In [44]:
from sklearn.preprocessing import LabelEncoder
label_encoder=LabelEncoder()
for i in categorical_columns:
    df_encoded[i]=label_encoder.fit_transform(df_encoded[i])

In [45]:
df_encoded.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10267 entries, 0 to 10266
Data columns (total 54 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   cc_num                      10267 non-null  float64
 1   merchant                    10267 non-null  int64  
 2   category                    10267 non-null  int64  
 3   amt                         10267 non-null  float64
 4   first                       10267 non-null  int64  
 5   last                        10267 non-null  int64  
 6   gender                      10267 non-null  int64  
 7   street                      10267 non-null  int64  
 8   city                        10267 non-null  int64  
 9   state                       10267 non-null  int64  
 10  lat                         10267 non-null  float64
 11  long                        10267 non-null  float64
 12  city_pop                    10267 non-null  float64
 13  job                         102

In [46]:
from sklearn.model_selection import train_test_split
train_dat, test_dat = train_test_split(df_encoded, random_state=42, test_size=0.2)
train_dat, val_dat = train_test_split(train_dat, random_state=42, test_size=0.2)
print(f"Train Shape: {train_dat.shape} | Val Shape: {val_dat.shape} | Test Shape: {test_dat.shape}")

Train Shape: (6570, 54) | Val Shape: (1643, 54) | Test Shape: (2054, 54)


In [21]:
train.head(1)

NameError: name 'train' is not defined

In [47]:
num_col_names=numeric_columns
cat_col_names=categorical_columns

In [48]:
print(cat_col_names)

['merchant', 'category', 'first', 'last', 'gender', 'street', 'city', 'state', 'job', 'transaction_date', 'transaction_time', 'population_category', 'time_of_day']


In [None]:
#auto int 

In [17]:
from pytorch_tabular.models import AutoIntConfig
from pytorch_tabular.config import (
    DataConfig,
    OptimizerConfig,
    TrainerConfig,
)
from pytorch_tabular import TabularModel
data_config_3=DataConfig(
    target=["is_fraud"],
    continuous_cols=num_col_names,
    categorical_cols=cat_col_names
)

trainer_config_3=TrainerConfig(
    batch_size=1024,
    max_epochs=100
)

optimizer_config_3=OptimizerConfig()

model_config_3=AutoIntConfig(
    task="classification",
)

tabular_model_3=TabularModel(
    data_config=data_config_3,
    trainer_config=trainer_config_3,
    optimizer_config=optimizer_config_3,
    model_config=model_config_3,
    verbose=True
)

In [49]:
tabular_model_3.fit(train=train_dat,validation=val_dat)

Seed set to 42


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:639: Checkpoint directory /Users/abhilash/PyTorch_tabular/saved_models exists and is not empty.


Output()

<pytorch_lightning.trainer.trainer.Trainer at 0x284a43e50>

In [82]:
pred_autoint=tabular_model_3.predict(test_dat)

In [83]:
pred_autoint

Unnamed: 0,0.0_probability,1.0_probability,prediction
9309,1.550985e-08,1.000000e+00,1.0
7277,9.999999e-01,8.364387e-08,0.0
2908,9.998093e-01,1.906659e-04,0.0
8491,0.000000e+00,1.000000e+00,1.0
3815,9.999979e-01,2.165139e-06,0.0
...,...,...,...
400,7.243845e-04,9.992756e-01,1.0
2957,1.518665e-03,9.984813e-01,1.0
4151,2.150723e-07,9.999998e-01,1.0
4098,1.000000e+00,3.592877e-08,0.0


In [85]:
true_labels = test['is_fraud'].values
predicted_labels = pred_autoint['prediction'].astype(int)  # Assuming 'prediction' contains the predicted class labels

# Compute additional metrics
precision = precision_score(true_labels, predicted_labels)
recall = recall_score(true_labels, predicted_labels)
f1 = f1_score(true_labels, predicted_labels)
roc_auc = roc_auc_score(true_labels, pred_autoint[['0.0_probability', '1.0_probability']].values[:, 1])  # Use probability for class 1
conf_matrix = confusion_matrix(true_labels, predicted_labels)
class_report = classification_report(true_labels, predicted_labels)

metrics_table = PrettyTable()
metrics_table.field_names = ["Metric", "Value"]
metrics_table.add_row(["Precision", precision])
metrics_table.add_row(["Recall", recall])
metrics_table.add_row(["F1 Score", f1])
metrics_table.add_row(["ROC AUC Score", roc_auc])
print("Additional Evaluation Metrics:")
print(metrics_table)

# Print confusion matrix
conf_matrix_table = PrettyTable()
conf_matrix_table.field_names = ["", "Predicted Non-Fraud", "Predicted Fraud"]
conf_matrix_table.add_row(["Actual Non-Fraud", conf_matrix[0][0], conf_matrix[0][1]])
conf_matrix_table.add_row(["Actual Fraud", conf_matrix[1][0], conf_matrix[1][1]])
print("Confusion Matrix:")
print(conf_matrix_table)

# Print classification report
print("Classification Report:")
print(class_report)

import plotly.express as px
fig=px.imshow(conf_matrix,
             labels=dict(x="Predicted Label",y="True Label"))
fig=px.imshow(conf_matrix,labels=dict(x="Predicted Label", y="True Label", color="Count"),
      x=["Non-Fraud","Fraud"],
        y=["Non-Fraud","Fraud"],
       text_auto=True,
       color_continuous_scale='Viridis'
      )
fig.update_layout(title='Confusion Matrix')
fig.show()

Additional Evaluation Metrics:
+---------------+--------------------+
|     Metric    |       Value        |
+---------------+--------------------+
|   Precision   | 0.9980411361410382 |
|     Recall    | 0.9960899315738025 |
|    F1 Score   |  0.99706457925636  |
| ROC AUC Score | 0.9995904099029783 |
+---------------+--------------------+
Confusion Matrix:
+------------------+---------------------+-----------------+
|                  | Predicted Non-Fraud | Predicted Fraud |
+------------------+---------------------+-----------------+
| Actual Non-Fraud |         1029        |        2        |
|   Actual Fraud   |          4          |       1019      |
+------------------+---------------------+-----------------+
Classification Report:
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00      1031
         1.0       1.00      1.00      1.00      1023

    accuracy                           1.00      2054
   macro avg       1.00      1.0

In [31]:
tabular_model_3.evaluate(test_dat)

Output()

/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


[{'test_loss': 0.013176114298403263, 'test_accuracy': 0.9970788955688477}]

In [50]:
tabular_model_3.save_model("final_model")

In [53]:
fraud_count = test['is_fraud'].sum()
print(fraud_count)

1023.0


In [None]:
#Category_Embedding_Model

In [87]:
from pytorch_tabular.models import CategoryEmbeddingModelConfig
data_config_1 = DataConfig(
    target=["is_fraud"],  # Target variable (list)
    continuous_cols=num_col_names,  # List of continuous column names
    categorical_cols=cat_col_names,  # List of categorical column names
)

# Define your trainer configuration
trainer_config_1 = TrainerConfig(
    batch_size=1024,  # Batch size for training
    max_epochs=100,  # Maximum number of epochs
)

# Define your optimizer configuration
optimizer_config_1= OptimizerConfig()

# Define your model configuration
model_config_1=CategoryEmbeddingModelConfig(
    task="classification"
)

tabular_model_2=TabularModel(
    data_config=data_config_1,
    trainer_config=trainer_config_1,
    optimizer_config=optimizer_config_1,
    model_config=model_config_1,
    verbose=True
)

In [88]:
tabular_model_2.fit(train=train,validation=val)

Seed set to 42


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs



Checkpoint directory /Users/abhilash/PyTorch_tabular/saved_models exists and is not empty.



Output()

<pytorch_lightning.trainer.trainer.Trainer at 0x32b3342d0>

In [89]:
pred_CategoryEmbeddingModelConfig = tabular_model_2.predict(test)
pred_CategoryEmbeddingModelConfig.head()

Unnamed: 0,0.0_probability,1.0_probability,prediction
9309,0.000214,0.999786,1.0
7277,0.999316,0.000684,0.0
2908,0.999738,0.000262,0.0
8491,0.000788,0.999212,1.0
3815,0.996856,0.003144,0.0


In [90]:
true_labels = test['is_fraud'].values
predicted_labels = pred_CategoryEmbeddingModelConfig['prediction'].astype(int)  # Assuming 'prediction' contains the predicted class labels

# Compute additional metrics
precision = precision_score(true_labels, predicted_labels)
recall = recall_score(true_labels, predicted_labels)
f1 = f1_score(true_labels, predicted_labels)
roc_auc = roc_auc_score(true_labels, pred_CategoryEmbeddingModelConfig[['0.0_probability', '1.0_probability']].values[:, 1])  # Use probability for class 1
conf_matrix = confusion_matrix(true_labels, predicted_labels)
class_report = classification_report(true_labels, predicted_labels)

metrics_table = PrettyTable()
metrics_table.field_names = ["Metric", "Value"]
metrics_table.add_row(["Precision", precision])
metrics_table.add_row(["Recall", recall])
metrics_table.add_row(["F1 Score", f1])
metrics_table.add_row(["ROC AUC Score", roc_auc])
print("Additional Evaluation Metrics:")
print(metrics_table)

# Print confusion matrix
conf_matrix_table = PrettyTable()
conf_matrix_table.field_names = ["", "Predicted Non-Fraud", "Predicted Fraud"]
conf_matrix_table.add_row(["Actual Non-Fraud", conf_matrix[0][0], conf_matrix[0][1]])
conf_matrix_table.add_row(["Actual Fraud", conf_matrix[1][0], conf_matrix[1][1]])
print("Confusion Matrix:")
print(conf_matrix_table)

# Print classification report
print("Classification Report:")
print(class_report)

import plotly.express as px
fig=px.imshow(conf_matrix,
             labels=dict(x="Predicted Label",y="True Label"))
fig=px.imshow(conf_matrix,labels=dict(x="Predicted Label", y="True Label", color="Count"),
      x=["Non-Fraud","Fraud"],
        y=["Non-Fraud","Fraud"],
       text_auto=True,
       color_continuous_scale='Viridis'
      )
fig.update_layout(title='Confusion Matrix')
fig.show()

Additional Evaluation Metrics:
+---------------+--------------------+
|     Metric    |       Value        |
+---------------+--------------------+
|   Precision   | 0.9980353634577603 |
|     Recall    | 0.9931573802541545 |
|    F1 Score   | 0.9955903968642822 |
| ROC AUC Score | 0.999641608665106  |
+---------------+--------------------+
Confusion Matrix:
+------------------+---------------------+-----------------+
|                  | Predicted Non-Fraud | Predicted Fraud |
+------------------+---------------------+-----------------+
| Actual Non-Fraud |         1029        |        2        |
|   Actual Fraud   |          7          |       1016      |
+------------------+---------------------+-----------------+
Classification Report:
              precision    recall  f1-score   support

         0.0       0.99      1.00      1.00      1031
         1.0       1.00      0.99      1.00      1023

    accuracy                           1.00      2054
   macro avg       1.00      1.0

In [91]:
from pytorch_tabular.models import GatedAdditiveTreeEnsembleConfig

In [92]:
data_config_2=DataConfig(
    target=['is_fraud'],
    continuous_cols=num_col_names,
    categorical_cols=cat_col_names
)

trainer_config_2=TrainerConfig(
    batch_size=1024,
    max_epochs=100
)

optimizer_config_2=OptimizerConfig()

model_config_2=GatedAdditiveTreeEnsembleConfig(
    task="classification"
)

tabular_model_2=TabularModel(
    data_config=data_config_2,
    trainer_config=trainer_config_2,
    optimizer_config=optimizer_config_2,
    model_config=model_config_2,
    verbose=True
)

In [93]:
tabular_model_2.fit(train=train,validation=val)

Seed set to 42


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs



Checkpoint directory /Users/abhilash/PyTorch_tabular/saved_models exists and is not empty.



Output()

<pytorch_lightning.trainer.trainer.Trainer at 0x32b688090>

In [94]:
pred_gate=tabular_model_2.predict(test)

In [95]:
true_labels = test['is_fraud'].values
predicted_labels = pred_gate['prediction'].astype(int)  # Assuming 'prediction' contains the predicted class labels

# Compute additional metrics
precision = precision_score(true_labels, predicted_labels)
recall = recall_score(true_labels, predicted_labels)
f1 = f1_score(true_labels, predicted_labels)
roc_auc = roc_auc_score(true_labels, pred_gate[['0.0_probability', '1.0_probability']].values[:, 1])  # Use probability for class 1
conf_matrix = confusion_matrix(true_labels, predicted_labels)
class_report = classification_report(true_labels, predicted_labels)

metrics_table = PrettyTable()
metrics_table.field_names = ["Metric", "Value"]
metrics_table.add_row(["Precision", precision])
metrics_table.add_row(["Recall", recall])
metrics_table.add_row(["F1 Score", f1])
metrics_table.add_row(["ROC AUC Score", roc_auc])
print("Additional Evaluation Metrics:")
print(metrics_table)

# Print confusion matrix
conf_matrix_table = PrettyTable()
conf_matrix_table.field_names = ["", "Predicted Non-Fraud", "Predicted Fraud"]
conf_matrix_table.add_row(["Actual Non-Fraud", conf_matrix[0][0], conf_matrix[0][1]])
conf_matrix_table.add_row(["Actual Fraud", conf_matrix[1][0], conf_matrix[1][1]])
print("Confusion Matrix:")
print(conf_matrix_table)

# Print classification report
print("Classification Report:")
print(class_report)

import plotly.express as px
fig=px.imshow(conf_matrix,
             labels=dict(x="Predicted Label",y="True Label"))
fig=px.imshow(conf_matrix,labels=dict(x="Predicted Label", y="True Label", color="Count"),
      x=["Non-Fraud","Fraud"],
        y=["Non-Fraud","Fraud"],
       text_auto=True,
       color_continuous_scale='Viridis'
      )
fig.update_layout(title='Confusion Matrix')
fig.show()

Additional Evaluation Metrics:
+---------------+--------------------+
|     Metric    |       Value        |
+---------------+--------------------+
|   Precision   | 0.9980353634577603 |
|     Recall    | 0.9931573802541545 |
|    F1 Score   | 0.9955903968642822 |
| ROC AUC Score | 0.9990461860240654 |
+---------------+--------------------+
Confusion Matrix:
+------------------+---------------------+-----------------+
|                  | Predicted Non-Fraud | Predicted Fraud |
+------------------+---------------------+-----------------+
| Actual Non-Fraud |         1029        |        2        |
|   Actual Fraud   |          7          |       1016      |
+------------------+---------------------+-----------------+
Classification Report:
              precision    recall  f1-score   support

         0.0       0.99      1.00      1.00      1031
         1.0       1.00      0.99      1.00      1023

    accuracy                           1.00      2054
   macro avg       1.00      1.0

In [96]:
tabular_model_2.evaluate(test)

Output()


The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.



[{'test_loss': 0.02184310182929039, 'test_accuracy': 0.9956182837486267}]

In [104]:
from pytorch_tabular.models import GANDALFConfig
from pytorch_tabular.config import (
    DataConfig,
    OptimizerConfig,
    TrainerConfig,
)
from pytorch_tabular import TabularModel

# Assuming you have defined num_col_names and cat_col_names earlier

# Define your data configuration
data_config = DataConfig(
    target=["is_fraud"],  # Target variable (list)
    continuous_cols=num_col_names,  # List of continuous column names
    categorical_cols=cat_col_names,  # List of categorical column names
)

# Define your trainer configuration
trainer_config = TrainerConfig(
    batch_size=1024,  # Batch size for training
    max_epochs=100,  # Maximum number of epochs
)

# Define your optimizer configuration
optimizer_config = OptimizerConfig()

# Define your model configuration
model_config = GANDALFConfig(
    task="classification",  # Task type (classification/regression)
    gflu_stages=6,  # Number of GFLU stages
    gflu_feature_init_sparsity=0.3,  # Initial sparsity for GFLU features
    gflu_dropout=0.0,  # Dropout probability for GFLU layers
    learning_rate=1e-3,  # Learning rate for optimization
)

# Create your TabularModel
tabular_model = TabularModel(
    data_config=data_config,
    model_config=model_config,
    optimizer_config=optimizer_config,
    trainer_config=trainer_config,
    verbose=True
)



In [105]:
tabular_model.fit(train=train, validation=val)

Seed set to 42


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs



Checkpoint directory /Users/abhilash/PyTorch_tabular/saved_models exists and is not empty.



Output()

<pytorch_lightning.trainer.trainer.Trainer at 0x32c5f7a10>

In [121]:
fd=tabular_model.feature_importance()

In [122]:
fd=fd.sort_values(by="importance",ascending=False)

In [123]:
fd.head(10)

Unnamed: 0,Features,importance
13,cc_num,0.028473
50,dob_year,0.022063
40,is_fraud_and_weekend,0.018929
23,transaction_year,0.018628
4,gender,0.017988
25,transaction_weekday,0.017428
27,log_amt,0.017343
21,age,0.016806
19,merch_long,0.015575
24,transaction_day,0.015523


In [106]:
pred_gandalf = tabular_model.predict(test)

In [107]:
pred_gandalf = tabular_model.predict(test)
true_labels = test['is_fraud'].values
predicted_labels = pred_gandalf['prediction'].astype(int)  # Assuming 'prediction' contains the predicted class labels

# Compute additional metrics
precision = precision_score(true_labels, predicted_labels)
recall = recall_score(true_labels, predicted_labels)
f1 = f1_score(true_labels, predicted_labels)
roc_auc = roc_auc_score(true_labels, pred_gandalf[['0.0_probability', '1.0_probability']].values[:, 1])  # Use probability for class 1
conf_matrix = confusion_matrix(true_labels, predicted_labels)
class_report = classification_report(true_labels, predicted_labels)

metrics_table = PrettyTable()
metrics_table.field_names = ["Metric", "Value"]
metrics_table.add_row(["Precision", precision])
metrics_table.add_row(["Recall", recall])
metrics_table.add_row(["F1 Score", f1])
metrics_table.add_row(["ROC AUC Score", roc_auc])
print("Additional Evaluation Metrics:")
print(metrics_table)

# Print confusion matrix
conf_matrix_table = PrettyTable()
conf_matrix_table.field_names = ["", "Predicted Non-Fraud", "Predicted Fraud"]
conf_matrix_table.add_row(["Actual Non-Fraud", conf_matrix[0][0], conf_matrix[0][1]])
conf_matrix_table.add_row(["Actual Fraud", conf_matrix[1][0], conf_matrix[1][1]])
print("Confusion Matrix:")
print(conf_matrix_table)

# Print classification report
print("Classification Report:")
print(class_report)

import plotly.express as px
fig=px.imshow(conf_matrix,labels=dict(x="Predicted Label", y="True Label", color="Count"),
      x=["Non-Fraud","Fraud"],
        y=["Non-Fraud","Fraud"],
       text_auto=True,
       color_continuous_scale='Viridis'
      )
fig.update_layout(title='Confusion Matrix')
fig.show()

Additional Evaluation Metrics:
+---------------+--------------------+
|     Metric    |       Value        |
+---------------+--------------------+
|   Precision   | 0.9931170108161259 |
|     Recall    | 0.9872922776148583 |
|    F1 Score   | 0.9901960784313726 |
| ROC AUC Score | 0.9987096015693369 |
+---------------+--------------------+
Confusion Matrix:
+------------------+---------------------+-----------------+
|                  | Predicted Non-Fraud | Predicted Fraud |
+------------------+---------------------+-----------------+
| Actual Non-Fraud |         1024        |        7        |
|   Actual Fraud   |          13         |       1010      |
+------------------+---------------------+-----------------+
Classification Report:
              precision    recall  f1-score   support

         0.0       0.99      0.99      0.99      1031
         1.0       0.99      0.99      0.99      1023

    accuracy                           0.99      2054
   macro avg       0.99      0.9

In [108]:
result = tabular_model.evaluate(test)

Output()


The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.



In [109]:
from pytorch_tabular.models import TabTransformerConfig

In [110]:
model_config_5=TabTransformerConfig(
    task="classification"
)
tabular_model_5=TabularModel(
    data_config=data_config,
    model_config=model_config_5,
    optimizer_config=optimizer_config,
    trainer_config=trainer_config
)

In [111]:
tabular_model_5.fit(train=train,validation=val)

Seed set to 42


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs



Checkpoint directory /Users/abhilash/PyTorch_tabular/saved_models exists and is not empty.



Output()

<pytorch_lightning.trainer.trainer.Trainer at 0x3ff28f450>

In [112]:
pred_ttc=tabular_model_5.predict(test)

In [113]:
true_labels = test['is_fraud'].values
predicted_labels = pred_ttc['prediction'].astype(int)  # Assuming 'prediction' contains the predicted class labels

# Compute additional metrics
precision = precision_score(true_labels, predicted_labels)
recall = recall_score(true_labels, predicted_labels)
f1 = f1_score(true_labels, predicted_labels)
roc_auc = roc_auc_score(true_labels, pred_ttc[['0.0_probability', '1.0_probability']].values[:, 1])  # Use probability for class 1
conf_matrix = confusion_matrix(true_labels, predicted_labels)
class_report = classification_report(true_labels, predicted_labels)

metrics_table = PrettyTable()
metrics_table.field_names = ["Metric", "Value"]
metrics_table.add_row(["Precision", precision])
metrics_table.add_row(["Recall", recall])
metrics_table.add_row(["F1 Score", f1])
metrics_table.add_row(["ROC AUC Score", roc_auc])
print("Additional Evaluation Metrics:")
print(metrics_table)

# Print confusion matrix
conf_matrix_table = PrettyTable()
conf_matrix_table.field_names = ["", "Predicted Non-Fraud", "Predicted Fraud"]
conf_matrix_table.add_row(["Actual Non-Fraud", conf_matrix[0][0], conf_matrix[0][1]])
conf_matrix_table.add_row(["Actual Fraud", conf_matrix[1][0], conf_matrix[1][1]])
print("Confusion Matrix:")
print(conf_matrix_table)

# Print classification report
print("Classification Report:")
print(class_report)

import plotly.express as px
fig=px.imshow(conf_matrix,
             labels=dict(x="Predicted Label",y="True Label"))
fig=px.imshow(conf_matrix,labels=dict(x="Predicted Label", y="True Label", color="Count"),
      x=["Non-Fraud","Fraud"],
        y=["Non-Fraud","Fraud"],
       text_auto=True,
       color_continuous_scale='Viridis'
      )
fig.update_layout(title='Confusion Matrix')
fig.show()

Additional Evaluation Metrics:
+---------------+--------------------+
|     Metric    |       Value        |
+---------------+--------------------+
|   Precision   | 0.9990186457311089 |
|     Recall    | 0.9951124144672532 |
|    F1 Score   | 0.9970617042115573 |
| ROC AUC Score | 0.9999905187477541 |
+---------------+--------------------+
Confusion Matrix:
+------------------+---------------------+-----------------+
|                  | Predicted Non-Fraud | Predicted Fraud |
+------------------+---------------------+-----------------+
| Actual Non-Fraud |         1030        |        1        |
|   Actual Fraud   |          5          |       1018      |
+------------------+---------------------+-----------------+
Classification Report:
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00      1031
         1.0       1.00      1.00      1.00      1023

    accuracy                           1.00      2054
   macro avg       1.00      1.0

In [114]:
tabular_model_5.evaluate(test)

Output()


The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.



[{'test_loss': 0.007969359867274761, 'test_accuracy': 0.9970788955688477}]

In [115]:
from pytorch_tabular.models import TabNetModelConfig

In [116]:
model_config_8= TabNetModelConfig(
    task="classification"
)
tabular_model_8=TabularModel(
    data_config=data_config,
    trainer_config=trainer_config,
    optimizer_config=optimizer_config,
    model_config=model_config_8,
    verbose=True
)

In [117]:
tabular_model_8.fit(train=train,validation=val)

Seed set to 42


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs



Checkpoint directory /Users/abhilash/PyTorch_tabular/saved_models exists and is not empty.



Output()

<pytorch_lightning.trainer.trainer.Trainer at 0x42c5e5090>

In [118]:
pred_tabnetmod=tabular_model_8.predict(test)

In [119]:
true_labels = test['is_fraud'].values
predicted_labels = pred_tabnetmod['prediction'].astype(int)  # Assuming 'prediction' contains the predicted class labels

# Compute additional metrics
precision = precision_score(true_labels, predicted_labels)
recall = recall_score(true_labels, predicted_labels)
f1 = f1_score(true_labels, predicted_labels)
roc_auc = roc_auc_score(true_labels, pred_tabnetmod[['0.0_probability', '1.0_probability']].values[:, 1])  # Use probability for class 1
conf_matrix = confusion_matrix(true_labels, predicted_labels)
class_report = classification_report(true_labels, predicted_labels)

metrics_table = PrettyTable()
metrics_table.field_names = ["Metric", "Value"]
metrics_table.add_row(["Precision", precision])
metrics_table.add_row(["Recall", recall])
metrics_table.add_row(["F1 Score", f1])
metrics_table.add_row(["ROC AUC Score", roc_auc])
print("Additional Evaluation Metrics:")
print(metrics_table)

# Print confusion matrix
conf_matrix_table = PrettyTable()
conf_matrix_table.field_names = ["", "Predicted Non-Fraud", "Predicted Fraud"]
conf_matrix_table.add_row(["Actual Non-Fraud", conf_matrix[0][0], conf_matrix[0][1]])
conf_matrix_table.add_row(["Actual Fraud", conf_matrix[1][0], conf_matrix[1][1]])
print("Confusion Matrix:")
print(conf_matrix_table)

# Print classification report
print("Classification Report:")
print(class_report)

import plotly.express as px
fig=px.imshow(conf_matrix,
             labels=dict(x="Predicted Label",y="True Label"))
fig=px.imshow(conf_matrix,labels=dict(x="Predicted Label", y="True Label", color="Count"),
      x=["Non-Fraud","Fraud"],
        y=["Non-Fraud","Fraud"],
       text_auto=True,
       color_continuous_scale='Viridis'
      )
fig.update_layout(title='Confusion Matrix')
fig.show()

Additional Evaluation Metrics:
+---------------+--------------------+
|     Metric    |       Value        |
+---------------+--------------------+
|   Precision   | 0.4850691915513474 |
|     Recall    | 0.6510263929618768 |
|    F1 Score   | 0.5559265442404007 |
| ROC AUC Score | 0.4870718385001417 |
+---------------+--------------------+
Confusion Matrix:
+------------------+---------------------+-----------------+
|                  | Predicted Non-Fraud | Predicted Fraud |
+------------------+---------------------+-----------------+
| Actual Non-Fraud |         324         |       707       |
|   Actual Fraud   |         357         |       666       |
+------------------+---------------------+-----------------+
Classification Report:
              precision    recall  f1-score   support

         0.0       0.48      0.31      0.38      1031
         1.0       0.49      0.65      0.56      1023

    accuracy                           0.48      2054
   macro avg       0.48      0.4

In [120]:
tabular_model_8.evaluate(test)

Output()


The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.



[{'test_loss': 0.6963024139404297, 'test_accuracy': 0.4819863736629486}]

In [129]:
!pip install wandb




In [130]:
import wandb
from pytorch_tabular.models import AutoIntConfig
from pytorch_tabular.config import DataConfig, OptimizerConfig, TrainerConfig
from pytorch_tabular import TabularModel
from sklearn.metrics import accuracy_score

# Dummy data - replace with your actual data loading logic
train_data = ...
val_data = ...
test_data = ...
num_col_names = ...
cat_col_names = ...

# Function to train and evaluate the model
def train_model(config=None):
    with wandb.init(config=config):
        config = wandb.config
        
        # Data configuration
        data_config = DataConfig(
            target=["is_fraud"],
            continuous_cols=num_col_names,
            categorical_cols=cat_col_names
        )

        # Trainer configuration
        trainer_config = TrainerConfig(
            batch_size=config.batch_size,
            max_epochs=config.max_epochs
        )

        # Optimizer configuration
        optimizer_config = OptimizerConfig()

        # Model configuration
        model_config = AutoIntConfig(
            task="classification",
            learning_rate=config.learning_rate
        )

        # Create and train the model
        tabular_model = TabularModel(
            data_config=data_config,
            trainer_config=trainer_config,
            optimizer_config=optimizer_config,
            model_config=model_config,
            verbose=True
        )
        
        # Fit the model
        tabular_model.fit(train=train_data, validation=val_data)

        # Evaluate the model
        result = tabular_model.evaluate(test=test_data)
        accuracy = result['test_accuracy']
        
        # Log metrics to W&B
        wandb.log({"accuracy": accuracy})

# Set your project name
project_name = "payment-fraud-detection"

# Sweep configuration
sweep_config = {
    'method': 'random',  # or 'grid', 'bayes'
    'metric': {
        'name': 'accuracy',
        'goal': 'maximize'   
    },
    'parameters': {
        'batch_size': {
            'values': [512, 1024, 2048]
        },
        'max_epochs': {
            'min': 50,
            'max': 200
        },
        'learning_rate': {
            'min': 1e-5,
            'max': 1e-1
        },
    }
}

# Initialize the sweep
sweep_id = wandb.sweep(sweep_config, project=project_name)

# Execute the sweep
wandb.agent(sweep_id, train_model, count=50)


[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /Users/abhilash/.netrc


Create sweep with ID: yrbx9nxx
Sweep URL: https://wandb.ai/iiitdmkurnool/payment-fraud-detection/sweeps/yrbx9nxx


[34m[1mwandb[0m: Agent Starting Run: zm6xft1x with config:
[34m[1mwandb[0m: 	batch_size: 1024
[34m[1mwandb[0m: 	learning_rate: 0.0876862032392185
[34m[1mwandb[0m: 	max_epochs: 84
[34m[1mwandb[0m: Currently logged in as: [33mabhilashsankanagoudar[0m ([33miiitdmkurnool[0m). Use [1m`wandb login --relogin`[0m to force relogin


Traceback (most recent call last):
  File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/4040372995.py", line 20, in train_model
    data_config = DataConfig(
                  ^^^^^^^^^^^
  File "<string>", line 16, in __init__
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/config/config.py", line 179, in __post_init__
    len(self.categorical_cols) + len(self.continuous_cols) + len(self.date_columns) > 0
    ^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: object of type 'ellipsis' has no len()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Run zm6xft1x errored:
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/4040372995.py", line 20, in train_model
    data_config = DataConfig(
                  ^^^^^^^^^^^
  File "<string>", line 16, in __init__
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/config/config.py", line 179, in __post_init__
    len(self.categorical_cols) + len(self.continuous_cols) + len(self.date_columns) > 0
    ^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: object of type 'ellipsis' has no len()

[34m[1mwandb[0m: [32m[41mERROR[0m Run zm6xft1x errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self

Traceback (most recent call last):
  File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/4040372995.py", line 20, in train_model
    data_config = DataConfig(
                  ^^^^^^^^^^^
  File "<string>", line 16, in __init__
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/config/config.py", line 179, in __post_init__
    len(self.categorical_cols) + len(self.continuous_cols) + len(self.date_columns) > 0
    ^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: object of type 'ellipsis' has no len()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Run 7edj1qmf errored:
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/4040372995.py", line 20, in train_model
    data_config = DataConfig(
                  ^^^^^^^^^^^
  File "<string>", line 16, in __init__
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/config/config.py", line 179, in __post_init__
    len(self.categorical_cols) + len(self.continuous_cols) + len(self.date_columns) > 0
    ^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: object of type 'ellipsis' has no len()

[34m[1mwandb[0m: [32m[41mERROR[0m Run 7edj1qmf errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self

Traceback (most recent call last):
  File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/4040372995.py", line 20, in train_model
    data_config = DataConfig(
                  ^^^^^^^^^^^
  File "<string>", line 16, in __init__
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/config/config.py", line 179, in __post_init__
    len(self.categorical_cols) + len(self.continuous_cols) + len(self.date_columns) > 0
    ^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: object of type 'ellipsis' has no len()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Run 0lxwaygl errored:
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/4040372995.py", line 20, in train_model
    data_config = DataConfig(
                  ^^^^^^^^^^^
  File "<string>", line 16, in __init__
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/config/config.py", line 179, in __post_init__
    len(self.categorical_cols) + len(self.continuous_cols) + len(self.date_columns) > 0
    ^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: object of type 'ellipsis' has no len()

[34m[1mwandb[0m: [32m[41mERROR[0m Run 0lxwaygl errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self

Traceback (most recent call last):
  File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/4040372995.py", line 20, in train_model
    data_config = DataConfig(
                  ^^^^^^^^^^^
  File "<string>", line 16, in __init__
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/config/config.py", line 179, in __post_init__
    len(self.categorical_cols) + len(self.continuous_cols) + len(self.date_columns) > 0
    ^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: object of type 'ellipsis' has no len()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Run sswz0kiw errored:
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/4040372995.py", line 20, in train_model
    data_config = DataConfig(
                  ^^^^^^^^^^^
  File "<string>", line 16, in __init__
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/config/config.py", line 179, in __post_init__
    len(self.categorical_cols) + len(self.continuous_cols) + len(self.date_columns) > 0
    ^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: object of type 'ellipsis' has no len()

[34m[1mwandb[0m: [32m[41mERROR[0m Run sswz0kiw errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self

Traceback (most recent call last):
  File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/4040372995.py", line 20, in train_model
    data_config = DataConfig(
                  ^^^^^^^^^^^
  File "<string>", line 16, in __init__
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/config/config.py", line 179, in __post_init__
    len(self.categorical_cols) + len(self.continuous_cols) + len(self.date_columns) > 0
    ^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: object of type 'ellipsis' has no len()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Run 2dfjyi6k errored:
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/4040372995.py", line 20, in train_model
    data_config = DataConfig(
                  ^^^^^^^^^^^
  File "<string>", line 16, in __init__
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/config/config.py", line 179, in __post_init__
    len(self.categorical_cols) + len(self.continuous_cols) + len(self.date_columns) > 0
    ^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: object of type 'ellipsis' has no len()

[34m[1mwandb[0m: [32m[41mERROR[0m Run 2dfjyi6k errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self

Traceback (most recent call last):
  File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/4040372995.py", line 20, in train_model
    data_config = DataConfig(
                  ^^^^^^^^^^^
  File "<string>", line 16, in __init__
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/config/config.py", line 179, in __post_init__
    len(self.categorical_cols) + len(self.continuous_cols) + len(self.date_columns) > 0
    ^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: object of type 'ellipsis' has no len()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Run bs1gnazj errored:
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/4040372995.py", line 20, in train_model
    data_config = DataConfig(
                  ^^^^^^^^^^^
  File "<string>", line 16, in __init__
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/config/config.py", line 179, in __post_init__
    len(self.categorical_cols) + len(self.continuous_cols) + len(self.date_columns) > 0
    ^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: object of type 'ellipsis' has no len()

[34m[1mwandb[0m: [32m[41mERROR[0m Run bs1gnazj errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self

In [144]:
from pytorch_tabular.models import AutoIntConfig
from pytorch_tabular.config import (
    DataConfig,
    OptimizerConfig,
    TrainerConfig,
)
from pytorch_tabular import TabularModel

# Original configurations
data_config = DataConfig(
    target=["is_fraud"],
    continuous_cols=num_col_names,
    categorical_cols=cat_col_names
)

trainer_config = TrainerConfig(
    batch_size=1024,
    max_epochs=100
)

optimizer_config = OptimizerConfig(
    optimizer="adam",
    lr=0.001
)

model_config = AutoIntConfig(
    task="classification",
    embedding_dim=32,
    n_heads=2,
    n_layers=2,
    dropout=0.1
)

# Initialize and train the model
tabular_model = TabularModel(
    data_config=data_config,
    trainer_config=trainer_config,
    optimizer_config=optimizer_config,
    model_config=model_config,
    verbose=True
)

# Fit the model (replace 'train_data' with your actual training data)
tabular_model.fit(train=train,validation=val)

# Evaluate the model (replace 'test_data' with your actual test data)
result = tabular_model.evaluate(test=test)
print("Initial configuration result: ", result)

# Manual tuning example
# Change batch size to 512, max epochs to 50, and learning rate to 0.01
trainer_config_2 = TrainerConfig(
    batch_size=512,
    max_epochs=50
)

optimizer_config_2 = OptimizerConfig(
    optimizer="adam",
    lr=0.01
)

model_config_2 = AutoIntConfig(
    task="classification",
    embedding_dim=64,  # Increase embedding dimension
    n_heads=4,         # Increase number of heads
    n_layers=3,        # Increase number of layers
    dropout=0.2        # Increase dropout rate
)

# Initialize and train the model with new configuration
tabular_model_2 = TabularModel(
    data_config=data_config,
    trainer_config=trainer_config_2,
    optimizer_config=optimizer_config_2,
    model_config=model_config_2,
    verbose=True
)

# Fit the model (replace 'train_data' with your actual training data)
tabular_model_2.fit(train=train,validation=val)

# Evaluate the model (replace 'test_data' with your actual test data)
result_2 = tabular_model_2.evaluate(test=test)
print("Tuned configuration result: ", result_2)


TypeError: OptimizerConfig.__init__() got an unexpected keyword argument 'lr'

In [146]:
from pytorch_tabular.models import AutoIntConfig
from pytorch_tabular.config import (
    DataConfig,
    OptimizerConfig,
    TrainerConfig,
)
from pytorch_tabular import TabularModel
data_config_3=DataConfig(
    target=["is_fraud"],
    continuous_cols=num_col_names,
    categorical_cols=cat_col_names
)

trainer_config_3=TrainerConfig(
    batch_size=1024,
    max_epochs=100
)

optimizer_config_3= OptimizerConfig(
    optimizer="RMSprop", lr_scheduler="StepLR", lr_scheduler_params={"step_size": 10}
)

model_config_3=AutoIntConfig(
    task="classification",
)

tabular_model_3=TabularModel(
    data_config=data_config_3,
    trainer_config=trainer_config_3,
    optimizer_config=optimizer_config_3,
    model_config=model_config_3,
    verbose=True
)

In [147]:
tabular_model_3.fit(train=train,validation=val)

Seed set to 42


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs



Checkpoint directory /Users/abhilash/PyTorch_tabular/saved_models exists and is not empty.



Output()

<pytorch_lightning.trainer.trainer.Trainer at 0x44ab5e690>

In [152]:
tabular_model_3.evaluate(test)

Output()


The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.



[{'test_loss': 0.3336988091468811, 'test_accuracy': 0.8997079133987427}]

In [150]:
!pip install torch_optimizer

Collecting torch_optimizer
  Downloading torch_optimizer-0.3.0-py3-none-any.whl.metadata (55 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m55.9/55.9 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
Collecting pytorch-ranger>=0.1.1 (from torch_optimizer)
  Downloading pytorch_ranger-0.1.1-py3-none-any.whl.metadata (509 bytes)
Downloading torch_optimizer-0.3.0-py3-none-any.whl (61 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.9/61.9 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pytorch_ranger-0.1.1-py3-none-any.whl (14 kB)
Installing collected packages: pytorch-ranger, torch_optimizer
Successfully installed pytorch-ranger-0.1.1 torch_optimizer-0.3.0


In [151]:
from torch_optimizer import QHAdam

tabular_model_3.fit(
    train=train,
    validation=val,
    optimizer=QHAdam,
    optimizer_params={"nus": (0.7, 1.0), "betas": (0.95, 0.998)},
)

Seed set to 42


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs



Checkpoint directory /Users/abhilash/PyTorch_tabular/saved_models exists and is not empty.



Output()

<pytorch_lightning.trainer.trainer.Trainer at 0x4f0732b10>

In [156]:
import optuna
from pytorch_tabular import TabularModel
from pytorch_tabular.config import DataConfig, OptimizerConfig, TrainerConfig
from pytorch_tabular.models import AutoIntConfig
from sklearn.metrics import accuracy_score  # Or any other metric you want to use
from sklearn.model_selection import train_test_split
import pandas as pd

def objective(trial):
    # Define the hyperparameter search space
    batch_size = trial.suggest_categorical("batch_size", [256, 512, 1024, 2048])
    max_epochs = trial.suggest_int("max_epochs", 50, 200)
    learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-2)
    dropout = trial.suggest_uniform("dropout", 0.1, 0.5)
    n_heads = trial.suggest_int("n_heads", 2, 8)
    n_blocks = trial.suggest_int("n_blocks", 1, 5)

    data_config = DataConfig(
        target="is_fraud",
        continuous_cols=num_col_names,
        categorical_cols=cat_col_names,
    )

    trainer_config = TrainerConfig(
        batch_size=batch_size,
        max_epochs=max_epochs
    )

    optimizer_config = OptimizerConfig(
    )

    model_config = AutoIntConfig(
        task="classification",
        dropout=dropout,
        n_heads=n_heads,
        n_blocks=n_blocks
    )

    tabular_model = TabularModel(
        data_config=data_config,
        trainer_config=trainer_config,
        optimizer_config=optimizer_config,
        model_config=model_config,
        verbose=0
    )

    # Train the model
    tabular_model.fit(train=train, validation=val)

    # Evaluate the model
    result = tabular_model.evaluate(test)
    accuracy = result["accuracy"]  # Replace with your metric of interest

    return accuracy

# Create an Optuna study and optimize the objective function
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=50)

# Print the best hyperparameters
print("Best hyperparameters:", study.best_params)


[I 2024-06-20 10:47:59,342] A new study created in memory with name: no-name-35ccca3e-9410-48ed-b149-7e5c9dd78b22

suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.


suggest_uniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float instead.

[W 2024-06-20 10:47:59,346] Trial 0 failed with parameters: {'batch_size': 256, 'max_epochs': 88, 'learning_rate': 2.809785544689599e-05, 'dropout': 0.3967717663360744, 'n_heads': 2, 'n_blocks': 5} because of the following error: TypeError("AutoIntConfig.__init__() got an unexpected keyword argument 'n_heads'").
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.11/site-packages/optuna/study/_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^

TypeError: AutoIntConfig.__init__() got an unexpected keyword argument 'n_heads'

In [157]:
print("HELLO")

HELLO


In [158]:
import wandb

In [159]:
wandb.login()

True

In [160]:
import wandb

sweep_config = {
    'method': 'random',  # Can be 'grid', 'random', 'bayes'
    'metric': {
        'name': 'valid_loss',
        'goal': 'minimize'   
    },
    'parameters': {
        'batch_size': {
            'values': [256, 512, 1024, 2048]
        },
        'max_epochs': {
            'values': [50, 200]
        },
        'min_epochs': {
            'values': [1, 50]
        },
        'early_stopping': {
            'values': [None, 'valid_loss']
        },
        'early_stopping_min_delta': {
            'values': [0.0001, 0.001]
        },
        'early_stopping_mode': {
            'values': ['min', 'max']
        },
        'early_stopping_patience': {
            'values': [3, 10]
        },
        'gradient_clip_val': {
            'values': [0.0, 1.0]
        },
        'auto_lr_find': {
            'values': [True, False]
        },
        'optimizer': {
            'values': ['Adam', 'SGD', 'RMSprop', 'AdamW']
        },
        'optimizer_params': {
            'values': [{'weight_decay': 0.0}, {'weight_decay': 0.1}]
        },
        'lr_scheduler': {
            'values': ['CosineAnnealingLR', 'StepLR', 'ReduceLROnPlateau']
        },
        'lr_scheduler_params': {
            'values': [
                {'step_size': 5, 'gamma': 0.1},
                {'step_size': 5, 'gamma': 0.5},
                {'step_size': 10, 'gamma': 0.1},
                {'step_size': 10, 'gamma': 0.5}
            ]
        },
        'lr_scheduler_monitor_metric': {
            'values': ['valid_loss']
        },
        'dropout': {
            'values': [0.1, 0.5]
        },
        'n_heads': {
            'values': [2, 8]
        },
        'n_blocks': {
            'values': [1, 5]
        },
        'learning_rate': {
            'values': [1e-5, 1e-2]
        }
    }
}

# Initialize a sweep
sweep_id = wandb.sweep(sweep_config, project='fraud_classification')

# Function to train your model
def train():
    # Initialize a new run
    wandb.init()

    # Fetch hyperparameters
    config = wandb.config

    # Create the model configuration with fetched parameters
    data_config = DataConfig(
        target=["is_fraud"],
        continuous_cols=num_col_names,
        categorical_cols=cat_col_names
    )

    trainer_config = TrainerConfig(
        batch_size=config.batch_size,
        max_epochs=config.max_epochs,
        min_epochs=config.min_epochs,
        early_stopping=config.early_stopping,
        early_stopping_min_delta=config.early_stopping_min_delta,
        early_stopping_mode=config.early_stopping_mode,
        early_stopping_patience=config.early_stopping_patience,
        gradient_clip_val=config.gradient_clip_val,
        auto_lr_find=config.auto_lr_find,
    )

    optimizer_config = OptimizerConfig(
        optimizer=config.optimizer,
        optimizer_params=config.optimizer_params,
        lr_scheduler=config.lr_scheduler,
        lr_scheduler_params=config.lr_scheduler_params,
        lr_scheduler_monitor_metric=config.lr_scheduler_monitor_metric,
    )

    model_config = AutoIntConfig(
        task="classification",
        dropout=config.dropout,
        n_heads=config.n_heads,
        n_blocks=config.n_blocks,
        learning_rate=config.learning_rate,
    )

    tabular_model = TabularModel(
        data_config=data_config,
        trainer_config=trainer_config,
        optimizer_config=optimizer_config,
        model_config=model_config,
        verbose=True
    )

    # Fit the model (replace with your data)
    tabular_model.fit(train=train, validation=val)

    # Log metrics (replace with your evaluation logic)
    results = tabular_model.evaluate(test=test)
    wandb.log(results)

# Run the sweep
wandb.agent(sweep_id, train)


Create sweep with ID: e4dwtqsy
Sweep URL: https://wandb.ai/iiitdmkurnool/fraud_classification/sweeps/e4dwtqsy


[34m[1mwandb[0m: Agent Starting Run: 0twaytq7 with config:
[34m[1mwandb[0m: 	auto_lr_find: False
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	early_stopping: None
[34m[1mwandb[0m: 	early_stopping_min_delta: 0.0001
[34m[1mwandb[0m: 	early_stopping_mode: max
[34m[1mwandb[0m: 	early_stopping_patience: 3
[34m[1mwandb[0m: 	gradient_clip_val: 0
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	lr_scheduler: CosineAnnealingLR
[34m[1mwandb[0m: 	lr_scheduler_monitor_metric: valid_loss
[34m[1mwandb[0m: 	lr_scheduler_params: {'gamma': 0.5, 'step_size': 5}
[34m[1mwandb[0m: 	max_epochs: 200
[34m[1mwandb[0m: 	min_epochs: 50
[34m[1mwandb[0m: 	n_blocks: 5
[34m[1mwandb[0m: 	n_heads: 2
[34m[1mwandb[0m: 	optimizer: Adam
[34m[1mwandb[0m: 	optimizer_params: {'weight_decay': 0}


VBox(children=(Label(value='0.001 MB of 0.010 MB uploaded\r'), FloatProgress(value=0.07433160317304867, max=1.…

Run 0twaytq7 errored:
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/2755846897.py", line 110, in train
    model_config = AutoIntConfig(
                   ^^^^^^^^^^^^^^
TypeError: AutoIntConfig.__init__() got an unexpected keyword argument 'n_heads'

[34m[1mwandb[0m: [32m[41mERROR[0m Run 0twaytq7 errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/2755846897.py", line 110, in train
[34m[1mwandb[0m: [32m[41mERROR[0m     model_config = AutoIntConfig(
[34

VBox(children=(Label(value='0.001 MB of 0.029 MB uploaded\r'), FloatProgress(value=0.02522097428058749, max=1.…

Run lz0v0azy errored:
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/2755846897.py", line 110, in train
    model_config = AutoIntConfig(
                   ^^^^^^^^^^^^^^
TypeError: AutoIntConfig.__init__() got an unexpected keyword argument 'n_heads'

[34m[1mwandb[0m: [32m[41mERROR[0m Run lz0v0azy errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/2755846897.py", line 110, in train
[34m[1mwandb[0m: [32m[41mERROR[0m     model_config = AutoIntConfig(
[34

VBox(children=(Label(value='0.001 MB of 0.029 MB uploaded\r'), FloatProgress(value=0.025188582062273618, max=1…

Run rfb9rdps errored:
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/2755846897.py", line 110, in train
    model_config = AutoIntConfig(
                   ^^^^^^^^^^^^^^
TypeError: AutoIntConfig.__init__() got an unexpected keyword argument 'n_heads'

[34m[1mwandb[0m: [32m[41mERROR[0m Run rfb9rdps errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/2755846897.py", line 110, in train
[34m[1mwandb[0m: [32m[41mERROR[0m     model_config = AutoIntConfig(
[34

VBox(children=(Label(value='0.001 MB of 0.010 MB uploaded\r'), FloatProgress(value=0.0744044701499853, max=1.0…

Run 2wpz7ncj errored:
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/2755846897.py", line 110, in train
    model_config = AutoIntConfig(
                   ^^^^^^^^^^^^^^
TypeError: AutoIntConfig.__init__() got an unexpected keyword argument 'n_heads'

[34m[1mwandb[0m: [32m[41mERROR[0m Run 2wpz7ncj errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/2755846897.py", line 110, in train
[34m[1mwandb[0m: [32m[41mERROR[0m     model_config = AutoIntConfig(
[34

VBox(children=(Label(value='0.001 MB of 0.029 MB uploaded\r'), FloatProgress(value=0.025226842157742548, max=1…

Run kc23g2f2 errored:
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/2755846897.py", line 110, in train
    model_config = AutoIntConfig(
                   ^^^^^^^^^^^^^^
TypeError: AutoIntConfig.__init__() got an unexpected keyword argument 'n_heads'

[34m[1mwandb[0m: [32m[41mERROR[0m Run kc23g2f2 errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/2755846897.py", line 110, in train
[34m[1mwandb[0m: [32m[41mERROR[0m     model_config = AutoIntConfig(
[34

VBox(children=(Label(value='0.001 MB of 0.029 MB uploaded\r'), FloatProgress(value=0.025192767880882743, max=1…

Run xqrjr3j0 errored:
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/2755846897.py", line 110, in train
    model_config = AutoIntConfig(
                   ^^^^^^^^^^^^^^
TypeError: AutoIntConfig.__init__() got an unexpected keyword argument 'n_heads'

[34m[1mwandb[0m: [32m[41mERROR[0m Run xqrjr3j0 errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/2755846897.py", line 110, in train
[34m[1mwandb[0m: [32m[41mERROR[0m     model_config = AutoIntConfig(
[34

In [167]:
import wandb
from pytorch_tabular import TabularModel
from pytorch_tabular.config import DataConfig, TrainerConfig, OptimizerConfig


# Define the sweep configuration
sweep_config = {
    'method': 'random',  # Can be 'grid', 'random', 'bayes'
    'metric': {
        'name': 'valid_loss',
        'goal': 'minimize'   
    },
    'parameters': {
        'batch_size': {
            'values': [256, 512, 1024, 2048]
        },
        'max_epochs': {
            'values': [50, 200]
        },
        'min_epochs': {
            'values': [1, 50]
        },
        'early_stopping': {
            'values': [None, 'valid_loss']
        },
        'early_stopping_min_delta': {
            'values': [0.0001, 0.001]
        },
        'early_stopping_mode': {
            'values': ['min', 'max']
        },
        'early_stopping_patience': {
            'values': [3, 10]
        },
        'gradient_clip_val': {
            'values': [0.0, 1.0]
        },
        'auto_lr_find': {
            'values': [True, False]
        },
        'optimizer': {
            'values': ['Adam', 'SGD', 'RMSprop', 'AdamW']
        },
        'optimizer_params': {
            'values': [{'weight_decay': 0.0}, {'weight_decay': 0.1}]
        },
        'lr_scheduler': {
            'values': ['CosineAnnealingLR', 'StepLR', 'ReduceLROnPlateau']
        },
        'lr_scheduler_params': {
            'values': [
                {'step_size': 5, 'gamma': 0.1},
                {'step_size': 5, 'gamma': 0.5},
                {'step_size': 10, 'gamma': 0.1},
                {'step_size': 10, 'gamma': 0.5}
            ]
        },
        'lr_scheduler_monitor_metric': {
            'values': ['valid_loss']
        },
        'dropout': {
            'values': [0.1, 0.5]
        },
        'learning_rate': {
            'values': [1e-5, 1e-2]
        }
    }
}

# Initialize a sweep
sweep_id = wandb.sweep(sweep_config, project='your_project_name')

# Function to train your model
def train():
    # Initialize a new run
    wandb.init()

    # Fetch hyperparameters
    config = wandb.config

    # Create the model configuration with fetched parameters
    data_config = DataConfig(
        target=["is_fraud"],
        continuous_cols=num_col_names,
        categorical_cols=cat_col_names
    )

    trainer_config = TrainerConfig(
        batch_size=config.batch_size,
        max_epochs=config.max_epochs,
        min_epochs=config.min_epochs,
        early_stopping=config.early_stopping,
        early_stopping_min_delta=config.early_stopping_min_delta,
        early_stopping_mode=config.early_stopping_mode,
        early_stopping_patience=config.early_stopping_patience,
        gradient_clip_val=config.gradient_clip_val,
        auto_lr_find=config.auto_lr_find,
    )

    optimizer_config = OptimizerConfig(
        optimizer=config.optimizer,
        optimizer_params=config.optimizer_params,
        lr_scheduler=config.lr_scheduler,
        lr_scheduler_params=config.lr_scheduler_params,
        lr_scheduler_monitor_metric=config.lr_scheduler_monitor_metric,
    )

    # Adjust this part to use only the known parameters
    model_config = AutoIntConfig(
        task="classification",
        dropout=config.dropout,
        learning_rate=config.learning_rate,
    )

    tabular_model = TabularModel(
        data_config=data_config,
        trainer_config=trainer_config,
        optimizer_config=optimizer_config,
        model_config=model_config,
        verbose=True
    )

    # Fit the model (replace with your data)
    tabular_model.fit(train=train, validation=val)

    # Log metrics (replace with your evaluation logic)
    results = tabular_model.evaluate(test=test)
    wandb.log(results)

# Run the sweep
wandb.agent(sweep_id, train)


Create sweep with ID: cn044tbr
Sweep URL: https://wandb.ai/iiitdmkurnool/your_project_name/sweeps/cn044tbr


[34m[1mwandb[0m: Agent Starting Run: gn7tn8ik with config:
[34m[1mwandb[0m: 	auto_lr_find: True
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	early_stopping: valid_loss
[34m[1mwandb[0m: 	early_stopping_min_delta: 0.001
[34m[1mwandb[0m: 	early_stopping_mode: max
[34m[1mwandb[0m: 	early_stopping_patience: 10
[34m[1mwandb[0m: 	gradient_clip_val: 1
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	lr_scheduler: StepLR
[34m[1mwandb[0m: 	lr_scheduler_monitor_metric: valid_loss
[34m[1mwandb[0m: 	lr_scheduler_params: {'gamma': 0.1, 'step_size': 10}
[34m[1mwandb[0m: 	max_epochs: 200
[34m[1mwandb[0m: 	min_epochs: 50
[34m[1mwandb[0m: 	optimizer: Adam
[34m[1mwandb[0m: 	optimizer_params: {'weight_decay': 0.1}


Seed set to 42


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Run gn7tn8ik errored:
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/2047282636.py", line 124, in train
    tabular_model.fit(train=train, validation=val)
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/tabular_model.py", line 755, in fit
    datamodule = self.prepare_dataloader(
                 ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/tabular_model.py", line 527, in prepare_dataloader
    datamodule = TabularDatamodule(
                 ^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/tabular_datamodule.py", line 180, in __init__
    self.train = train.copy() if copy_data else train
                 ^^^^^^^^^^
AttributeError: 'function' object has no attribute 'copy'

[34m[1mwandb[0m: [32m[41

Seed set to 42


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Run zlz48vtj errored:
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/2047282636.py", line 124, in train
    tabular_model.fit(train=train, validation=val)
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/tabular_model.py", line 755, in fit
    datamodule = self.prepare_dataloader(
                 ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/tabular_model.py", line 527, in prepare_dataloader
    datamodule = TabularDatamodule(
                 ^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/tabular_datamodule.py", line 180, in __init__
    self.train = train.copy() if copy_data else train
                 ^^^^^^^^^^
AttributeError: 'function' object has no attribute 'copy'

[34m[1mwandb[0m: [32m[41

Seed set to 42


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Run ydcimhce errored:
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/2047282636.py", line 124, in train
    tabular_model.fit(train=train, validation=val)
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/tabular_model.py", line 755, in fit
    datamodule = self.prepare_dataloader(
                 ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/tabular_model.py", line 527, in prepare_dataloader
    datamodule = TabularDatamodule(
                 ^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/tabular_datamodule.py", line 180, in __init__
    self.train = train.copy() if copy_data else train
                 ^^^^^^^^^^
AttributeError: 'function' object has no attribute 'copy'

[34m[1mwandb[0m: [32m[41

Seed set to 42


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Run 7v444dy5 errored:
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/2047282636.py", line 124, in train
    tabular_model.fit(train=train, validation=val)
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/tabular_model.py", line 755, in fit
    datamodule = self.prepare_dataloader(
                 ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/tabular_model.py", line 527, in prepare_dataloader
    datamodule = TabularDatamodule(
                 ^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/tabular_datamodule.py", line 180, in __init__
    self.train = train.copy() if copy_data else train
                 ^^^^^^^^^^
AttributeError: 'function' object has no attribute 'copy'

[34m[1mwandb[0m: [32m[41

Seed set to 42


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Run p7ujkysz errored:
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/2047282636.py", line 124, in train
    tabular_model.fit(train=train, validation=val)
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/tabular_model.py", line 755, in fit
    datamodule = self.prepare_dataloader(
                 ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/tabular_model.py", line 527, in prepare_dataloader
    datamodule = TabularDatamodule(
                 ^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/tabular_datamodule.py", line 180, in __init__
    self.train = train.copy() if copy_data else train
                 ^^^^^^^^^^
AttributeError: 'function' object has no attribute 'copy'

[34m[1mwandb[0m: [32m[41

Seed set to 42


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Run c5cmv08f errored:
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/2047282636.py", line 124, in train
    tabular_model.fit(train=train, validation=val)
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/tabular_model.py", line 755, in fit
    datamodule = self.prepare_dataloader(
                 ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/tabular_model.py", line 527, in prepare_dataloader
    datamodule = TabularDatamodule(
                 ^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/tabular_datamodule.py", line 180, in __init__
    self.train = train.copy() if copy_data else train
                 ^^^^^^^^^^
AttributeError: 'function' object has no attribute 'copy'

[34m[1mwandb[0m: [32m[41

In [176]:
import wandb
from pytorch_tabular import TabularModel
from pytorch_tabular.config import DataConfig, TrainerConfig, OptimizerConfig

# Make sure to load your datasets correctly
# Replace these with your actual data loading code

# Define the sweep configuration
sweep_config = {
    'method': 'random',  # Can be 'grid', 'random', 'bayes'
    'metric': {
        'name': 'valid_loss',
        'goal': 'minimize'   
    },
    'parameters': {
        'batch_size': {
            'values': [256, 512, 1024, 2048]
        },
        'max_epochs': {
            'values': [50, 200]
        },
        'min_epochs': {
            'values': [1, 50]
        },
        'early_stopping': {
            'values': [None, 'valid_loss']
        },
        'early_stopping_min_delta': {
            'values': [0.0001, 0.001]
        },
        'early_stopping_mode': {
            'values': ['min', 'max']
        },
        'early_stopping_patience': {
            'values': [3, 10]
        },
        'gradient_clip_val': {
            'values': [0.0, 1.0]
        },
        'auto_lr_find': {
            'values': [True, False]
        },
        'optimizer': {
            'values': ['Adam', 'SGD', 'RMSprop', 'AdamW']
        },
        'optimizer_params': {
            'values': [{'weight_decay': 0.0}, {'weight_decay': 0.1}]
        },
        'lr_scheduler': {
            'values': ['CosineAnnealingLR', 'StepLR', 'ReduceLROnPlateau']
        },
        'lr_scheduler_params': {
            'values': [
                {'step_size': 5, 'gamma': 0.1},
                {'step_size': 5, 'gamma': 0.5},
                {'step_size': 10, 'gamma': 0.1},
                {'step_size': 10, 'gamma': 0.5}
            ]
        },
        'lr_scheduler_monitor_metric': {
            'values': ['valid_loss']
        },
        'dropout': {
            'values': [0.1, 0.5]
        },
        'learning_rate': {
            'values': [1e-5, 1e-2]
        }
    }
}

# Initialize a sweep
sweep_id = wandb.sweep(sweep_config, project='your_project_name')

# Function to train your model
def train():
    # Initialize a new run
    wandb.init()

    # Fetch hyperparameters
    config = wandb.config

    # Create the model configuration with fetched parameters
    data_config = DataConfig(
        target=["is_fraud"],
        continuous_cols=num_col_names,
        categorical_cols=cat_col_names
    )

    trainer_config = TrainerConfig(
        batch_size=config.batch_size,
        max_epochs=config.max_epochs,
        min_epochs=config.min_epochs,
        early_stopping=config.early_stopping,
        early_stopping_min_delta=config.early_stopping_min_delta,
        early_stopping_mode=config.early_stopping_mode,
        early_stopping_patience=config.early_stopping_patience,
        gradient_clip_val=config.gradient_clip_val,
        auto_lr_find=config.auto_lr_find,
    )

    optimizer_config = OptimizerConfig(
        optimizer=config.optimizer,
        optimizer_params=config.optimizer_params,
        lr_scheduler=config.lr_scheduler,
        lr_scheduler_params=config.lr_scheduler_params,
        lr_scheduler_monitor_metric=config.lr_scheduler_monitor_metric,
    )

    model_config = AutoIntConfig(
        task="classification",
        dropout=config.dropout,
        learning_rate=config.learning_rate,
    )

    tabular_model = TabularModel(
        data_config=data_config,
        trainer_config=trainer_config,
        optimizer_config=optimizer_config,
        model_config=model_config,
        verbose=True
    )

    # Fit the model with the training and validation data
    tabular_model.fit(train=train_dat, validation=val_dat)

    # Log metrics (replace with your evaluation logic)
    results = tabular_model.evaluate(test=test_dat)
    wandb.log(results)

# Run the sweep
wandb.agent(sweep_id, train)


Create sweep with ID: qhbufbgf
Sweep URL: https://wandb.ai/iiitdmkurnool/your_project_name/sweeps/qhbufbgf


[34m[1mwandb[0m: Agent Starting Run: qgjmcjk5 with config:
[34m[1mwandb[0m: 	auto_lr_find: False
[34m[1mwandb[0m: 	batch_size: 1024
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	early_stopping: None
[34m[1mwandb[0m: 	early_stopping_min_delta: 0.001
[34m[1mwandb[0m: 	early_stopping_mode: min
[34m[1mwandb[0m: 	early_stopping_patience: 3
[34m[1mwandb[0m: 	gradient_clip_val: 1
[34m[1mwandb[0m: 	learning_rate: 1e-05
[34m[1mwandb[0m: 	lr_scheduler: ReduceLROnPlateau
[34m[1mwandb[0m: 	lr_scheduler_monitor_metric: valid_loss
[34m[1mwandb[0m: 	lr_scheduler_params: {'gamma': 0.1, 'step_size': 5}
[34m[1mwandb[0m: 	max_epochs: 50
[34m[1mwandb[0m: 	min_epochs: 50
[34m[1mwandb[0m: 	optimizer: SGD
[34m[1mwandb[0m: 	optimizer_params: {'weight_decay': 0.1}


Seed set to 42


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs



Checkpoint directory /Users/abhilash/PyTorch_tabular/saved_models exists and is not empty.



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Run qgjmcjk5 errored:
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/4132155516.py", line 125, in train
    tabular_model.fit(train=train_dat, validation=val_dat)
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/tabular_model.py", line 779, in fit
    return self.train(model, datamodule, callbacks, max_epochs, min_epochs, handle_oom)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/tabular_model.py", line 654, in train
    self.trainer.fit(self.model, train_loader, val_loader)
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/trainer.py", line 544, in fit
    call._call_and_handle_interrupt(
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_light

[34m[1mwandb[0m: 	optimizer: SGD
[34m[1mwandb[0m: 	optimizer_params: {'weight_decay': 0}


Seed set to 42


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs



Checkpoint directory /Users/abhilash/PyTorch_tabular/saved_models exists and is not empty.



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Run j3qdm850 errored:
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/4132155516.py", line 125, in train
    tabular_model.fit(train=train_dat, validation=val_dat)
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/tabular_model.py", line 779, in fit
    return self.train(model, datamodule, callbacks, max_epochs, min_epochs, handle_oom)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/tabular_model.py", line 654, in train
    self.trainer.fit(self.model, train_loader, val_loader)
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/trainer.py", line 544, in fit
    call._call_and_handle_interrupt(
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_light

[34m[1mwandb[0m: 	optimizer: AdamW
[34m[1mwandb[0m: 	optimizer_params: {'weight_decay': 0.1}


Seed set to 42


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs



Checkpoint directory /Users/abhilash/PyTorch_tabular/saved_models exists and is not empty.



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Run ptzmqi6m errored:
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/4132155516.py", line 125, in train
    tabular_model.fit(train=train_dat, validation=val_dat)
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/tabular_model.py", line 779, in fit
    return self.train(model, datamodule, callbacks, max_epochs, min_epochs, handle_oom)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/tabular_model.py", line 632, in train
    result = Tuner(self.trainer).lr_find(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/tuner/tuning.py", line 177, in lr_find
    self._trainer.fit(model, train_dataloaders, val_dataloaders, datamodule)

[34m[1mwandb[0m: 	early_stopping: valid_loss
[34m[1mwandb[0m: 	early_stopping_min_delta: 0.001
[34m[1mwandb[0m: 	early_stopping_mode: max
[34m[1mwandb[0m: 	early_stopping_patience: 10
[34m[1mwandb[0m: 	gradient_clip_val: 1
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	lr_scheduler: CosineAnnealingLR
[34m[1mwandb[0m: 	lr_scheduler_monitor_metric: valid_loss
[34m[1mwandb[0m: 	lr_scheduler_params: {'gamma': 0.1, 'step_size': 10}
[34m[1mwandb[0m: 	max_epochs: 200
[34m[1mwandb[0m: 	min_epochs: 50
[34m[1mwandb[0m: 	optimizer: RMSprop
[34m[1mwandb[0m: 	optimizer_params: {'weight_decay': 0}


Seed set to 42


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs



Checkpoint directory /Users/abhilash/PyTorch_tabular/saved_models exists and is not empty.



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Run kdfxuene errored:
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/4132155516.py", line 125, in train
    tabular_model.fit(train=train_dat, validation=val_dat)
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/tabular_model.py", line 779, in fit
    return self.train(model, datamodule, callbacks, max_epochs, min_epochs, handle_oom)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/tabular_model.py", line 632, in train
    result = Tuner(self.trainer).lr_find(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/tuner/tuning.py", line 177, in lr_find
    self._trainer.fit(model, train_dataloaders, val_dataloaders, datamodule)

[34m[1mwandb[0m: 	early_stopping: None
[34m[1mwandb[0m: 	early_stopping_min_delta: 0.001
[34m[1mwandb[0m: 	early_stopping_mode: min
[34m[1mwandb[0m: 	early_stopping_patience: 3
[34m[1mwandb[0m: 	gradient_clip_val: 0
[34m[1mwandb[0m: 	learning_rate: 1e-05
[34m[1mwandb[0m: 	lr_scheduler: CosineAnnealingLR
[34m[1mwandb[0m: 	lr_scheduler_monitor_metric: valid_loss
[34m[1mwandb[0m: 	lr_scheduler_params: {'gamma': 0.5, 'step_size': 5}
[34m[1mwandb[0m: 	max_epochs: 200
[34m[1mwandb[0m: 	min_epochs: 1
[34m[1mwandb[0m: 	optimizer: RMSprop
[34m[1mwandb[0m: 	optimizer_params: {'weight_decay': 0.1}


Seed set to 42


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs



Checkpoint directory /Users/abhilash/PyTorch_tabular/saved_models exists and is not empty.



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Run k2l2abu2 errored:
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/4132155516.py", line 125, in train
    tabular_model.fit(train=train_dat, validation=val_dat)
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/tabular_model.py", line 779, in fit
    return self.train(model, datamodule, callbacks, max_epochs, min_epochs, handle_oom)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/tabular_model.py", line 654, in train
    self.trainer.fit(self.model, train_loader, val_loader)
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/trainer.py", line 544, in fit
    call._call_and_handle_interrupt(
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_light

[34m[1mwandb[0m: 	optimizer: SGD
[34m[1mwandb[0m: 	optimizer_params: {'weight_decay': 0.1}


Seed set to 42


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs



Checkpoint directory /Users/abhilash/PyTorch_tabular/saved_models exists and is not empty.



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Run b96hf5ol errored:
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/4132155516.py", line 125, in train
    tabular_model.fit(train=train_dat, validation=val_dat)
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/tabular_model.py", line 779, in fit
    return self.train(model, datamodule, callbacks, max_epochs, min_epochs, handle_oom)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/tabular_model.py", line 654, in train
    self.trainer.fit(self.model, train_loader, val_loader)
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/trainer.py", line 544, in fit
    call._call_and_handle_interrupt(
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_light

In [178]:
train_dat.to_csv("train_dat.csv",index=False)
val_dat.to_csv("val_dat.csv",index=False)
test_dat.to_csv("test_dat.csv",index=False)

In [180]:
import pandas as pd
import wandb
from pytorch_tabular import TabularModel
from pytorch_tabular.config import DataConfig, TrainerConfig, OptimizerConfig

# Example data loading - replace this with your actual data loading logic
train_data = pd.read_csv('train_dat.csv')  # Replace with your actual train data path
val_data = pd.read_csv('val_dat.csv')      # Replace with your actual validation data path
test_data = pd.read_csv('test_dat.csv')    # Replace with your actual test data path

# Define the sweep configuration
sweep_config = {
    'method': 'random',  # Can be 'grid', 'random', 'bayes'
    'metric': {
        'name': 'valid_loss',
        'goal': 'minimize'   
    },
    'parameters': {
        'batch_size': {
            'values': [256, 512, 1024, 2048]
        },
        'max_epochs': {
            'values': [50, 200]
        },
        'min_epochs': {
            'values': [1, 50]
        },
        'early_stopping': {
            'values': [None, 'valid_loss']
        },
        'early_stopping_min_delta': {
            'values': [0.0001, 0.001]
        },
        'early_stopping_mode': {
            'values': ['min', 'max']
        },
        'early_stopping_patience': {
            'values': [3, 10]
        },
        'gradient_clip_val': {
            'values': [0.0, 1.0]
        },
        'auto_lr_find': {
            'values': [True, False]
        },
        'optimizer': {
            'values': ['Adam', 'SGD', 'RMSprop', 'AdamW']
        },
        'optimizer_params': {
            'values': [{'weight_decay': 0.0}, {'weight_decay': 0.1}]
        },
        'lr_scheduler': {
            'values': ['CosineAnnealingLR', 'StepLR', 'ReduceLROnPlateau']
        },
        'lr_scheduler_params': {
            'values': [
                {'step_size': 5, 'gamma': 0.1},
                {'step_size': 5, 'gamma': 0.5},
                {'step_size': 10, 'gamma': 0.1},
                {'step_size': 10, 'gamma': 0.5}
            ]
        },
        'lr_scheduler_monitor_metric': {
            'values': ['valid_loss']
        },
        'dropout': {
            'values': [0.1, 0.5]
        },
        'learning_rate': {
            'values': [1e-5, 1e-2]
        }
    }
}

# Initialize a sweep
sweep_id = wandb.sweep(sweep_config, project='your_project_name')

# Function to train your model
def train():
    # Initialize a new run
    wandb.init()

    # Fetch hyperparameters
    config = wandb.config

    # Create the model configuration with fetched parameters
    data_config = DataConfig(
        target=["is_fraud"],  # Replace with your actual target column name
        continuous_cols=num_col_names,  # Replace with your actual continuous column names
        categorical_cols=cat_col_names  # Replace with your actual categorical column names
    )

    trainer_config = TrainerConfig(
        batch_size=config.batch_size,
        max_epochs=config.max_epochs,
        min_epochs=config.min_epochs,
        early_stopping=config.early_stopping,
        early_stopping_min_delta=config.early_stopping_min_delta,
        early_stopping_mode=config.early_stopping_mode,
        early_stopping_patience=config.early_stopping_patience,
        gradient_clip_val=config.gradient_clip_val,
        auto_lr_find=config.auto_lr_find,
    )

    optimizer_config = OptimizerConfig(
        optimizer=config.optimizer,
        optimizer_params=config.optimizer_params,
        lr_scheduler=config.lr_scheduler,
        lr_scheduler_params=config.lr_scheduler_params,
        lr_scheduler_monitor_metric=config.lr_scheduler_monitor_metric,
    )

    model_config = AutoIntConfig(
        task="classification",
        dropout=config.dropout,
        learning_rate=config.learning_rate,
    )

    tabular_model = TabularModel(
        data_config=data_config,
        trainer_config=trainer_config,
        optimizer_config=optimizer_config,
        model_config=model_config,
        verbose=True
    )

    # Fit the model with the training and validation data
    tabular_model.fit(train=train_data, validation=val_data)

    # Log metrics (replace with your evaluation logic)
    results = tabular_model.evaluate(test=test_data)
    wandb.log(results)

# Run the sweep
wandb.agent(sweep_id, train)


Create sweep with ID: vre0wb9g
Sweep URL: https://wandb.ai/iiitdmkurnool/your_project_name/sweeps/vre0wb9g


[34m[1mwandb[0m: Agent Starting Run: xttzwn2f with config:
[34m[1mwandb[0m: 	auto_lr_find: True
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	early_stopping: valid_loss
[34m[1mwandb[0m: 	early_stopping_min_delta: 0.001
[34m[1mwandb[0m: 	early_stopping_mode: max
[34m[1mwandb[0m: 	early_stopping_patience: 10
[34m[1mwandb[0m: 	gradient_clip_val: 1
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	lr_scheduler: CosineAnnealingLR
[34m[1mwandb[0m: 	lr_scheduler_monitor_metric: valid_loss
[34m[1mwandb[0m: 	lr_scheduler_params: {'gamma': 0.5, 'step_size': 5}
[34m[1mwandb[0m: 	max_epochs: 50
[34m[1mwandb[0m: 	min_epochs: 1
[34m[1mwandb[0m: 	optimizer: Adam
[34m[1mwandb[0m: 	optimizer_params: {'weight_decay': 0.1}


Seed set to 42


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs



Checkpoint directory /Users/abhilash/PyTorch_tabular/saved_models exists and is not empty.



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Run xttzwn2f errored:
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/2497031321.py", line 128, in train
    tabular_model.fit(train=train_data, validation=val_data)
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/tabular_model.py", line 779, in fit
    return self.train(model, datamodule, callbacks, max_epochs, min_epochs, handle_oom)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/tabular_model.py", line 632, in train
    result = Tuner(self.trainer).lr_find(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/tuner/tuning.py", line 177, in lr_find
    self._trainer.fit(model, train_dataloaders, val_dataloaders, datamodul

[34m[1mwandb[0m: 	early_stopping: None
[34m[1mwandb[0m: 	early_stopping_min_delta: 0.001
[34m[1mwandb[0m: 	early_stopping_mode: max
[34m[1mwandb[0m: 	early_stopping_patience: 3
[34m[1mwandb[0m: 	gradient_clip_val: 1
[34m[1mwandb[0m: 	learning_rate: 1e-05
[34m[1mwandb[0m: 	lr_scheduler: StepLR
[34m[1mwandb[0m: 	lr_scheduler_monitor_metric: valid_loss
[34m[1mwandb[0m: 	lr_scheduler_params: {'gamma': 0.5, 'step_size': 5}
[34m[1mwandb[0m: 	max_epochs: 50
[34m[1mwandb[0m: 	min_epochs: 50
[34m[1mwandb[0m: 	optimizer: RMSprop
[34m[1mwandb[0m: 	optimizer_params: {'weight_decay': 0.1}


Seed set to 42


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs



Checkpoint directory /Users/abhilash/PyTorch_tabular/saved_models exists and is not empty.


The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


The number of training batches (7) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.



Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s]

Trainer was signaled to stop but the required `min_epochs=50` or `min_steps=None` has not been met. Training will continue...
`Trainer.fit` stopped: `max_steps=100` reached.
Learning rate set to 0.003981071705534969
Restoring states from the checkpoint path at /Users/abhilash/PyTorch_tabular/.lr_find_521f1aeb-f4cf-4f91-bfc6-852866928ba4.ckpt
Restored all states from the checkpoint at /Users/abhilash/PyTorch_tabular/.lr_find_521f1aeb-f4cf-4f91-bfc6-852866928ba4.ckpt


Output()

`Trainer.fit` stopped: `max_epochs=50` reached.


Output()


The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Run vxcpi237 errored:
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/2497031321.py", line 132, in train
    wandb.log(results)
  File "/opt/anaconda3/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 420, in wrapper
    return func(self, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 371, in wrapper_fn
    return func(self, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 361, in wrapper
    return func(self, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 1838, in log
    self._log(data=data, step=step, commit=commit)
  File "/opt/a

Seed set to 42


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs



Checkpoint directory /Users/abhilash/PyTorch_tabular/saved_models exists and is not empty.



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Run zwxyz9z5 errored:
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/2497031321.py", line 128, in train
    tabular_model.fit(train=train_data, validation=val_data)
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/tabular_model.py", line 779, in fit
    return self.train(model, datamodule, callbacks, max_epochs, min_epochs, handle_oom)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/tabular_model.py", line 632, in train
    result = Tuner(self.trainer).lr_find(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/tuner/tuning.py", line 177, in lr_find
    self._trainer.fit(model, train_dataloaders, val_dataloaders, datamodul

[34m[1mwandb[0m: 	early_stopping: None
[34m[1mwandb[0m: 	early_stopping_min_delta: 0.0001
[34m[1mwandb[0m: 	early_stopping_mode: min
[34m[1mwandb[0m: 	early_stopping_patience: 10
[34m[1mwandb[0m: 	gradient_clip_val: 1
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	lr_scheduler: ReduceLROnPlateau
[34m[1mwandb[0m: 	lr_scheduler_monitor_metric: valid_loss
[34m[1mwandb[0m: 	lr_scheduler_params: {'gamma': 0.5, 'step_size': 5}
[34m[1mwandb[0m: 	max_epochs: 200
[34m[1mwandb[0m: 	min_epochs: 50
[34m[1mwandb[0m: 	optimizer: SGD
[34m[1mwandb[0m: 	optimizer_params: {'weight_decay': 0.1}


Seed set to 42


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs



Checkpoint directory /Users/abhilash/PyTorch_tabular/saved_models exists and is not empty.



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Run 7s3fyl0x errored:
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/2497031321.py", line 128, in train
    tabular_model.fit(train=train_data, validation=val_data)
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/tabular_model.py", line 779, in fit
    return self.train(model, datamodule, callbacks, max_epochs, min_epochs, handle_oom)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/tabular_model.py", line 632, in train
    result = Tuner(self.trainer).lr_find(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/tuner/tuning.py", line 177, in lr_find
    self._trainer.fit(model, train_dataloaders, val_dataloaders, datamodul

[34m[1mwandb[0m: 	early_stopping: None
[34m[1mwandb[0m: 	early_stopping_min_delta: 0.001
[34m[1mwandb[0m: 	early_stopping_mode: min
[34m[1mwandb[0m: 	early_stopping_patience: 3
[34m[1mwandb[0m: 	gradient_clip_val: 0
[34m[1mwandb[0m: 	learning_rate: 1e-05
[34m[1mwandb[0m: 	lr_scheduler: CosineAnnealingLR
[34m[1mwandb[0m: 	lr_scheduler_monitor_metric: valid_loss
[34m[1mwandb[0m: 	lr_scheduler_params: {'gamma': 0.1, 'step_size': 5}
[34m[1mwandb[0m: 	max_epochs: 200
[34m[1mwandb[0m: 	min_epochs: 50
[34m[1mwandb[0m: 	optimizer: AdamW
[34m[1mwandb[0m: 	optimizer_params: {'weight_decay': 0}


Seed set to 42


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs



Checkpoint directory /Users/abhilash/PyTorch_tabular/saved_models exists and is not empty.



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Run 18hiuiwi errored:
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/2497031321.py", line 128, in train
    tabular_model.fit(train=train_data, validation=val_data)
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/tabular_model.py", line 779, in fit
    return self.train(model, datamodule, callbacks, max_epochs, min_epochs, handle_oom)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/tabular_model.py", line 632, in train
    result = Tuner(self.trainer).lr_find(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/tuner/tuning.py", line 177, in lr_find
    self._trainer.fit(model, train_dataloaders, val_dataloaders, datamodul

[34m[1mwandb[0m: 	early_stopping: None
[34m[1mwandb[0m: 	early_stopping_min_delta: 0.001
[34m[1mwandb[0m: 	early_stopping_mode: max
[34m[1mwandb[0m: 	early_stopping_patience: 3
[34m[1mwandb[0m: 	gradient_clip_val: 0
[34m[1mwandb[0m: 	learning_rate: 1e-05
[34m[1mwandb[0m: 	lr_scheduler: CosineAnnealingLR
[34m[1mwandb[0m: 	lr_scheduler_monitor_metric: valid_loss
[34m[1mwandb[0m: 	lr_scheduler_params: {'gamma': 0.1, 'step_size': 10}
[34m[1mwandb[0m: 	max_epochs: 50
[34m[1mwandb[0m: 	min_epochs: 1
[34m[1mwandb[0m: 	optimizer: RMSprop
[34m[1mwandb[0m: 	optimizer_params: {'weight_decay': 0.1}


Seed set to 42


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs



Checkpoint directory /Users/abhilash/PyTorch_tabular/saved_models exists and is not empty.



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Run a6ik3pt9 errored:
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/2497031321.py", line 128, in train
    tabular_model.fit(train=train_data, validation=val_data)
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/tabular_model.py", line 779, in fit
    return self.train(model, datamodule, callbacks, max_epochs, min_epochs, handle_oom)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_tabular/tabular_model.py", line 632, in train
    result = Tuner(self.trainer).lr_find(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/tuner/tuning.py", line 177, in lr_find
    self._trainer.fit(model, train_dataloaders, val_dataloaders, datamodul

In [25]:
# Define the sweep configuration
sweep_config = {
    'method': 'random',  # Can be 'grid', 'random', 'bayes'
    'metric': {
        'name': 'accuracy',
        'goal': 'maximize'   
    },
    'parameters': {
        'batch_size': {
            'values': [256, 512, 1024, 2048]
        },
        'max_epochs': {
            'values': [50, 200]
        },
        'min_epochs': {
            'values': [1, 50]
        },
        'early_stopping': {
            'values': [None, 'valid_loss']
        },
        'early_stopping_min_delta': {
            'values': [0.0001, 0.001]
        },
        'early_stopping_mode': {
            'values': ['min', 'max']
        },
        'early_stopping_patience': {
            'values': [3, 10]
        },
        'gradient_clip_val': {
            'values': [0.0, 1.0]
        },
        'auto_lr_find': {
            'values': [True, False]
        },
        'optimizer': {
            'values': ['Adam', 'SGD', 'RMSprop', 'AdamW']
        },
        'optimizer_params': {
            'values': [{'weight_decay': 0.0}, {'weight_decay': 0.1}]
        },
        'lr_scheduler': {
            'values': ['CosineAnnealingLR', 'StepLR', 'ReduceLROnPlateau']
        },
        'cosine_annealing_lr_params': {
            'values': [{'T_max': 10, 'eta_min': 0}]
        },
        'step_lr_params': {
            'values': [
                {'step_size': 5, 'gamma': 0.1},
                {'step_size': 5, 'gamma': 0.5},
                {'step_size': 10, 'gamma': 0.1},
                {'step_size': 10, 'gamma': 0.5}
            ]
        },
        'reduce_lr_on_plateau_params': {
            'values': [
                {'factor': 0.1, 'patience': 10},
                {'factor': 0.5, 'patience': 5}
            ]
        },
        'lr_scheduler_monitor_metric': {
            'values': ['valid_loss']
        },
        'dropout': {
            'values': [0.1, 0.5]
        },
        'learning_rate': {
            'values': [1e-5, 1e-2]
        }
    }
}


In [None]:
import wandb
from pytorch_tabular import TabularModel
from pytorch_tabular.config import DataConfig, TrainerConfig, OptimizerConfig

# Make sure to load your datasets correctly
# Replace these with your actual data loading code
train_data = pd.read_csv('train_dat.csv')
val_data = pd.read_csv('val_dat.csv')
test_data = pd.read_csv('test_dat.csv')

# Initialize a sweep
sweep_id = wandb.sweep(sweep_config, project='your_project_name')

# Function to train your model
def train():
    # Initialize a new run
    wandb.init()

    # Fetch hyperparameters
    config = wandb.config

    # Create the model configuration with fetched parameters
    data_config = DataConfig(
        target=["is_fraud"],
        continuous_cols=num_col_names,
        categorical_cols=cat_col_names
    )

    trainer_config = TrainerConfig(
        batch_size=config.batch_size,
        max_epochs=config.max_epochs,
        min_epochs=config.min_epochs,
        early_stopping=config.early_stopping,
        early_stopping_min_delta=config.early_stopping_min_delta,
        early_stopping_mode=config.early_stopping_mode,
        early_stopping_patience=config.early_stopping_patience,
        gradient_clip_val=config.gradient_clip_val,
        auto_lr_find=config.auto_lr_find,
    )

    optimizer_config = OptimizerConfig(
        optimizer=config.optimizer,
        optimizer_params=config.optimizer_params,
        lr_scheduler=config.lr_scheduler,
        lr_scheduler_monitor_metric=config.lr_scheduler_monitor_metric,
    )

    if config.lr_scheduler == 'CosineAnnealingLR':
        optimizer_config.lr_scheduler_params = config.cosine_annealing_lr_params
    elif config.lr_scheduler == 'StepLR':
        optimizer_config.lr_scheduler_params = config.step_lr_params
    elif config.lr_scheduler == 'ReduceLROnPlateau':
        optimizer_config.lr_scheduler_params = config.reduce_lr_on_plateau_params

    model_config = AutoIntConfig(
        task="classification",
        dropout=config.dropout,
        learning_rate=config.learning_rate,
    )

    tabular_model = TabularModel(
        data_config=data_config,
        trainer_config=trainer_config,
        optimizer_config=optimizer_config,
        model_config=model_config,
        verbose=True
    )

    # Fit the model with the training and validation data
    tabular_model.fit(train=train_data, validation=val_data)

    # Log metrics (replace with your evaluation logic)
    results = tabular_model.evaluate(test=test_data)
    wandb.log(results.metrics)

# Run the sweep
wandb.agent(sweep_id, train)


Create sweep with ID: f07cewhy
Sweep URL: https://wandb.ai/iiitdmkurnool/your_project_name/sweeps/f07cewhy


[34m[1mwandb[0m: Agent Starting Run: w8rrb9zy with config:
[34m[1mwandb[0m: 	auto_lr_find: True
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	cosine_annealing_lr_params: {'T_max': 10, 'eta_min': 0}
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	early_stopping: None
[34m[1mwandb[0m: 	early_stopping_min_delta: 0.001
[34m[1mwandb[0m: 	early_stopping_mode: min
[34m[1mwandb[0m: 	early_stopping_patience: 10
[34m[1mwandb[0m: 	gradient_clip_val: 1
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	lr_scheduler: StepLR
[34m[1mwandb[0m: 	lr_scheduler_monitor_metric: valid_loss
[34m[1mwandb[0m: 	max_epochs: 50
[34m[1mwandb[0m: 	min_epochs: 50
[34m[1mwandb[0m: 	optimizer: Adam
[34m[1mwandb[0m: 	optimizer_params: {'weight_decay': 0.1}
[34m[1mwandb[0m: 	reduce_lr_on_plateau_params: {'factor': 0.1, 'patience': 10}
[34m[1mwandb[0m: 	step_lr_params: {'gamma': 0.1, 'step_size': 5}


Seed set to 42


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs



Checkpoint directory /Users/abhilash/PyTorch_tabular/saved_models exists and is not empty.


The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


The number of training batches (13) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.



Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_steps=100` reached.
Failed to compute suggestion for learning rate because there are not enough points. Increase the loop iteration limits or the size of your dataset/dataloader.
Restoring states from the checkpoint path at /Users/abhilash/PyTorch_tabular/.lr_find_78dcdb8b-ac91-45f2-8fb3-7797b9b8c20d.ckpt
Restored all states from the checkpoint at /Users/abhilash/PyTorch_tabular/.lr_find_78dcdb8b-ac91-45f2-8fb3-7797b9b8c20d.ckpt
Failed to compute suggestion for learning rate because there are not enough points. Increase the loop iteration limits or the size of your dataset/dataloader.


Output()

`Trainer.fit` stopped: `max_epochs=50` reached.


Output()


The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.



VBox(children=(Label(value='0.001 MB of 0.012 MB uploaded\r'), FloatProgress(value=0.060238095238095236, max=1…

Run w8rrb9zy errored:
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/1274795152.py", line 74, in train
    wandb.log(results.metrics)
              ^^^^^^^^^^^^^^^
AttributeError: 'list' object has no attribute 'metrics'

[34m[1mwandb[0m: [32m[41mERROR[0m Run w8rrb9zy errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/1274795152.py", line 74, in train
[34m[1mwandb[0m: [32m[41mERROR[0m     wandb.log(results.metrics)
[34m[1mwandb[0m: [32m[41mERROR[0m 

Seed set to 42


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs



Checkpoint directory /Users/abhilash/PyTorch_tabular/saved_models exists and is not empty.


The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


The number of training batches (13) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.



Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_steps=100` reached.
Failed to compute suggestion for learning rate because there are not enough points. Increase the loop iteration limits or the size of your dataset/dataloader.
Restoring states from the checkpoint path at /Users/abhilash/PyTorch_tabular/.lr_find_a1c4c3d2-2c72-486d-b199-5ee2a09cb504.ckpt
Restored all states from the checkpoint at /Users/abhilash/PyTorch_tabular/.lr_find_a1c4c3d2-2c72-486d-b199-5ee2a09cb504.ckpt
Failed to compute suggestion for learning rate because there are not enough points. Increase the loop iteration limits or the size of your dataset/dataloader.


Output()

`Trainer.fit` stopped: `max_epochs=200` reached.


Output()


The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.



VBox(children=(Label(value='0.001 MB of 0.031 MB uploaded\r'), FloatProgress(value=0.023364629829151915, max=1…

Run ilf92u3a errored:
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/1274795152.py", line 74, in train
    wandb.log(results.metrics)
              ^^^^^^^^^^^^^^^
AttributeError: 'list' object has no attribute 'metrics'

[34m[1mwandb[0m: [32m[41mERROR[0m Run ilf92u3a errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/1274795152.py", line 74, in train
[34m[1mwandb[0m: [32m[41mERROR[0m     wandb.log(results.metrics)
[34m[1mwandb[0m: [32m[41mERROR[0m 

Seed set to 42


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs



Checkpoint directory /Users/abhilash/PyTorch_tabular/saved_models exists and is not empty.



Output()

`Trainer.fit` stopped: `max_epochs=50` reached.


Output()


The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Run a9zxg04e errored:
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/1274795152.py", line 74, in train
    wandb.log(results.metrics)
              ^^^^^^^^^^^^^^^
AttributeError: 'list' object has no attribute 'metrics'

[34m[1mwandb[0m: [32m[41mERROR[0m Run a9zxg04e errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/1274795152.py", line 74, in train
[34m[1mwandb[0m: [32m[41mERROR[0m     wandb.log(results.metrics)
[34m[1mwandb[0m: [32m[41mERROR[0m 

Seed set to 42


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs



Checkpoint directory /Users/abhilash/PyTorch_tabular/saved_models exists and is not empty.


The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


The number of training batches (13) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.



Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_steps=100` reached.
Failed to compute suggestion for learning rate because there are not enough points. Increase the loop iteration limits or the size of your dataset/dataloader.
Restoring states from the checkpoint path at /Users/abhilash/PyTorch_tabular/.lr_find_b104fbfa-7364-4bde-870a-c3a65d5c1c6b.ckpt
Restored all states from the checkpoint at /Users/abhilash/PyTorch_tabular/.lr_find_b104fbfa-7364-4bde-870a-c3a65d5c1c6b.ckpt
Failed to compute suggestion for learning rate because there are not enough points. Increase the loop iteration limits or the size of your dataset/dataloader.


Output()

Output()


The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.



VBox(children=(Label(value='0.001 MB of 0.012 MB uploaded\r'), FloatProgress(value=0.060377058308805985, max=1…

Run 6d4vdbxb errored:
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/1274795152.py", line 74, in train
    wandb.log(results.metrics)
              ^^^^^^^^^^^^^^^
AttributeError: 'list' object has no attribute 'metrics'

[34m[1mwandb[0m: [32m[41mERROR[0m Run 6d4vdbxb errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/1274795152.py", line 74, in train
[34m[1mwandb[0m: [32m[41mERROR[0m     wandb.log(results.metrics)
[34m[1mwandb[0m: [32m[41mERROR[0m 

Seed set to 42


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs



Checkpoint directory /Users/abhilash/PyTorch_tabular/saved_models exists and is not empty.


The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


The number of training batches (4) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.



Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_steps=100` reached.
Failed to compute suggestion for learning rate because there are not enough points. Increase the loop iteration limits or the size of your dataset/dataloader.
Restoring states from the checkpoint path at /Users/abhilash/PyTorch_tabular/.lr_find_240b76db-8d09-4fe4-adf8-2752ec88258a.ckpt
Restored all states from the checkpoint at /Users/abhilash/PyTorch_tabular/.lr_find_240b76db-8d09-4fe4-adf8-2752ec88258a.ckpt
Failed to compute suggestion for learning rate because there are not enough points. Increase the loop iteration limits or the size of your dataset/dataloader.


Output()

`Trainer.fit` stopped: `max_epochs=200` reached.


Output()


The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.



VBox(children=(Label(value='0.001 MB of 0.031 MB uploaded\r'), FloatProgress(value=0.023361753208778355, max=1…

Run anf2lqv4 errored:
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/1274795152.py", line 74, in train
    wandb.log(results.metrics)
              ^^^^^^^^^^^^^^^
AttributeError: 'list' object has no attribute 'metrics'

[34m[1mwandb[0m: [32m[41mERROR[0m Run anf2lqv4 errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/opt/anaconda3/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/var/folders/yv/n0msxd8s6h91yhq5g8421r900000gn/T/ipykernel_88863/1274795152.py", line 74, in train
[34m[1mwandb[0m: [32m[41mERROR[0m     wandb.log(results.metrics)
[34m[1mwandb[0m: [32m[41mERROR[0m 

Seed set to 42


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs



Checkpoint directory /Users/abhilash/PyTorch_tabular/saved_models exists and is not empty.


The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


The number of training batches (26) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.



Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_steps=100` reached.
Failed to compute suggestion for learning rate because there are not enough points. Increase the loop iteration limits or the size of your dataset/dataloader.
Restoring states from the checkpoint path at /Users/abhilash/PyTorch_tabular/.lr_find_e735529a-45c3-400b-b657-5e59aab40d68.ckpt
Restored all states from the checkpoint at /Users/abhilash/PyTorch_tabular/.lr_find_e735529a-45c3-400b-b657-5e59aab40d68.ckpt
Failed to compute suggestion for learning rate because there are not enough points. Increase the loop iteration limits or the size of your dataset/dataloader.


Output()

In [24]:
import wandb
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mabhilashsankanagoudar[0m ([33miiitdmkurnool[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

In [27]:
import pandas as pd

In [28]:
train_data = pd.read_csv('train_dat.csv')
val_data = pd.read_csv('val_dat.csv')
test_data = pd.read_csv('test_dat.csv')

In [None]:
from pytorch_tabular.models import AutoIntConfig
from pytorch_tabular.config import (

In [31]:
from pytorch_tabular.models import AutoIntConfig

In [None]:
import wandb
from pytorch_tabular import TabularModel
from pytorch_tabular.config import DataConfig, TrainerConfig, OptimizerConfig

# Define the sweep configuration
sweep_config = {
    'method': 'random',  # Can be 'grid', 'random', 'bayes'
    'metric': {
        'name': 'valid_loss',
        'goal': 'minimize'
    },
    'parameters': {
        'batch_size': {
            'values': [256, 512, 1024, 2048]
        },
        'max_epochs': {
            'values': [50, 200]
        },
        'min_epochs': {
            'values': [1, 50]
        },
        'early_stopping': {
            'values': [None, 'valid_loss']
        },
        'early_stopping_min_delta': {
            'values': [0.0001, 0.001]
        },
        'early_stopping_mode': {
            'values': ['min', 'max']
        },
        'early_stopping_patience': {
            'values': [3, 10]
        },
        'gradient_clip_val': {
            'values': [0.0, 1.0]
        },
        'auto_lr_find': {
            'values': [True, False]
        },
        'optimizer': {
            'values': ['Adam', 'SGD', 'RMSprop', 'AdamW']
        },
        'optimizer_params': {
            'values': [{'weight_decay': 0.0}, {'weight_decay': 0.1}]
        },
        'lr_scheduler': {
            'values': ['CosineAnnealingLR', 'StepLR', 'ReduceLROnPlateau']
        },
        'cosine_annealing_lr_params': {
            'values': [{'T_max': 10, 'eta_min': 0}]
        },
        'step_lr_params': {
            'values': [
                {'step_size': 5, 'gamma': 0.1},
                {'step_size': 5, 'gamma': 0.5},
                {'step_size': 10, 'gamma': 0.1},
                {'step_size': 10, 'gamma': 0.5}
            ]
        },
        'reduce_lr_on_plateau_params': {
            'values': [
                {'factor': 0.1, 'patience': 10},
                {'factor': 0.5, 'patience': 5}
            ]
        },
        'lr_scheduler_monitor_metric': {
            'values': ['valid_loss']
        },
        'dropout': {
            'values': [0.1, 0.5]
        },
        'learning_rate': {
            'values': [1e-5, 1e-2]
        }
    }
}

# Initialize a sweep
sweep_id = wandb.sweep(sweep_config, project='your_project_name')

# Function to train your model
def train():
    # Initialize a new run
    wandb.init()

    # Fetch hyperparameters
    config = wandb.config

    # Create the model configuration with fetched parameters
    data_config = DataConfig(
        target=["is_fraud"],
        continuous_cols=num_col_names,
        categorical_cols=cat_col_names
    )

    trainer_config = TrainerConfig(
        batch_size=config.batch_size,
        max_epochs=config.max_epochs,
        min_epochs=config.min_epochs,
        early_stopping=config.early_stopping,
        early_stopping_min_delta=config.early_stopping_min_delta,
        early_stopping_mode=config.early_stopping_mode,
        early_stopping_patience=config.early_stopping_patience,
        gradient_clip_val=config.gradient_clip_val,
        auto_lr_find=config.auto_lr_find,
    )

    optimizer_config = OptimizerConfig(
        optimizer=config.optimizer,
        optimizer_params=config.optimizer_params,
        lr_scheduler=config.lr_scheduler,
        lr_scheduler_monitor_metric=config.lr_scheduler_monitor_metric,
    )

    if config.lr_scheduler == 'CosineAnnealingLR':
        optimizer_config.lr_scheduler_params = config.cosine_annealing_lr_params
    elif config.lr_scheduler == 'StepLR':
        optimizer_config.lr_scheduler_params = config.step_lr_params
    elif config.lr_scheduler == 'ReduceLROnPlateau':
        optimizer_config.lr_scheduler_params = config.reduce_lr_on_plateau_params

    model_config = AutoIntConfig(
        task="classification",
        dropout=config.dropout,
        learning_rate=config.learning_rate,
    )

    tabular_model = TabularModel(
        data_config=data_config,
        trainer_config=trainer_config,
        optimizer_config=optimizer_config,
        model_config=model_config,
        verbose=True
    )

    # Fit the model with the training and validation data
    tabular_model.fit(train=train_data, validation=val_data)

    # Evaluate the model
    results = tabular_model.evaluate(test=test_data)

    # Convert the results to a dictionary if needed
    if isinstance(results, list):
        results_dict = {f"metric_{i}": result for i, result in enumerate(results)}
    else:
        results_dict = results

    # Log metrics (replace with your evaluation logic)
    wandb.log(results_dict)

# Run the sweep
wandb.agent(sweep_id, train)


Create sweep with ID: iqe3owdw
Sweep URL: https://wandb.ai/iiitdmkurnool/your_project_name/sweeps/iqe3owdw


[34m[1mwandb[0m: Agent Starting Run: tp035iml with config:
[34m[1mwandb[0m: 	auto_lr_find: True
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	cosine_annealing_lr_params: {'T_max': 10, 'eta_min': 0}
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	early_stopping: valid_loss
[34m[1mwandb[0m: 	early_stopping_min_delta: 0.0001
[34m[1mwandb[0m: 	early_stopping_mode: max
[34m[1mwandb[0m: 	early_stopping_patience: 3
[34m[1mwandb[0m: 	gradient_clip_val: 1
[34m[1mwandb[0m: 	learning_rate: 1e-05
[34m[1mwandb[0m: 	lr_scheduler: CosineAnnealingLR
[34m[1mwandb[0m: 	lr_scheduler_monitor_metric: valid_loss
[34m[1mwandb[0m: 	max_epochs: 200
[34m[1mwandb[0m: 	min_epochs: 1
[34m[1mwandb[0m: 	optimizer: RMSprop
[34m[1mwandb[0m: 	optimizer_params: {'weight_decay': 0}
[34m[1mwandb[0m: 	reduce_lr_on_plateau_params: {'factor': 0.1, 'patience': 10}
[34m[1mwandb[0m: 	step_lr_params: {'gamma': 0.1, 'step_size': 10}


Seed set to 42


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:639: Checkpoint directory /Users/abhilash/PyTorch_tabular/saved_models exists and is not empty.
/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/loops/fit_loop.py:293: The number of training batches (13) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` 

Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s]

LR finder stopped early after 76 steps due to diverging loss.
Learning rate set to 0.00478630092322638
Restoring states from the checkpoint path at /Users/abhilash/PyTorch_tabular/.lr_find_328fbcab-3038-4e29-847e-ea4c0e34b0b6.ckpt
Restored all states from the checkpoint at /Users/abhilash/PyTorch_tabular/.lr_find_328fbcab-3038-4e29-847e-ea4c0e34b0b6.ckpt


Output()

Output()

/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

[34m[1mwandb[0m: Agent Starting Run: v4qda466 with config:
[34m[1mwandb[0m: 	auto_lr_find: False
[34m[1mwandb[0m: 	batch_size: 2048
[34m[1mwandb[0m: 	cosine_annealing_lr_params: {'T_max': 10, 'eta_min': 0}
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	early_stopping: valid_loss
[34m[1mwandb[0m: 	early_stopping_min_delta: 0.001
[34m[1mwandb[0m: 	early_stopping_mode: min
[34m[1mwandb[0m: 	early_stopping_patience: 10
[34m[1mwandb[0m: 	gradient_clip_val: 1
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	lr_scheduler: CosineAnnealingLR
[34m[1mwandb[0m: 	lr_scheduler_monitor_metric: valid_loss
[34m[1mwandb[0m: 	max_epochs: 200
[34m[1mwandb[0m: 	min_epochs: 1
[34m[1mwandb[0m: 	optimizer: RMSprop
[34m[1mwandb[0m: 	optimizer_params: {'weight_decay': 0}
[34m[1mwandb[0m: 	reduce_lr_on_plateau_params: {'factor': 0.1, 'patience': 10}
[34m[1mwandb[0m: 	step_lr_params: {'gamma': 0.1, 'step_size': 10}


Seed set to 42


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:639: Checkpoint directory /Users/abhilash/PyTorch_tabular/saved_models exists and is not empty.


Output()

Output()

/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

[34m[1mwandb[0m: Agent Starting Run: gwdircs5 with config:
[34m[1mwandb[0m: 	auto_lr_find: True
[34m[1mwandb[0m: 	batch_size: 1024
[34m[1mwandb[0m: 	cosine_annealing_lr_params: {'T_max': 10, 'eta_min': 0}
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	early_stopping: valid_loss
[34m[1mwandb[0m: 	early_stopping_min_delta: 0.001
[34m[1mwandb[0m: 	early_stopping_mode: min
[34m[1mwandb[0m: 	early_stopping_patience: 3
[34m[1mwandb[0m: 	gradient_clip_val: 1
[34m[1mwandb[0m: 	learning_rate: 1e-05
[34m[1mwandb[0m: 	lr_scheduler: CosineAnnealingLR
[34m[1mwandb[0m: 	lr_scheduler_monitor_metric: valid_loss
[34m[1mwandb[0m: 	max_epochs: 200
[34m[1mwandb[0m: 	min_epochs: 1
[34m[1mwandb[0m: 	optimizer: RMSprop
[34m[1mwandb[0m: 	optimizer_params: {'weight_decay': 0}
[34m[1mwandb[0m: 	reduce_lr_on_plateau_params: {'factor': 0.5, 'patience': 5}
[34m[1mwandb[0m: 	step_lr_params: {'gamma': 0.5, 'step_size': 5}


Seed set to 42


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:639: Checkpoint directory /Users/abhilash/PyTorch_tabular/saved_models exists and is not empty.
/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/loops/fit_loop.py:293: The number of training batches (7) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` a

Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s]

LR finder stopped early after 76 steps due to diverging loss.
Learning rate set to 0.00478630092322638
Restoring states from the checkpoint path at /Users/abhilash/PyTorch_tabular/.lr_find_b9a330ec-e808-4a2e-9b7d-4995596a25bc.ckpt
Restored all states from the checkpoint at /Users/abhilash/PyTorch_tabular/.lr_find_b9a330ec-e808-4a2e-9b7d-4995596a25bc.ckpt


Output()

Output()

/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

[34m[1mwandb[0m: Agent Starting Run: cew6fh6x with config:
[34m[1mwandb[0m: 	auto_lr_find: True
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	cosine_annealing_lr_params: {'T_max': 10, 'eta_min': 0}
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	early_stopping: valid_loss
[34m[1mwandb[0m: 	early_stopping_min_delta: 0.0001
[34m[1mwandb[0m: 	early_stopping_mode: min
[34m[1mwandb[0m: 	early_stopping_patience: 10
[34m[1mwandb[0m: 	gradient_clip_val: 0
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	lr_scheduler: ReduceLROnPlateau
[34m[1mwandb[0m: 	lr_scheduler_monitor_metric: valid_loss
[34m[1mwandb[0m: 	max_epochs: 200
[34m[1mwandb[0m: 	min_epochs: 1
[34m[1mwandb[0m: 	optimizer: AdamW
[34m[1mwandb[0m: 	optimizer_params: {'weight_decay': 0}
[34m[1mwandb[0m: 	reduce_lr_on_plateau_params: {'factor': 0.5, 'patience': 5}
[34m[1mwandb[0m: 	step_lr_params: {'gamma': 0.5, 'step_size': 5}


Seed set to 42


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:639: Checkpoint directory /Users/abhilash/PyTorch_tabular/saved_models exists and is not empty.
/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/loops/fit_loop.py:293: The number of training batches (13) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` 

Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s]

LR finder stopped early after 84 steps due to diverging loss.
Learning rate set to 0.01445439770745928
Restoring states from the checkpoint path at /Users/abhilash/PyTorch_tabular/.lr_find_c73ee0cf-642b-4f18-8b42-4af5339d4645.ckpt
Restored all states from the checkpoint at /Users/abhilash/PyTorch_tabular/.lr_find_c73ee0cf-642b-4f18-8b42-4af5339d4645.ckpt


Output()

Output()

/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


VBox(children=(Label(value='0.001 MB of 0.031 MB uploaded\r'), FloatProgress(value=0.023568500807353124, max=1…

[34m[1mwandb[0m: Agent Starting Run: 1mk969n1 with config:
[34m[1mwandb[0m: 	auto_lr_find: True
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	cosine_annealing_lr_params: {'T_max': 10, 'eta_min': 0}
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	early_stopping: None
[34m[1mwandb[0m: 	early_stopping_min_delta: 0.001
[34m[1mwandb[0m: 	early_stopping_mode: max
[34m[1mwandb[0m: 	early_stopping_patience: 3
[34m[1mwandb[0m: 	gradient_clip_val: 0
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	lr_scheduler: StepLR
[34m[1mwandb[0m: 	lr_scheduler_monitor_metric: valid_loss
[34m[1mwandb[0m: 	max_epochs: 50
[34m[1mwandb[0m: 	min_epochs: 50
[34m[1mwandb[0m: 	optimizer: RMSprop
[34m[1mwandb[0m: 	optimizer_params: {'weight_decay': 0}
[34m[1mwandb[0m: 	reduce_lr_on_plateau_params: {'factor': 0.5, 'patience': 5}
[34m[1mwandb[0m: 	step_lr_params: {'gamma': 0.5, 'step_size': 5}


Seed set to 42


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:639: Checkpoint directory /Users/abhilash/PyTorch_tabular/saved_models exists and is not empty.
/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/loops/fit_loop.py:293: The number of training batches (26) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` 

Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s]

Trainer was signaled to stop but the required `min_epochs=50` or `min_steps=None` has not been met. Training will continue...
`Trainer.fit` stopped: `max_steps=100` reached.
Learning rate set to 0.2754228703338169
Restoring states from the checkpoint path at /Users/abhilash/PyTorch_tabular/.lr_find_3b821d8a-71e9-4e61-a0a1-80ceea7cdaca.ckpt
Restored all states from the checkpoint at /Users/abhilash/PyTorch_tabular/.lr_find_3b821d8a-71e9-4e61-a0a1-80ceea7cdaca.ckpt


Output()

`Trainer.fit` stopped: `max_epochs=50` reached.


Output()

/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


VBox(children=(Label(value='0.001 MB of 0.031 MB uploaded\r'), FloatProgress(value=0.023465759777399908, max=1…

[34m[1mwandb[0m: Agent Starting Run: 0efit962 with config:
[34m[1mwandb[0m: 	auto_lr_find: True
[34m[1mwandb[0m: 	batch_size: 1024
[34m[1mwandb[0m: 	cosine_annealing_lr_params: {'T_max': 10, 'eta_min': 0}
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	early_stopping: None
[34m[1mwandb[0m: 	early_stopping_min_delta: 0.001
[34m[1mwandb[0m: 	early_stopping_mode: max
[34m[1mwandb[0m: 	early_stopping_patience: 10
[34m[1mwandb[0m: 	gradient_clip_val: 1
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	lr_scheduler: CosineAnnealingLR
[34m[1mwandb[0m: 	lr_scheduler_monitor_metric: valid_loss
[34m[1mwandb[0m: 	max_epochs: 50
[34m[1mwandb[0m: 	min_epochs: 50
[34m[1mwandb[0m: 	optimizer: RMSprop
[34m[1mwandb[0m: 	optimizer_params: {'weight_decay': 0.1}
[34m[1mwandb[0m: 	reduce_lr_on_plateau_params: {'factor': 0.1, 'patience': 10}
[34m[1mwandb[0m: 	step_lr_params: {'gamma': 0.1, 'step_size': 10}


Seed set to 42


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:639: Checkpoint directory /Users/abhilash/PyTorch_tabular/saved_models exists and is not empty.
/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/loops/fit_loop.py:293: The number of training batches (7) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` a

Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_steps=100` reached.
Failed to compute suggestion for learning rate because there are not enough points. Increase the loop iteration limits or the size of your dataset/dataloader.
Restoring states from the checkpoint path at /Users/abhilash/PyTorch_tabular/.lr_find_4139ef0e-cc2e-4e61-aa96-75e9566f0f12.ckpt
Restored all states from the checkpoint at /Users/abhilash/PyTorch_tabular/.lr_find_4139ef0e-cc2e-4e61-aa96-75e9566f0f12.ckpt
Failed to compute suggestion for learning rate because there are not enough points. Increase the loop iteration limits or the size of your dataset/dataloader.


Output()

`Trainer.fit` stopped: `max_epochs=50` reached.


Output()

/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


VBox(children=(Label(value='0.001 MB of 0.031 MB uploaded\r'), FloatProgress(value=0.023325947808961103, max=1…

[34m[1mwandb[0m: Agent Starting Run: 6rmhg86w with config:
[34m[1mwandb[0m: 	auto_lr_find: True
[34m[1mwandb[0m: 	batch_size: 2048
[34m[1mwandb[0m: 	cosine_annealing_lr_params: {'T_max': 10, 'eta_min': 0}
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	early_stopping: valid_loss
[34m[1mwandb[0m: 	early_stopping_min_delta: 0.001
[34m[1mwandb[0m: 	early_stopping_mode: min
[34m[1mwandb[0m: 	early_stopping_patience: 10
[34m[1mwandb[0m: 	gradient_clip_val: 0
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	lr_scheduler: CosineAnnealingLR
[34m[1mwandb[0m: 	lr_scheduler_monitor_metric: valid_loss
[34m[1mwandb[0m: 	max_epochs: 200
[34m[1mwandb[0m: 	min_epochs: 1
[34m[1mwandb[0m: 	optimizer: Adam
[34m[1mwandb[0m: 	optimizer_params: {'weight_decay': 0.1}
[34m[1mwandb[0m: 	reduce_lr_on_plateau_params: {'factor': 0.1, 'patience': 10}
[34m[1mwandb[0m: 	step_lr_params: {'gamma': 0.1, 'step_size': 5}


Seed set to 42


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:639: Checkpoint directory /Users/abhilash/PyTorch_tabular/saved_models exists and is not empty.
/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/loops/fit_loop.py:293: The number of training batches (4) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` a

Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_steps=100` reached.
Learning rate set to 0.5754399373371567
Restoring states from the checkpoint path at /Users/abhilash/PyTorch_tabular/.lr_find_309a584d-d92c-4af3-ae63-4be34fa151f9.ckpt
Restored all states from the checkpoint at /Users/abhilash/PyTorch_tabular/.lr_find_309a584d-d92c-4af3-ae63-4be34fa151f9.ckpt


Output()

Output()

/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

[34m[1mwandb[0m: Agent Starting Run: zernkrzj with config:
[34m[1mwandb[0m: 	auto_lr_find: True
[34m[1mwandb[0m: 	batch_size: 2048
[34m[1mwandb[0m: 	cosine_annealing_lr_params: {'T_max': 10, 'eta_min': 0}
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	early_stopping: valid_loss
[34m[1mwandb[0m: 	early_stopping_min_delta: 0.0001
[34m[1mwandb[0m: 	early_stopping_mode: min
[34m[1mwandb[0m: 	early_stopping_patience: 3
[34m[1mwandb[0m: 	gradient_clip_val: 1
[34m[1mwandb[0m: 	learning_rate: 1e-05
[34m[1mwandb[0m: 	lr_scheduler: StepLR
[34m[1mwandb[0m: 	lr_scheduler_monitor_metric: valid_loss
[34m[1mwandb[0m: 	max_epochs: 50
[34m[1mwandb[0m: 	min_epochs: 50
[34m[1mwandb[0m: 	optimizer: AdamW
[34m[1mwandb[0m: 	optimizer_params: {'weight_decay': 0.1}
[34m[1mwandb[0m: 	reduce_lr_on_plateau_params: {'factor': 0.1, 'patience': 10}
[34m[1mwandb[0m: 	step_lr_params: {'gamma': 0.1, 'step_size': 5}


Seed set to 42


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:639: Checkpoint directory /Users/abhilash/PyTorch_tabular/saved_models exists and is not empty.
/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/loops/fit_loop.py:293: The number of training batches (4) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` a

Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_steps=100` reached.
Failed to compute suggestion for learning rate because there are not enough points. Increase the loop iteration limits or the size of your dataset/dataloader.
Restoring states from the checkpoint path at /Users/abhilash/PyTorch_tabular/.lr_find_1bea54bc-1689-42a2-b9b0-856179cefe55.ckpt
Restored all states from the checkpoint at /Users/abhilash/PyTorch_tabular/.lr_find_1bea54bc-1689-42a2-b9b0-856179cefe55.ckpt
Failed to compute suggestion for learning rate because there are not enough points. Increase the loop iteration limits or the size of your dataset/dataloader.


Output()

Trainer was signaled to stop but the required `min_epochs=50` or `min_steps=None` has not been met. Training will continue...
`Trainer.fit` stopped: `max_epochs=50` reached.


Output()

/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


VBox(children=(Label(value='0.001 MB of 0.031 MB uploaded\r'), FloatProgress(value=0.0232693604758109, max=1.0…

[34m[1mwandb[0m: Agent Starting Run: fkoikgdi with config:
[34m[1mwandb[0m: 	auto_lr_find: True
[34m[1mwandb[0m: 	batch_size: 2048
[34m[1mwandb[0m: 	cosine_annealing_lr_params: {'T_max': 10, 'eta_min': 0}
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	early_stopping: None
[34m[1mwandb[0m: 	early_stopping_min_delta: 0.001
[34m[1mwandb[0m: 	early_stopping_mode: min
[34m[1mwandb[0m: 	early_stopping_patience: 10
[34m[1mwandb[0m: 	gradient_clip_val: 1
[34m[1mwandb[0m: 	learning_rate: 1e-05
[34m[1mwandb[0m: 	lr_scheduler: StepLR
[34m[1mwandb[0m: 	lr_scheduler_monitor_metric: valid_loss
[34m[1mwandb[0m: 	max_epochs: 200
[34m[1mwandb[0m: 	min_epochs: 50
[34m[1mwandb[0m: 	optimizer: RMSprop
[34m[1mwandb[0m: 	optimizer_params: {'weight_decay': 0.1}
[34m[1mwandb[0m: 	reduce_lr_on_plateau_params: {'factor': 0.5, 'patience': 5}
[34m[1mwandb[0m: 	step_lr_params: {'gamma': 0.1, 'step_size': 10}


Seed set to 42


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:639: Checkpoint directory /Users/abhilash/PyTorch_tabular/saved_models exists and is not empty.
/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/loops/fit_loop.py:293: The number of training batches (4) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` a

Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_steps=100` reached.
Failed to compute suggestion for learning rate because there are not enough points. Increase the loop iteration limits or the size of your dataset/dataloader.
Restoring states from the checkpoint path at /Users/abhilash/PyTorch_tabular/.lr_find_a93bfae1-b965-4082-b034-c5a525354ea8.ckpt
Restored all states from the checkpoint at /Users/abhilash/PyTorch_tabular/.lr_find_a93bfae1-b965-4082-b034-c5a525354ea8.ckpt
Failed to compute suggestion for learning rate because there are not enough points. Increase the loop iteration limits or the size of your dataset/dataloader.


Output()

`Trainer.fit` stopped: `max_epochs=200` reached.


Output()

/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

[34m[1mwandb[0m: Agent Starting Run: rg1adbjt with config:
[34m[1mwandb[0m: 	auto_lr_find: True
[34m[1mwandb[0m: 	batch_size: 1024
[34m[1mwandb[0m: 	cosine_annealing_lr_params: {'T_max': 10, 'eta_min': 0}
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	early_stopping: None
[34m[1mwandb[0m: 	early_stopping_min_delta: 0.001
[34m[1mwandb[0m: 	early_stopping_mode: max
[34m[1mwandb[0m: 	early_stopping_patience: 10
[34m[1mwandb[0m: 	gradient_clip_val: 1
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	lr_scheduler: ReduceLROnPlateau
[34m[1mwandb[0m: 	lr_scheduler_monitor_metric: valid_loss
[34m[1mwandb[0m: 	max_epochs: 50
[34m[1mwandb[0m: 	min_epochs: 1
[34m[1mwandb[0m: 	optimizer: Adam
[34m[1mwandb[0m: 	optimizer_params: {'weight_decay': 0.1}
[34m[1mwandb[0m: 	reduce_lr_on_plateau_params: {'factor': 0.5, 'patience': 5}
[34m[1mwandb[0m: 	step_lr_params: {'gamma': 0.1, 'step_size': 5}


Seed set to 42


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:639: Checkpoint directory /Users/abhilash/PyTorch_tabular/saved_models exists and is not empty.
/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/loops/fit_loop.py:293: The number of training batches (7) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` a

Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_steps=100` reached.
Failed to compute suggestion for learning rate because there are not enough points. Increase the loop iteration limits or the size of your dataset/dataloader.
Restoring states from the checkpoint path at /Users/abhilash/PyTorch_tabular/.lr_find_6757877f-6146-4a41-9b7c-b4b4eefc5d1d.ckpt
Restored all states from the checkpoint at /Users/abhilash/PyTorch_tabular/.lr_find_6757877f-6146-4a41-9b7c-b4b4eefc5d1d.ckpt
Failed to compute suggestion for learning rate because there are not enough points. Increase the loop iteration limits or the size of your dataset/dataloader.


Output()

`Trainer.fit` stopped: `max_epochs=50` reached.


Output()

/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

[34m[1mwandb[0m: Agent Starting Run: za22orek with config:
[34m[1mwandb[0m: 	auto_lr_find: True
[34m[1mwandb[0m: 	batch_size: 1024
[34m[1mwandb[0m: 	cosine_annealing_lr_params: {'T_max': 10, 'eta_min': 0}
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	early_stopping: valid_loss
[34m[1mwandb[0m: 	early_stopping_min_delta: 0.001
[34m[1mwandb[0m: 	early_stopping_mode: min
[34m[1mwandb[0m: 	early_stopping_patience: 10
[34m[1mwandb[0m: 	gradient_clip_val: 1
[34m[1mwandb[0m: 	learning_rate: 1e-05
[34m[1mwandb[0m: 	lr_scheduler: CosineAnnealingLR
[34m[1mwandb[0m: 	lr_scheduler_monitor_metric: valid_loss
[34m[1mwandb[0m: 	max_epochs: 50
[34m[1mwandb[0m: 	min_epochs: 50
[34m[1mwandb[0m: 	optimizer: SGD
[34m[1mwandb[0m: 	optimizer_params: {'weight_decay': 0}
[34m[1mwandb[0m: 	reduce_lr_on_plateau_params: {'factor': 0.5, 'patience': 5}
[34m[1mwandb[0m: 	step_lr_params: {'gamma': 0.1, 'step_size': 10}


Seed set to 42


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:639: Checkpoint directory /Users/abhilash/PyTorch_tabular/saved_models exists and is not empty.
/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/loops/fit_loop.py:293: The number of training batches (7) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` a

Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_steps=100` reached.
Failed to compute suggestion for learning rate because there are not enough points. Increase the loop iteration limits or the size of your dataset/dataloader.
Restoring states from the checkpoint path at /Users/abhilash/PyTorch_tabular/.lr_find_164d218e-be22-4a1f-9ccc-7a1958e5db1d.ckpt
Restored all states from the checkpoint at /Users/abhilash/PyTorch_tabular/.lr_find_164d218e-be22-4a1f-9ccc-7a1958e5db1d.ckpt
Failed to compute suggestion for learning rate because there are not enough points. Increase the loop iteration limits or the size of your dataset/dataloader.


Output()

Trainer was signaled to stop but the required `min_epochs=50` or `min_steps=None` has not been met. Training will continue...
`Trainer.fit` stopped: `max_epochs=50` reached.


Output()

/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: dm5fz7ki with config:
[34m[1mwandb[0m: 	auto_lr_find: True
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	cosine_annealing_lr_params: {'T_max': 10, 'eta_min': 0}
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	early_stopping: None
[34m[1mwandb[0m: 	early_stopping_min_delta: 0.001
[34m[1mwandb[0m: 	early_stopping_mode: max
[34m[1mwandb[0m: 	early_stopping_patience: 10
[34m[1mwandb[0m: 	gradient_clip_val: 1
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	lr_scheduler: StepLR
[34m[1mwandb[0m: 	lr_scheduler_monitor_metric: valid_loss
[34m[1mwandb[0m: 	max_epochs: 200
[34m[1mwandb[0m: 	min_epochs: 50
[34m[1mwandb[0m: 	optimizer: RMSprop
[34m[1mwandb[0m: 	optimizer_params: {'weight_decay': 0.1}
[34m[1mwandb[0m: 	reduce_lr_on_plateau_params: {'factor': 0.5, 'patience': 5}
[34m[1mwandb[0m: 	step_lr_params

Seed set to 42


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:639: Checkpoint directory /Users/abhilash/PyTorch_tabular/saved_models exists and is not empty.
/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/loops/fit_loop.py:293: The number of training batches (13) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` 

Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_steps=100` reached.
Failed to compute suggestion for learning rate because there are not enough points. Increase the loop iteration limits or the size of your dataset/dataloader.
Restoring states from the checkpoint path at /Users/abhilash/PyTorch_tabular/.lr_find_eba4d35c-7e4b-4a51-b165-7580a87e179f.ckpt
Restored all states from the checkpoint at /Users/abhilash/PyTorch_tabular/.lr_find_eba4d35c-7e4b-4a51-b165-7580a87e179f.ckpt
Failed to compute suggestion for learning rate because there are not enough points. Increase the loop iteration limits or the size of your dataset/dataloader.


Output()

`Trainer.fit` stopped: `max_epochs=200` reached.


Output()

/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

[34m[1mwandb[0m: Agent Starting Run: wirhjr3s with config:
[34m[1mwandb[0m: 	auto_lr_find: False
[34m[1mwandb[0m: 	batch_size: 1024
[34m[1mwandb[0m: 	cosine_annealing_lr_params: {'T_max': 10, 'eta_min': 0}
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	early_stopping: None
[34m[1mwandb[0m: 	early_stopping_min_delta: 0.0001
[34m[1mwandb[0m: 	early_stopping_mode: min
[34m[1mwandb[0m: 	early_stopping_patience: 3
[34m[1mwandb[0m: 	gradient_clip_val: 0
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	lr_scheduler: StepLR
[34m[1mwandb[0m: 	lr_scheduler_monitor_metric: valid_loss
[34m[1mwandb[0m: 	max_epochs: 200
[34m[1mwandb[0m: 	min_epochs: 1
[34m[1mwandb[0m: 	optimizer: AdamW
[34m[1mwandb[0m: 	optimizer_params: {'weight_decay': 0.1}
[34m[1mwandb[0m: 	reduce_lr_on_plateau_params: {'factor': 0.1, 'patience': 10}
[34m[1mwandb[0m: 	step_lr_params: {'gamma': 0.5, 'step_size': 5}


Seed set to 42


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:639: Checkpoint directory /Users/abhilash/PyTorch_tabular/saved_models exists and is not empty.


Output()

`Trainer.fit` stopped: `max_epochs=200` reached.


Output()

/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

[34m[1mwandb[0m: Agent Starting Run: id7hw3bv with config:
[34m[1mwandb[0m: 	auto_lr_find: True
[34m[1mwandb[0m: 	batch_size: 2048
[34m[1mwandb[0m: 	cosine_annealing_lr_params: {'T_max': 10, 'eta_min': 0}
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	early_stopping: valid_loss
[34m[1mwandb[0m: 	early_stopping_min_delta: 0.0001
[34m[1mwandb[0m: 	early_stopping_mode: min
[34m[1mwandb[0m: 	early_stopping_patience: 10
[34m[1mwandb[0m: 	gradient_clip_val: 0
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	lr_scheduler: CosineAnnealingLR
[34m[1mwandb[0m: 	lr_scheduler_monitor_metric: valid_loss
[34m[1mwandb[0m: 	max_epochs: 50
[34m[1mwandb[0m: 	min_epochs: 50
[34m[1mwandb[0m: 	optimizer: RMSprop
[34m[1mwandb[0m: 	optimizer_params: {'weight_decay': 0.1}
[34m[1mwandb[0m: 	reduce_lr_on_plateau_params: {'factor': 0.5, 'patience': 5}
[34m[1mwandb[0m: 	step_lr_params: {'gamma': 0.1, 'step_size': 10}


Seed set to 42


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:639: Checkpoint directory /Users/abhilash/PyTorch_tabular/saved_models exists and is not empty.
/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/loops/fit_loop.py:293: The number of training batches (4) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` a

Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_steps=100` reached.
Learning rate set to 0.036307805477010104
Restoring states from the checkpoint path at /Users/abhilash/PyTorch_tabular/.lr_find_c34f8d03-9396-466b-a4e0-07857c6bdc5b.ckpt
Restored all states from the checkpoint at /Users/abhilash/PyTorch_tabular/.lr_find_c34f8d03-9396-466b-a4e0-07857c6bdc5b.ckpt


Output()

Trainer was signaled to stop but the required `min_epochs=50` or `min_steps=None` has not been met. Training will continue...
`Trainer.fit` stopped: `max_epochs=50` reached.


Output()

/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

[34m[1mwandb[0m: Agent Starting Run: 66dvea04 with config:
[34m[1mwandb[0m: 	auto_lr_find: False
[34m[1mwandb[0m: 	batch_size: 2048
[34m[1mwandb[0m: 	cosine_annealing_lr_params: {'T_max': 10, 'eta_min': 0}
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	early_stopping: valid_loss
[34m[1mwandb[0m: 	early_stopping_min_delta: 0.001
[34m[1mwandb[0m: 	early_stopping_mode: min
[34m[1mwandb[0m: 	early_stopping_patience: 3
[34m[1mwandb[0m: 	gradient_clip_val: 0
[34m[1mwandb[0m: 	learning_rate: 1e-05
[34m[1mwandb[0m: 	lr_scheduler: StepLR
[34m[1mwandb[0m: 	lr_scheduler_monitor_metric: valid_loss
[34m[1mwandb[0m: 	max_epochs: 50
[34m[1mwandb[0m: 	min_epochs: 50
[34m[1mwandb[0m: 	optimizer: Adam
[34m[1mwandb[0m: 	optimizer_params: {'weight_decay': 0.1}
[34m[1mwandb[0m: 	reduce_lr_on_plateau_params: {'factor': 0.1, 'patience': 10}
[34m[1mwandb[0m: 	step_lr_params: {'gamma': 0.5, 'step_size': 10}


Seed set to 42


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:639: Checkpoint directory /Users/abhilash/PyTorch_tabular/saved_models exists and is not empty.


Output()

Trainer was signaled to stop but the required `min_epochs=50` or `min_steps=None` has not been met. Training will continue...
`Trainer.fit` stopped: `max_epochs=50` reached.


Output()

/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

[34m[1mwandb[0m: Agent Starting Run: r863kkbm with config:
[34m[1mwandb[0m: 	auto_lr_find: True
[34m[1mwandb[0m: 	batch_size: 1024
[34m[1mwandb[0m: 	cosine_annealing_lr_params: {'T_max': 10, 'eta_min': 0}
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	early_stopping: None
[34m[1mwandb[0m: 	early_stopping_min_delta: 0.001
[34m[1mwandb[0m: 	early_stopping_mode: max
[34m[1mwandb[0m: 	early_stopping_patience: 3
[34m[1mwandb[0m: 	gradient_clip_val: 1
[34m[1mwandb[0m: 	learning_rate: 1e-05
[34m[1mwandb[0m: 	lr_scheduler: ReduceLROnPlateau
[34m[1mwandb[0m: 	lr_scheduler_monitor_metric: valid_loss
[34m[1mwandb[0m: 	max_epochs: 200
[34m[1mwandb[0m: 	min_epochs: 50
[34m[1mwandb[0m: 	optimizer: AdamW
[34m[1mwandb[0m: 	optimizer_params: {'weight_decay': 0.1}
[34m[1mwandb[0m: 	reduce_lr_on_plateau_params: {'factor': 0.1, 'patience': 10}
[34m[1mwandb[0m: 	step_lr_params: {'gamma': 0.5, 'step_size': 5}


Seed set to 42


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:639: Checkpoint directory /Users/abhilash/PyTorch_tabular/saved_models exists and is not empty.
/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/loops/fit_loop.py:293: The number of training batches (7) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` a

Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_steps=100` reached.
Failed to compute suggestion for learning rate because there are not enough points. Increase the loop iteration limits or the size of your dataset/dataloader.
Restoring states from the checkpoint path at /Users/abhilash/PyTorch_tabular/.lr_find_4b575642-e49e-4f91-b1e1-db47ebf1562c.ckpt
Restored all states from the checkpoint at /Users/abhilash/PyTorch_tabular/.lr_find_4b575642-e49e-4f91-b1e1-db47ebf1562c.ckpt
Failed to compute suggestion for learning rate because there are not enough points. Increase the loop iteration limits or the size of your dataset/dataloader.


Output()

`Trainer.fit` stopped: `max_epochs=200` reached.


Output()

/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

[34m[1mwandb[0m: Agent Starting Run: yhtiq6b0 with config:
[34m[1mwandb[0m: 	auto_lr_find: True
[34m[1mwandb[0m: 	batch_size: 2048
[34m[1mwandb[0m: 	cosine_annealing_lr_params: {'T_max': 10, 'eta_min': 0}
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	early_stopping: valid_loss
[34m[1mwandb[0m: 	early_stopping_min_delta: 0.001
[34m[1mwandb[0m: 	early_stopping_mode: min
[34m[1mwandb[0m: 	early_stopping_patience: 3
[34m[1mwandb[0m: 	gradient_clip_val: 0
[34m[1mwandb[0m: 	learning_rate: 1e-05
[34m[1mwandb[0m: 	lr_scheduler: ReduceLROnPlateau
[34m[1mwandb[0m: 	lr_scheduler_monitor_metric: valid_loss
[34m[1mwandb[0m: 	max_epochs: 50
[34m[1mwandb[0m: 	min_epochs: 50
[34m[1mwandb[0m: 	optimizer: SGD
[34m[1mwandb[0m: 	optimizer_params: {'weight_decay': 0}
[34m[1mwandb[0m: 	reduce_lr_on_plateau_params: {'factor': 0.5, 'patience': 5}
[34m[1mwandb[0m: 	step_lr_params: {'gamma': 0.1, 'step_size': 5}


Seed set to 42


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:639: Checkpoint directory /Users/abhilash/PyTorch_tabular/saved_models exists and is not empty.
/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/loops/fit_loop.py:293: The number of training batches (4) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
/opt/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` a

Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_steps=100` reached.
Learning rate set to 0.6918309709189363
Restoring states from the checkpoint path at /Users/abhilash/PyTorch_tabular/.lr_find_d4ad7955-74f2-4f65-9f2a-c6bfd06de0ab.ckpt
Restored all states from the checkpoint at /Users/abhilash/PyTorch_tabular/.lr_find_d4ad7955-74f2-4f65-9f2a-c6bfd06de0ab.ckpt


Output()

Trainer was signaled to stop but the required `min_epochs=50` or `min_steps=None` has not been met. Training will continue...


In [None]:
print("hello")

In [None]:
import wandb
from pytorch_tabular import TabularModel
from pytorch_tabular.config import DataConfig, TrainerConfig, OptimizerConfig, AutoIntConfig

# Define the sweep configuration
sweep_config = {
    'method': 'random',  # Can be 'grid', 'random', 'bayes'
    'metric': {
        'name': 'valid_accuracy',
        'goal': 'maximize'
    },
    'parameters': {
        'batch_size': {
            'values': [256, 512, 1024, 2048]
        },
        'max_epochs': {
            'values': [50, 200]
        },
        'min_epochs': {
            'values': [1, 50]
        },
        'early_stopping': {
            'values': [None, 'valid_loss']
        },
        'early_stopping_min_delta': {
            'values': [0.0001, 0.001]
        },
        'early_stopping_mode': {
            'values': ['min', 'max']
        },
        'early_stopping_patience': {
            'values': [3, 10]
        },
        'gradient_clip_val': {
            'values': [0.0, 1.0]
        },
        'auto_lr_find': {
            'values': [True, False]
        },
        'optimizer': {
            'values': ['Adam', 'SGD', 'RMSprop', 'AdamW']
        },
        'optimizer_params': {
            'values': [{'weight_decay': 0.0}, {'weight_decay': 0.1}]
        },
        'lr_scheduler': {
            'values': ['CosineAnnealingLR', 'StepLR', 'ReduceLROnPlateau']
        },
        'cosine_annealing_lr_params': {
            'values': [{'T_max': 10, 'eta_min': 0}]
        },
        'step_lr_params': {
            'values': [
                {'step_size': 5, 'gamma': 0.1},
                {'step_size': 5, 'gamma': 0.5},
                {'step_size': 10, 'gamma': 0.1},
                {'step_size': 10, 'gamma': 0.5}
            ]
        },
        'reduce_lr_on_plateau_params': {
            'values': [
                {'factor': 0.1, 'patience': 10},
                {'factor': 0.5, 'patience': 5}
            ]
        },
        'lr_scheduler_monitor_metric': {
            'values': ['valid_loss']
        },
        'dropout': {
            'values': [0.1, 0.5]
        },
        'learning_rate': {
            'values': [1e-5, 1e-2]
        }
    }
}

# Initialize a sweep
sweep_id = wandb.sweep(sweep_config, project='random_sweep')

# Function to train your model
def train():
    # Initialize a new run
    wandb.init()

    # Fetch hyperparameters
    config = wandb.config

    # Create the model configuration with fetched parameters
    data_config = DataConfig(
        target=["is_fraud"],
        continuous_cols=num_col_names,
        categorical_cols=cat_col_names
    )

    trainer_config = TrainerConfig(
        batch_size=config.batch_size,
        max_epochs=config.max_epochs,
        min_epochs=config.min_epochs,
        early_stopping=config.early_stopping,
        early_stopping_min_delta=config.early_stopping_min_delta,
        early_stopping_mode=config.early_stopping_mode,
        early_stopping_patience=config.early_stopping_patience,
        gradient_clip_val=config.gradient_clip_val,
        auto_lr_find=config.auto_lr_find,
    )

    optimizer_config = OptimizerConfig(
        optimizer=config.optimizer,
        optimizer_params=config.optimizer_params,
        lr_scheduler=config.lr_scheduler,
        lr_scheduler_monitor_metric=config.lr_scheduler_monitor_metric,
    )

    if config.lr_scheduler == 'CosineAnnealingLR':
        optimizer_config.lr_scheduler_params = config.cosine_annealing_lr_params
    elif config.lr_scheduler == 'StepLR':
        optimizer_config.lr_scheduler_params = config.step_lr_params
    elif config.lr_scheduler == 'ReduceLROnPlateau':
        optimizer_config.lr_scheduler_params = config.reduce_lr_on_plateau_params

    model_config = AutoIntConfig(
        task="classification",
        dropout=config.dropout,
        learning_rate=config.learning_rate,
    )

    tabular_model = TabularModel(
        data_config=data_config,
        trainer_config=trainer_config,
        optimizer_config=optimizer_config,
        model_config=model_config,
        verbose=True
    )

    # Fit the model with the training and validation data
    tabular_model.fit(train=train_data, validation=val_data)

    # Evaluate the model
    results = tabular_model.evaluate(test=test_data)

    # Ensure results are in dictionary format
    if isinstance(results, list):
        results_dict = {f"metric_{i}": result for i, result in enumerate(results)}
    else:
        results_dict = results

    # Log metrics (replace with your evaluation logic)
    wandb.log(results_dict)

# Run the sweep
wandb.agent(sweep_id, train, count=25)
