In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')

In [3]:
train_df['category'].fillna(train_df['category'].median(), inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  train_df['category'].fillna(train_df['category'].median(), inplace=True)


In [4]:
train_df = train_df.drop(columns = ['trip_ID','travelling_with','trip_purpose','first_time_visitor','source_of_info','weather_at_arrival','tour_arrangement','special_requirements'])
test_df = test_df.drop(columns = ['trip_ID','travelling_with','trip_purpose','first_time_visitor','source_of_info','weather_at_arrival','tour_arrangement','special_requirements'])

In [5]:
train_df['total_nights'] = train_df['mainland_nights'] + train_df['island_nights']
test_df['total_nights'] = test_df['mainland_nights'] + test_df['island_nights']

In [6]:
from sklearn.model_selection import KFold
import pandas as pd

# Function to apply target encoding with K-Fold
def target_encode_column(train_df, test_df, target, col, n_splits=5):
    # Create Series to store the target-encoded values for train and test
    train_encoded = pd.Series(index=train_df.index, dtype='float64')
    test_encoded = pd.Series(index=test_df.index, dtype='float64')

    # Set up K-Fold for target encoding on the train set
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

    for train_idx, valid_idx in kf.split(train_df):
        # Train and validation folds
        train_fold, valid_fold = train_df.iloc[train_idx], train_df.iloc[valid_idx]

        # Calculate target mean per category in train fold
        means = train_fold.groupby(col)[target].mean()

        # Map these means to the validation fold
        train_encoded.iloc[valid_idx] = valid_fold[col].map(means)

    # Fill missing values in train_encoded with the overall target mean
    train_encoded.fillna(train_df[target].mean(), inplace=True)

    # Map the target encoding means to the test set
    category_means = train_df.groupby(col)[target].mean()
    test_encoded = test_df[col].map(category_means)

    # Fill missing values in test_encoded with the overall target mean
    test_encoded.fillna(train_df[target].mean(), inplace=True)

    return train_encoded, test_encoded

# Apply target encoding on 'visitor_nation'
train_df['visitor_nation_encoded'], test_df['visitor_nation_encoded'] = target_encode_column(train_df, test_df, target='category', col='visitor_nation')

# Drop the original 'visitor_nation' column if not needed further
train_df = train_df.drop(columns=['visitor_nation'])
test_df = test_df.drop(columns=['visitor_nation'])

# Display first few rows to verify the encoding
train_df[['visitor_nation_encoded', 'category']].head(), test_df[['visitor_nation_encoded']].head()


(   visitor_nation_encoded  category
 0                0.270869       1.0
 1                1.253133       2.0
 2                0.696833       2.0
 3                0.282976       0.0
 4                0.284483       0.0,
    visitor_nation_encoded
 0                1.500000
 1                0.436170
 2                0.500000
 3                0.823529
 4                0.428986)

In [7]:
train_df['total_people'] = train_df['female_count'] + train_df['male_count']
test_df['total_people'] = test_df['female_count'] + test_df['male_count']

In [8]:
# 1. Average group size
train_df['average_group_size'] = train_df['total_people'] / train_df['total_nights']
test_df['average_group_size'] = test_df['total_people'] / test_df['total_nights']

# 2. Trip duration categories
train_df['trip_duration_category'] = pd.cut(train_df['total_nights'], bins=[0, 3, 7, float('inf')], labels=['short', 'medium', 'long'])
test_df['trip_duration_category'] = pd.cut(test_df['total_nights'], bins=[0, 3, 7, float('inf')], labels=['short', 'medium', 'long'])

# 3. Visitor demographics (family and group indicators)
train_df['is_family'] = ((train_df['female_count'] > 0) & (train_df['male_count'] > 0)).astype(int)
test_df['is_family'] = ((test_df['female_count'] > 0) & (test_df['male_count'] > 0)).astype(int)

train_df['is_group'] = (train_df['total_people'] > 2).astype(int)
test_df['is_group'] = (test_df['total_people'] > 2).astype(int)

# 4. Days in each type of location (mainland/island ratios)
train_df['mainland_ratio'] = train_df['mainland_nights'] / train_df['total_nights']
train_df['island_ratio'] = train_df['island_nights'] / train_df['total_nights']

test_df['mainland_ratio'] = test_df['mainland_nights'] / test_df['total_nights']
test_df['island_ratio'] = test_df['island_nights'] / test_df['total_nights']

In [9]:
train_df = train_df.drop(columns = ['female_count','male_count','mainland_nights','island_nights'])
test_df = test_df.drop(columns = ['female_count','male_count','mainland_nights','island_nights'])

In [10]:
train_df['key_activity'] = train_df['key_activity'].replace('Widlife Tourism', 'Wildlife Tourism')
test_df['key_activity'] = test_df['key_activity'].replace('Widlife Tourism', 'Wildlife Tourism')

In [11]:
mode_key_activity = train_df['key_activity'].mode()[0]

train_df['key_activity'].fillna(mode_key_activity, inplace=True)
test_df['key_activity'].fillna(mode_key_activity, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  train_df['key_activity'].fillna(mode_key_activity, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_df['key_activity'].fillna(mode_key_activity, inplace=True)


In [None]:
import numpy as np

In [12]:
import numpy as np

# Define mappings for range values to approximate numeric values
days_before_booked_map = {
    '1-7': 4,
    '8-14': 11,
    '15-30': 22.5,
    '31-60': 45.5,
    '61-90': 75.5,
    '90+': 90
}

tour_length_map = {
    '1-6': 3.5,
    '7-14': 10.5,
    '15-29': 22,
    '30+': 30
}

# Apply mappings to convert ranges to numeric values
train_df['days_before_booked_num'] = train_df['days_before_booked'].map(days_before_booked_map)
test_df['days_before_booked_num'] = test_df['days_before_booked'].map(days_before_booked_map)

train_df['tour_length_num'] = train_df['tour_length'].map(tour_length_map)
test_df['tour_length_num'] = test_df['tour_length'].map(tour_length_map)

# Fill missing values with median of each column
train_df['days_before_booked_num'].fillna(train_df['days_before_booked_num'].median(), inplace=True)
test_df['days_before_booked_num'].fillna(train_df['days_before_booked_num'].median(), inplace=True)

train_df['tour_length_num'].fillna(train_df['tour_length_num'].median(), inplace=True)
test_df['tour_length_num'].fillna(train_df['tour_length_num'].median(), inplace=True)

# Drop the original columns if not needed
train_df = train_df.drop(columns=['days_before_booked', 'tour_length'])
test_df = test_df.drop(columns=['days_before_booked', 'tour_length'])


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  train_df['days_before_booked_num'].fillna(train_df['days_before_booked_num'].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_df['days_before_booked_num'].fillna(train_df['days_before_booked_num'].median(), inplace=True)
The behavior will change in pan

In [13]:
for column in ['transport_package_international', 'package_accomodation', 'food_package', 'insurance_package']:
    most_frequent = train_df[column].mode()[0]
    train_df[column].fillna(most_frequent, inplace=True)
    test_df[column].fillna(most_frequent, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  train_df[column].fillna(most_frequent, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_df[column].fillna(most_frequent, inplace=True)


In [14]:
train_df['total_people'].fillna(train_df['total_people'].median(), inplace=True)
test_df['total_people'].fillna(train_df['total_people'].median(), inplace=True)

train_df['average_group_size'].fillna(train_df['average_group_size'].median(), inplace=True)
test_df['average_group_size'].fillna(train_df['average_group_size'].median(), inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  train_df['total_people'].fillna(train_df['total_people'].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_df['total_people'].fillna(train_df['total_people'].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never 

In [15]:
train_df.dropna(inplace=True)

In [16]:
from sklearn.preprocessing import LabelEncoder

train_df['age_bracket'] = train_df['age_bracket'].replace({'<18': 'Below 25', '18-24': 'Below 25'})
test_df['age_bracket'] = test_df['age_bracket'].replace({'<18': 'Below 25', '18-24': 'Below 25'})

label_encoder = LabelEncoder()
train_df['age_bracket_encoded'] = label_encoder.fit_transform(train_df['age_bracket'])
test_df['age_bracket_encoded'] = label_encoder.transform(test_df['age_bracket'])

In [17]:
train_df = train_df.drop(columns=['age_bracket'])
test_df = test_df.drop(columns=['age_bracket'])

In [18]:
train_df_encoded = pd.get_dummies(train_df, drop_first=True,dtype='int64')

In [19]:
test_df_encoded = pd.get_dummies(test_df, drop_first=True,dtype='int64')

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

# Define the target column
# Replace 'target_column' with the actual name of the column in your DataFrame
X = train_df_encoded.drop(columns=['category'])  # Features
y = train_df_encoded['category']  # Target

# Split the data first to avoid data leakage before scaling
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

In [None]:
from sklearn.metrics import accuracy_score

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import pandas as pd

# Define the target column
X = train_df_encoded.drop(columns=['category'])  # Features
y = train_df_encoded['category']  # Target

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, stratify=y, random_state=303)

# Initialize and apply the scaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Convert back to DataFrames to retain column names
X_train = pd.DataFrame(X_train, columns=X.columns)
X_val = pd.DataFrame(X_val, columns=X.columns)

# Initialize the SVM classifier
svm_classifier = SVC(kernel='rbf', random_state=303)  # 'rbf' is the default kernel

# Train the SVM classifier
svm_classifier.fit(X_train, y_train)

# Predict and calculate accuracy on the validation set
y_pred = svm_classifier.predict(X_val)
accuracy = accuracy_score(y_val, y_pred)
print("Accuracy Score:", accuracy)

# Load the test data
temp_test_df = pd.read_csv('test.csv')

# Scale the test data with the same scaler used on the training data
test_df_encoded_scaled = scaler.transform(test_df_encoded)
test_df_encoded_scaled = pd.DataFrame(test_df_encoded_scaled, columns=test_df_encoded.columns)

# Make predictions on the test set
predictions = svm_classifier.predict(test_df_encoded_scaled)

# Create a DataFrame with trip_ID and predictions
results_df = pd.DataFrame({
    'trip_ID': temp_test_df['trip_ID'],
    'category': predictions
})

# Save predictions to a new CSV file
results_df.to_csv('predictions_results_svm.csv', index=False)
print("Predictions saved to predictions_results_svm.csv")


Accuracy Score: 0.7545454545454545
Predictions saved to predictions_results_svm.csv


In [None]:
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np

# Define the target column
X = train_df_encoded.drop(columns=['category'])  # Features
y = train_df_encoded['category']  # Target

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, stratify=y, random_state=303)

# Initialize and apply the scaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Convert back to DataFrames to retain column names
X_train = pd.DataFrame(X_train, columns=X.columns)
X_val = pd.DataFrame(X_val, columns=X.columns)

# Define the SVM classifier
svm = SVC(random_state=303)

# Define the parameter distribution for tuning
param_dist = {
    'kernel': ['linear', 'rbf', 'poly', 'sigmoid'],  # Different kernel types
    'C': np.logspace(-2, 2, 10),  # Regularization parameter (from 0.01 to 100)
    'gamma': ['scale', 'auto'] + list(np.logspace(-3, 1, 5)),  # Kernel coefficient
    'degree': [2, 3, 4, 5],  # Degree of polynomial kernel function (only for 'poly')
}

# Initialize RandomizedSearchCV
random_search = RandomizedSearchCV(
    estimator=svm,
    param_distributions=param_dist,
    n_iter=5,  # Number of random combinations to try
    scoring='accuracy',
    cv=2,
    verbose=2,
    n_jobs=-1,
    random_state=303
)

# Perform random search on training data
random_search.fit(X_train, y_train)

# Print the best parameters and best score
print("Best Parameters:", random_search.best_params_)
print("Best Cross-Validation Score:", random_search.best_score_)

# Use the best estimator to make predictions on validation data
best_svm = random_search.best_estimator_
y_pred = best_svm.predict(X_val)
accuracy = accuracy_score(y_val, y_pred)
print("Validation Accuracy with Best SVM:", accuracy)

# Load the test data
temp_test_df = pd.read_csv('test.csv')

# Scale the test data with the same scaler used on the training data
test_df_encoded_scaled = scaler.transform(test_df_encoded)
test_df_encoded_scaled = pd.DataFrame(test_df_encoded_scaled, columns=test_df_encoded.columns)

# Make predictions on the test set
predictions = best_svm.predict(test_df_encoded_scaled)

# Create a DataFrame with trip_ID and predictions
results_df = pd.DataFrame({
    'trip_ID': temp_test_df['trip_ID'],
    'category': predictions
})

# Save predictions to a new CSV file
results_df.to_csv('predictions_results_svm_tuned.csv', index=False)
print("Predictions saved to predictions_results_svm_tuned.csv")


Fitting 2 folds for each of 5 candidates, totalling 10 fits


KeyboardInterrupt: 

In [None]:
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error
import pandas as pd

# Function to clean column names
def clean_column_names(df):
    df.columns = df.columns.str.replace(r'[\[\]<>,]', '', regex=True)
    return df

# Manually define SVM hyperparameters
svr_model = SVR(kernel='rbf', C=1.0, gamma='scale')  # You can adjust these values as needed

# Train the SVM regressor
svr_model.fit(X_train, y_train)

# Predict on the validation set
svr_predictions = svr_model.predict(X_val)
svr_mae = mean_absolute_error(y_val, svr_predictions)
print("SVM Regressor Mean Absolute Error:", svr_mae)

# Predict on the test set
svr_test_predictions = svr_model.predict(test_df_encoded_scaled)

# Load the test dataset
temp_test_df = pd.read_csv('test.csv')

# SVM Predictions on test set
svr_output = pd.DataFrame({'trip_ID': temp_test_df['trip_ID'], 'category': svr_test_predictions})

# Apply binning to convert regression outputs into categories
svr_output['category'] = pd.cut(svr_output['category'], bins=[-float('inf'), 0.6, 1.3, float('inf')], labels=[0, 1, 2])

# Save predictions to a CSV file
svr_output.to_csv('submission_svr_only.csv', index=False)

print("Submission for SVM model saved successfully.")


SVM Regressor Mean Absolute Error: 0.3145443155252721
Submission for SVM model saved successfully.


In [None]:
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error
import pandas as pd

# Function to clean column names
def clean_column_names(df):
    df.columns = df.columns.str.replace(r'[\[\]<>,]', '', regex=True)
    return df

# Manually define SVM hyperparameters
svr_model = SVR(kernel='rbf', C=1.0, gamma='scale')  # You can adjust these values as needed

# Train the SVM regressor
svr_model.fit(X_train, y_train)

# Predict on the validation set
svr_predictions = svr_model.predict(X_val)
svr_mae = mean_absolute_error(y_val, svr_predictions)
print("SVM Regressor Mean Absolute Error:", svr_mae)

# Predict on the test set
svr_test_predictions = svr_model.predict(test_df_encoded_scaled)

# Load the test dataset
temp_test_df = pd.read_csv('test.csv')

# SVM Predictions on test set
svr_output = pd.DataFrame({'trip_ID': temp_test_df['trip_ID'], 'category': svr_test_predictions})

print(svr_output['category'])
# Apply binning to convert regression outputs into categories
svr_output['category'] = pd.cut(svr_output['category'], bins=[-float('inf'), 0.6, 1.3, float('inf')], labels=[0, 1, 2])

# Save predictions to a CSV file
svr_output.to_csv('submission_svr_only_tp.csv', index=False)

print("Submission for SVM model saved successfully.")

SVM Regressor Mean Absolute Error: 0.3145443155252721
0       1.140345
1       0.002403
2       0.227088
3       0.219506
4       0.296209
          ...   
5847    1.304772
5848    0.065384
5849    0.270208
5850    1.156263
5851    0.756091
Name: category, Length: 5852, dtype: float64
Submission for SVM model saved successfully.


In [None]:
svr_output['category']

Unnamed: 0,category
0,2
1,0
2,0
3,0
4,0
...,...
5847,2
5848,0
5849,0
5850,2


In [None]:
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error
import pandas as pd

# Define different kernels to test
kernels = ['linear', 'poly', 'rbf', 'sigmoid']

# Initialize variables to store the best kernel and corresponding MAE
best_kernel = None
best_mae = float('inf')
best_model = None

# Loop through each kernel and evaluate its performance
for kernel in kernels:
    print(f"Training SVR with kernel: {kernel}")
    svr_model = SVR(kernel=kernel, C=1.0, gamma='scale')  # Keep other parameters fixed for now
    svr_model.fit(X_train, y_train)

    # Predict on the validation set
    svr_predictions = svr_model.predict(X_val)
    svr_mae = mean_absolute_error(y_val, svr_predictions)
    print(f"Mean Absolute Error with kernel {kernel}: {svr_mae}")

    # Check if this kernel is the best so far
    if svr_mae < best_mae:
        best_mae = svr_mae
        best_kernel = kernel
        best_model = svr_model

print(f"Best kernel: {best_kernel} with MAE: {best_mae}")

# Predict on the test set using the best model
svr_test_predictions = best_model.predict(test_df_encoded_scaled)

# Load the test dataset
temp_test_df = pd.read_csv('test.csv')

# SVM Predictions on test set
svr_output = pd.DataFrame({'trip_ID': temp_test_df['trip_ID'], 'category': svr_test_predictions})

# Apply binning to convert regression outputs into categories
svr_output['category'] = pd.cut(svr_output['category'], bins=[-float('inf'), 0.6, 1.3, float('inf')], labels=[0, 1, 2])

# Save predictions to a CSV file
svr_output.to_csv('submission_svr_tuned.csv', index=False)

print("Submission for tuned SVM model saved successfully.")


Training SVR with kernel: linear


KeyboardInterrupt: 

In [None]:
train_df_encoded['category'].value_counts()

Unnamed: 0_level_0,count
category,Unnamed: 1_level_1
0.0,6240
1.0,4943
2.0,1463


In [None]:
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error
import pandas as pd

# Define different kernels to test
kernels = ['linear', 'poly', 'rbf', 'sigmoid']

# Initialize variables to store the best kernel and corresponding MAE
best_kernel = None
best_mae = float('inf')
best_model = None

# Loop through each kernel and evaluate its performance
for kernel in kernels:
    print(f"Training SVR with kernel: {kernel}")
    svr_model = SVR(kernel=kernel, C=1.0, gamma='scale')  # Keep other parameters fixed for now
    svr_model.fit(X_train, y_train)

    # Predict on the validation set
    svr_predictions = svr_model.predict(X_val)
    svr_mae = mean_absolute_error(y_val, svr_predictions)
    print(f"Mean Absolute Error with kernel {kernel}: {svr_mae}")

    # Check if this kernel is the best so far
    if svr_mae < best_mae:
        best_mae = svr_mae
        best_kernel = kernel
        best_model = svr_model

print(f"Best kernel: {best_kernel} with MAE: {best_mae}")

# Predict on the test set using the best model
svr_test_predictions = best_model.predict(test_df_encoded_scaled)

# Load the test dataset
temp_test_df = pd.read_csv('test.csv')

# SVM Predictions on test set
svr_output = pd.DataFrame({'trip_ID': temp_test_df['trip_ID'], 'category': svr_test_predictions})

# Apply binning to convert regression outputs into categories
svr_output['category'] = pd.cut(svr_output['category'], bins=[-float('inf'), 0.6, 1.3, float('inf')], labels=[0, 1, 2])

# Save predictions to a CSV file
svr_output.to_csv('submission_svr_tuned.csv', index=False)

print("Submission for tuned SVM model saved successfully.")


Training SVR with kernel: linear
Mean Absolute Error with kernel linear: 0.33002165030410735
Training SVR with kernel: poly
Mean Absolute Error with kernel poly: 0.3318558663207929
Training SVR with kernel: rbf
Mean Absolute Error with kernel rbf: 0.3145443155252721
Training SVR with kernel: sigmoid
Mean Absolute Error with kernel sigmoid: 13.60265789028028
Best kernel: rbf with MAE: 0.3145443155252721
Submission for tuned SVM model saved successfully.


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd

# Assuming `train_df_encoded` is your original dataset
# Define features and target
X = train_df_encoded.drop(columns=['category'])  # Features
y = train_df_encoded['category']  # Target

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, stratify=y, random_state=303)

# Initialize and apply the scaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Initialize Logistic Regression model
log_reg = LogisticRegression(random_state=303, max_iter=1000)

# Train the model
log_reg.fit(X_train, y_train)

# Predict on the validation set
y_pred = log_reg.predict(X_val)

# Evaluate performance
accuracy = accuracy_score(y_val, y_pred)
print(f"Validation Accuracy: {accuracy:.4f}")
print("\nClassification Report:\n", classification_report(y_val, y_pred))

# Load the test data
temp_test_df = pd.read_csv('test.csv')

# Scale the test data using the same scaler
test_df_encoded_scaled = scaler.transform(test_df_encoded)

# Predict on the test set
test_predictions = log_reg.predict(test_df_encoded_scaled)

# Save predictions to a CSV file
results_df = pd.DataFrame({
    'trip_ID': temp_test_df['trip_ID'],
    'category': test_predictions
})

results_df.to_csv('submission_logreg.csv', index=False)
print("Logistic Regression predictions saved to 'submission_logreg.csv'.")


Validation Accuracy: 0.7573

Classification Report:
               precision    recall  f1-score   support

         0.0       0.81      0.87      0.84      1248
         1.0       0.70      0.72      0.71       989
         2.0       0.70      0.42      0.52       293

    accuracy                           0.76      2530
   macro avg       0.74      0.67      0.69      2530
weighted avg       0.75      0.76      0.75      2530

Logistic Regression predictions saved to 'submission_logreg.csv'.


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import pandas as pd

# Define features and target
X = train_df_encoded.drop(columns=['category'])  # Features
y = train_df_encoded['category']  # Target

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, stratify=y, random_state=303)

# Initialize and apply the scaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Define hyperparameter grid
param_grid = {
    'penalty': ['l1', 'l2', 'elasticnet', 'none'],  # Regularization terms
    'C': [0.01, 0.1, 1, 10, 100],                  # Inverse regularization strength
    'solver': ['saga', 'liblinear', 'lbfgs'],      # Solvers for optimization
    'max_iter': [100, 500, 1000]                   # Maximum number of iterations
}

# Initialize Logistic Regression
log_reg = LogisticRegression(random_state=303)

# Perform Grid Search
grid_search = GridSearchCV(
    estimator=log_reg,
    param_grid=param_grid,
    scoring='accuracy',
    cv=3,  # 3-fold cross-validation
    n_jobs=-1,
    verbose=2
)

# Fit GridSearchCV
print("Starting Grid Search...")
grid_search.fit(X_train, y_train)

# Get the best parameters and model
best_params = grid_search.best_params_
best_model = grid_search.best_estimator_
print("Best Parameters:", best_params)

# Evaluate on the validation set
y_pred = best_model.predict(X_val)
accuracy = accuracy_score(y_val, y_pred)
print(f"Validation Accuracy: {accuracy:.4f}")
print("\nClassification Report:\n", classification_report(y_val, y_pred))

# Load the test data
temp_test_df = pd.read_csv('test.csv')

# Scale the test data using the same scaler
test_df_encoded_scaled = scaler.transform(test_df_encoded)

# Predict on the test set
test_predictions = best_model.predict(test_df_encoded_scaled)

# Save predictions to a CSV file
results_df = pd.DataFrame({
    'trip_ID': temp_test_df['trip_ID'],
    'category': test_predictions
})

results_df.to_csv('submission_logreg_tuned.csv', index=False)
print("Tuned Logistic Regression predictions saved to 'submission_logreg_tuned.csv'.")


Starting Grid Search...
Fitting 3 folds for each of 180 candidates, totalling 540 fits


KeyboardInterrupt: 

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

# Define features and target
X = train_df_encoded.drop(columns=['category'])  # Features
y = train_df_encoded['category']  # Target

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, stratify=y, random_state=303)

# Initialize and apply the scaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Define hyperparameter space for RandomizedSearch
param_dist = {
    'penalty': ['l1', 'l2', 'elasticnet', 'none'],  # Regularization terms
    'C': np.logspace(-4, 4, 20),                    # Wide range of regularization strength
    'solver': ['saga', 'liblinear', 'lbfgs'],       # Solvers for optimization
    'max_iter': [100, 200, 500, 1000]               # Maximum iterations
}

# Initialize Logistic Regression
log_reg = LogisticRegression(random_state=303)

# Perform Randomized Search
random_search = RandomizedSearchCV(
    estimator=log_reg,
    param_distributions=param_dist,
    n_iter=50,               # Number of parameter combinations to try
    scoring='accuracy',      # Metric for optimization
    cv=3,                    # 3-fold cross-validation
    n_jobs=-1,               # Use all available cores
    random_state=303,        # For reproducibility
    verbose=2
)

# Fit RandomizedSearchCV
print("Starting Randomized Search...")
random_search.fit(X_train, y_train)

# Get the best parameters and model
best_params = random_search.best_params_
best_model = random_search.best_estimator_
print("Best Parameters:", best_params)

# Evaluate on the validation set
y_pred = best_model.predict(X_val)
accuracy = accuracy_score(y_val, y_pred)
print(f"Validation Accuracy: {accuracy:.4f}")
print("\nClassification Report:\n", classification_report(y_val, y_pred))

# Load the test data
temp_test_df = pd.read_csv('test.csv')

# Scale the test data using the same scaler
test_df_encoded_scaled = scaler.transform(test_df_encoded)

# Predict on the test set
test_predictions = best_model.predict(test_df_encoded_scaled)

# Save predictions to a CSV file
results_df = pd.DataFrame({
    'trip_ID': temp_test_df['trip_ID'],
    'category': test_predictions
})

results_df.to_csv('submission_logreg_random_tuned.csv', index=False)
print("Tuned Logistic Regression predictions saved to 'submission_logreg_random_tuned.csv'.")


Starting Randomized Search...
Fitting 3 folds for each of 50 candidates, totalling 150 fits


93 fits failed out of a total of 150.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
18 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/base.py", line 1473, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py", line 1194, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py", line 75, in _check_solver
   

Best Parameters: {'solver': 'saga', 'penalty': 'l1', 'max_iter': 100, 'C': 10000.0}
Validation Accuracy: 0.7569

Classification Report:
               precision    recall  f1-score   support

         0.0       0.81      0.87      0.84      1248
         1.0       0.70      0.72      0.71       989
         2.0       0.70      0.42      0.52       293

    accuracy                           0.76      2530
   macro avg       0.74      0.67      0.69      2530
weighted avg       0.75      0.76      0.75      2530

Tuned Logistic Regression predictions saved to 'submission_logreg_random_tuned.csv'.




In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
import pandas as pd

# Define features and target
X = train_df_encoded.drop(columns=['category'])  # Features
y = train_df_encoded['category']  # Target

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, stratify=y, random_state=303)

# Scale the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Convert target to categorical if it’s a classification task
num_classes = len(y.unique())
y_train = tf.keras.utils.to_categorical(y_train, num_classes)
y_val = tf.keras.utils.to_categorical(y_val, num_classes)

# Build the neural network
def build_model(input_dim, num_classes):
    model = Sequential([
        Dense(128, activation='relu', input_shape=(input_dim,)),
        BatchNormalization(),
        Dropout(0.3),
        Dense(64, activation='relu'),
        BatchNormalization(),
        Dropout(0.3),
        Dense(32, activation='relu'),
        Dense(num_classes, activation='softmax')  # Softmax for classification
    ])
    return model

# Compile the model
model = build_model(input_dim=X_train.shape[1], num_classes=num_classes)
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train the model
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=32,
    verbose=1
)

# Evaluate the model
val_loss, val_accuracy = model.evaluate(X_val, y_val, verbose=0)
print(f"Validation Accuracy: {val_accuracy:.4f}")

# Classification report
y_pred_val = model.predict(X_val)
y_pred_classes = y_pred_val.argmax(axis=1)
y_val_classes = y_val.argmax(axis=1)
print("\nClassification Report:\n", classification_report(y_val_classes, y_pred_classes))

# Load the test data
temp_test_df = pd.read_csv('test.csv')

# Scale the test data
test_df_encoded_scaled = scaler.transform(test_df_encoded)

# Predict on the test set
test_predictions = model.predict(test_df_encoded_scaled)
test_pred_classes = test_predictions.argmax(axis=1)

# Save predictions to a CSV file
results_df = pd.DataFrame({
    'trip_ID': temp_test_df['trip_ID'],
    'category': test_pred_classes
})

results_df.to_csv('submission_nn.csv', index=False)
print("Neural Network predictions saved to 'submission_nn.csv'.")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m317/317[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 6ms/step - accuracy: 0.6113 - loss: 0.9334 - val_accuracy: 0.7348 - val_loss: 0.6473
Epoch 2/50
[1m317/317[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.7168 - loss: 0.6646 - val_accuracy: 0.7415 - val_loss: 0.6227
Epoch 3/50
[1m317/317[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7250 - loss: 0.6429 - val_accuracy: 0.7451 - val_loss: 0.6241
Epoch 4/50
[1m317/317[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7297 - loss: 0.6409 - val_accuracy: 0.7478 - val_loss: 0.6157
Epoch 5/50
[1m317/317[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7326 - loss: 0.6288 - val_accuracy: 0.7407 - val_loss: 0.6173
Epoch 6/50
[1m317/317[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7332 - loss: 0.6307 - val_accuracy: 0.7506 - val_loss: 0.6149
Epoch 7/50
[1m317/317[0m 

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error
import pandas as pd

# Define features and target
X = train_df_encoded.drop(columns=['category'])  # Features
y = train_df_encoded['category']  # Target (continuous)

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=303)

# Scale the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Build the neural network
def build_model(input_dim):
    model = Sequential([
        Dense(128, activation='relu', input_shape=(input_dim,)),
        BatchNormalization(),
        Dropout(0.3),
        Dense(64, activation='relu'),
        BatchNormalization(),
        Dropout(0.3),
        Dense(32, activation='relu'),
        Dense(1, activation='linear')  # Linear activation for regression
    ])
    return model

# Compile the model
model = build_model(input_dim=X_train.shape[1])
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='mean_squared_error',
              metrics=['mean_absolute_error'])

# Train the model
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=32,
    verbose=1
)

# Evaluate the model
val_loss, val_mae = model.evaluate(X_val, y_val, verbose=0)
print(f"Validation MAE: {val_mae:.4f}")

# Predict on the validation set
y_val_pred = model.predict(X_val)
val_mae_score = mean_absolute_error(y_val, y_val_pred)
print(f"Mean Absolute Error on Validation Data: {val_mae_score:.4f}")

# Apply binning to the predictions
y_val_binned = pd.cut(y_val_pred.flatten(), bins=[-float('inf'), 0.6, 1.3, float('inf')], labels=[0, 1, 2])

# Load the test data
temp_test_df = pd.read_csv('test.csv')

# Scale the test data
test_df_encoded_scaled = scaler.transform(test_df_encoded)

# Predict on the test set
test_predictions = model.predict(test_df_encoded_scaled)

# Apply binning to the test predictions
test_binned = pd.cut(test_predictions.flatten(), bins=[-float('inf'), 0.6, 1.3, float('inf')], labels=[0, 1, 2])

# Save predictions to a CSV file
results_df = pd.DataFrame({
    'trip_ID': temp_test_df['trip_ID'],
    'category': test_binned
})

results_df.to_csv('submission_nn_binned.csv', index=False)
print("Neural Network predictions with bins saved to 'submission_nn_binned.csv'.")


Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m317/317[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 8ms/step - loss: 0.7901 - mean_absolute_error: 0.6773 - val_loss: 0.2490 - val_mean_absolute_error: 0.3805
Epoch 2/50
[1m317/317[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 11ms/step - loss: 0.3358 - mean_absolute_error: 0.4497 - val_loss: 0.2292 - val_mean_absolute_error: 0.3643
Epoch 3/50
[1m317/317[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.2757 - mean_absolute_error: 0.4048 - val_loss: 0.2240 - val_mean_absolute_error: 0.3414
Epoch 4/50
[1m317/317[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 0.2664 - mean_absolute_error: 0.3897 - val_loss: 0.2205 - val_mean_absolute_error: 0.3502
Epoch 5/50
[1m317/317[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.2460 - mean_absolute_error: 0.3754 - val_loss: 0.2198 - val_mean_absolute_error: 0.3555
Epoch 6/50
[1m317/317[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - lo

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error
import pandas as pd

# Define features and target
X = train_df_encoded.drop(columns=['category'])  # Features
y = train_df_encoded['category']  # Target (continuous)

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=303)

# Scale the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Optimized Neural Network Function
def build_optimized_model(input_dim):
    model = Sequential([
        Dense(128, activation='relu', input_shape=(input_dim,)),
        BatchNormalization(),
        Dropout(0.2),  # Reduced dropout for improved learning
        Dense(64, activation='relu'),
        BatchNormalization(),
        Dropout(0.2),
        Dense(32, activation='relu'),
        Dense(1, activation='linear')  # Linear activation for regression
    ])
    return model

# Compile the model
model = build_optimized_model(input_dim=X_train.shape[1])
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='mean_squared_error',
              metrics=['mean_absolute_error'])

# Add callbacks for optimization
callbacks = [
    EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True, verbose=1),
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-5, verbose=1)
]

# Train the model
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=100,  # Train up to 100 epochs with early stopping
    batch_size=64,  # Larger batch size for faster computation
    callbacks=callbacks,
    verbose=1
)

# Evaluate the model
val_loss, val_mae = model.evaluate(X_val, y_val, verbose=0)
print(f"Validation MAE: {val_mae:.4f}")

# Predict on the validation set
y_val_pred = model.predict(X_val)
val_mae_score = mean_absolute_error(y_val, y_val_pred)
print(f"Mean Absolute Error on Validation Data: {val_mae_score:.4f}")

# Apply binning to the predictions
y_val_binned = pd.cut(y_val_pred.flatten(), bins=[-float('inf'), 0.6, 1.3, float('inf')], labels=[0, 1, 2])

# Load the test data
temp_test_df = pd.read_csv('test.csv')

# Scale the test data
test_df_encoded_scaled = scaler.transform(test_df_encoded)

# Predict on the test set
test_predictions = model.predict(test_df_encoded_scaled)

# Apply binning to the test predictions
test_binned = pd.cut(test_predictions.flatten(), bins=[-float('inf'), 0.6, 1.3, float('inf')], labels=[0, 1, 2])

# Save predictions to a CSV file
results_df = pd.DataFrame({
    'trip_ID': temp_test_df['trip_ID'],
    'category': test_binned
})

results_df.to_csv('submission_nn_binned_optimized.csv', index=False)
print("Optimized Neural Network predictions with bins saved to 'submission_nn_binned_optimized.csv'.")


Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m159/159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 13ms/step - loss: 0.9423 - mean_absolute_error: 0.7227 - val_loss: 0.2957 - val_mean_absolute_error: 0.4530 - learning_rate: 0.0010
Epoch 2/100
[1m159/159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - loss: 0.3369 - mean_absolute_error: 0.4493 - val_loss: 0.2499 - val_mean_absolute_error: 0.3928 - learning_rate: 0.0010
Epoch 3/100
[1m159/159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - loss: 0.2928 - mean_absolute_error: 0.4120 - val_loss: 0.2306 - val_mean_absolute_error: 0.3753 - learning_rate: 0.0010
Epoch 4/100
[1m159/159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - loss: 0.2637 - mean_absolute_error: 0.3889 - val_loss: 0.2219 - val_mean_absolute_error: 0.3649 - learning_rate: 0.0010
Epoch 5/100
[1m159/159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.2563 - mean_absolute_error: 0.3815 - val_loss: 0.2211 - val_mean_absolute_err

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error
import pandas as pd

# Define features and target
X = train_df_encoded.drop(columns=['category'])  # Features
y = train_df_encoded['category']  # Target

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=303)

# Scale the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Advanced Neural Network Function
def build_advanced_model(input_dim):
    model = Sequential([
        Input(shape=(input_dim,)),
        Dense(256, activation='relu'),
        BatchNormalization(),
        Dropout(0.3),  # Increased dropout for regularization
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(0.3),
        Dense(64, activation='relu'),
        Dense(1, activation='linear')  # Linear activation for regression
    ])
    return model

# Compile the model
model = build_advanced_model(input_dim=X_train.shape[1])
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='mean_squared_error',
              metrics=['mean_absolute_error'])

# Callbacks for optimization
callbacks = [
    EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True, verbose=1),
    ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=1e-6, verbose=1)
]

# Train the model with class balancing
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=200,  # Allow longer training with early stopping
    batch_size=32,  # Smaller batch size for better generalization
    callbacks=callbacks,
    verbose=1
)

# Evaluate the model
val_loss, val_mae = model.evaluate(X_val, y_val, verbose=0)
print(f"Validation MAE: {val_mae:.4f}")

# Predict on the validation set
y_val_pred = model.predict(X_val)
val_mae_score = mean_absolute_error(y_val, y_val_pred)
print(f"Mean Absolute Error on Validation Data: {val_mae_score:.4f}")

# Apply binning to the predictions
y_val_binned = pd.cut(y_val_pred.flatten(), bins=[-float('inf'), 0.6, 1.3, float('inf')], labels=[0, 1, 2])

# Load the test data
temp_test_df = pd.read_csv('test.csv')

# Scale the test data
test_df_encoded_scaled = scaler.transform(test_df_encoded)

# Predict on the test set
test_predictions = model.predict(test_df_encoded_scaled)

# Apply binning to the test predictions
test_binned = pd.cut(test_predictions.flatten(), bins=[-float('inf'), 0.6, 1.3, float('inf')], labels=[0, 1, 2])

# Save predictions to a CSV file
results_df = pd.DataFrame({
    'trip_ID': temp_test_df['trip_ID'],
    'category': test_binned
})

results_df.to_csv('submission_nn_binned_optimized_2.csv', index=False)
print("Optimized Neural Network predictions with bins saved to 'submission_nn_optimized_binned.csv'.")


Epoch 1/200
[1m317/317[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 16ms/step - loss: 0.7267 - mean_absolute_error: 0.6435 - val_loss: 0.2527 - val_mean_absolute_error: 0.3577 - learning_rate: 0.0010
Epoch 2/200
[1m317/317[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 10ms/step - loss: 0.3204 - mean_absolute_error: 0.4369 - val_loss: 0.2531 - val_mean_absolute_error: 0.3610 - learning_rate: 0.0010
Epoch 3/200
[1m317/317[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 15ms/step - loss: 0.2790 - mean_absolute_error: 0.3995 - val_loss: 0.2386 - val_mean_absolute_error: 0.3388 - learning_rate: 0.0010
Epoch 4/200
[1m317/317[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - loss: 0.2543 - mean_absolute_error: 0.3789 - val_loss: 0.2237 - val_mean_absolute_error: 0.3314 - learning_rate: 0.0010
Epoch 5/200
[1m317/317[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - loss: 0.2424 - mean_absolute_error: 0.3651 - val_loss: 0.2231 - val_mea

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error
import pandas as pd

# Define features and target
X = train_df_encoded.drop(columns=['category'])  # Features
y = train_df_encoded['category']  # Target

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.23, random_state=303)

# Scale the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Advanced Neural Network Function
def build_advanced_model(input_dim):
    model = Sequential([
        Input(shape=(input_dim,)),
        Dense(256, activation='relu'),
        BatchNormalization(),
        Dropout(0.3),  # Increased dropout for regularization
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(0.3),
        Dense(64, activation='relu'),
        Dense(1, activation='linear')  # Linear activation for regression
    ])
    return model

# Compile the model
model = build_advanced_model(input_dim=X_train.shape[1])
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='mean_squared_error',
              metrics=['mean_absolute_error'])

# Callbacks for optimization
callbacks = [
    EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True, verbose=1),
    ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=1e-6, verbose=1)
]

# Train the model with class balancing
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=200,  # Allow longer training with early stopping
    batch_size=32,  # Smaller batch size for better generalization
    callbacks=callbacks,
    verbose=1
)

# Evaluate the model
val_loss, val_mae = model.evaluate(X_val, y_val, verbose=0)
print(f"Validation MAE: {val_mae:.4f}")

# Predict on the validation set
y_val_pred = model.predict(X_val)
val_mae_score = mean_absolute_error(y_val, y_val_pred)
print(f"Mean Absolute Error on Validation Data: {val_mae_score:.4f}")

# Apply binning to the predictions
y_val_binned = pd.cut(y_val_pred.flatten(), bins=[-float('inf'), 0.6, 1.3, float('inf')], labels=[0, 1, 2])

# Load the test data
temp_test_df = pd.read_csv('test.csv')

# Scale the test data
test_df_encoded_scaled = scaler.transform(test_df_encoded)

# Predict on the test set
test_predictions = model.predict(test_df_encoded_scaled)

# Apply binning to the test predictions
test_binned = pd.cut(test_predictions.flatten(), bins=[-float('inf'), 0.6, 1.3, float('inf')], labels=[0, 1, 2])

# Save predictions to a CSV file
results_df = pd.DataFrame({
    'trip_ID': temp_test_df['trip_ID'],
    'category': test_binned
})

results_df.to_csv('submission_nn_binned_optimized_test_size_changed.csv', index=False)
print("Optimized Neural Network predictions with bins saved to 'submission_nn_binned_optimized_test_size_changed.csv'.")


Epoch 1/200
[1m305/305[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: 0.6993 - mean_absolute_error: 0.6398 - val_loss: 0.2417 - val_mean_absolute_error: 0.3496 - learning_rate: 0.0010
Epoch 2/200
[1m305/305[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.3305 - mean_absolute_error: 0.4404 - val_loss: 0.2378 - val_mean_absolute_error: 0.3482 - learning_rate: 0.0010
Epoch 3/200
[1m305/305[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.2800 - mean_absolute_error: 0.4032 - val_loss: 0.2227 - val_mean_absolute_error: 0.3501 - learning_rate: 0.0010
Epoch 4/200
[1m305/305[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.2581 - mean_absolute_error: 0.3862 - val_loss: 0.2286 - val_mean_absolute_error: 0.3654 - learning_rate: 0.0010
Epoch 5/200
[1m305/305[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.2370 - mean_absolute_error: 0.3669 - val_loss: 0.2181 - val_mean_ab

In [20]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.initializers import HeNormal
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error
import pandas as pd

# Define features and target
X = train_df_encoded.drop(columns=['category'])  # Features
y = train_df_encoded['category']  # Target

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=303)

# Scale the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Advanced Neural Network Function with Best Weight Initialization
def build_advanced_model(input_dim):
    model = Sequential([
        Input(shape=(input_dim,)),
        Dense(256, activation='relu', kernel_initializer=HeNormal()),  # He initialization
        BatchNormalization(),
        Dropout(0.3),
        Dense(128, activation='relu', kernel_initializer=HeNormal()),  # He initialization
        BatchNormalization(),
        Dropout(0.3),
        Dense(64, activation='relu', kernel_initializer=HeNormal()),  # He initialization
        Dense(1, activation='linear', kernel_initializer='glorot_uniform')  # Xavier initialization for linear output
    ])
    return model

# Compile the model
model = build_advanced_model(input_dim=X_train.shape[1])
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='mean_squared_error',
              metrics=['mean_absolute_error'])

# Callbacks for optimization
callbacks = [
    EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True, verbose=1),
    ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=1e-6, verbose=1)
]

# Train the model
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=200,
    batch_size=32,
    callbacks=callbacks,
    verbose=1
)

# Evaluate the model
val_loss, val_mae = model.evaluate(X_val, y_val, verbose=0)
print(f"Validation MAE: {val_mae:.4f}")

# Predict on the validation set
y_val_pred = model.predict(X_val)
val_mae_score = mean_absolute_error(y_val, y_val_pred)
print(f"Mean Absolute Error on Validation Data: {val_mae_score:.4f}")

# Apply binning to the predictions
y_val_binned = pd.cut(y_val_pred.flatten(), bins=[-float('inf'), 0.6, 1.3, float('inf')], labels=[0, 1, 2])

# Load the test data
temp_test_df = pd.read_csv('test.csv')

# Scale the test data
test_df_encoded_scaled = scaler.transform(test_df_encoded)

# Predict on the test set
test_predictions = model.predict(test_df_encoded_scaled)

# Apply binning to the test predictions
test_binned = pd.cut(test_predictions.flatten(), bins=[-float('inf'), 0.6, 1.3, float('inf')], labels=[0, 1, 2])

# Save predictions to a CSV file
results_df = pd.DataFrame({
    'trip_ID': temp_test_df['trip_ID'],
    'category': test_binned
})

results_df.to_csv('submission_nn_binned_optimized_he.csv', index=False)
print("Optimized Neural Network predictions with bins saved to 'submission_nn_binned_optimized_he.csv'.")


Epoch 1/200
[1m317/317[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 7ms/step - loss: 1.2529 - mean_absolute_error: 0.8255 - val_loss: 0.2871 - val_mean_absolute_error: 0.4145 - learning_rate: 0.0010
Epoch 2/200
[1m317/317[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - loss: 0.3873 - mean_absolute_error: 0.4816 - val_loss: 0.2441 - val_mean_absolute_error: 0.3748 - learning_rate: 0.0010
Epoch 3/200
[1m317/317[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - loss: 0.3107 - mean_absolute_error: 0.4268 - val_loss: 0.2318 - val_mean_absolute_error: 0.3677 - learning_rate: 0.0010
Epoch 4/200
[1m317/317[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - loss: 0.2715 - mean_absolute_error: 0.3971 - val_loss: 0.2302 - val_mean_absolute_error: 0.3477 - learning_rate: 0.0010
Epoch 5/200
[1m317/317[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step - loss: 0.2614 - mean_absolute_error: 0.3848 - val_loss: 0.2210 - val_mean_ab

In [21]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, LearningRateScheduler
from tensorflow.keras.regularizers import l2
from tensorflow.keras.initializers import HeNormal
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error
import pandas as pd
import numpy as np

# Define features and target
X = train_df_encoded.drop(columns=['category'])  # Features
y = train_df_encoded['category']  # Target

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=303)

# Scale the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Advanced Neural Network Function with Regularization and Weight Initialization
def build_advanced_model(input_dim):
    model = Sequential([
        Input(shape=(input_dim,)),
        Dense(256, activation='relu', kernel_initializer=HeNormal(), kernel_regularizer=l2(1e-4)),
        BatchNormalization(),
        Dropout(0.4),  # Increased dropout for robustness
        Dense(128, activation='relu', kernel_initializer=HeNormal(), kernel_regularizer=l2(1e-4)),
        BatchNormalization(),
        Dropout(0.4),
        Dense(64, activation='relu', kernel_initializer=HeNormal(), kernel_regularizer=l2(1e-4)),
        Dense(1, activation='linear', kernel_initializer='glorot_uniform')  # Linear for regression
    ])
    return model

# Learning Rate Scheduler
def lr_schedule(epoch):
    initial_lr = 0.001
    if epoch > 50:
        return initial_lr * 0.1
    elif epoch > 100:
        return initial_lr * 0.01
    return initial_lr

# Compile the model
model = build_advanced_model(input_dim=X_train.shape[1])
optimizer = Adam(learning_rate=0.001, clipnorm=1.0)  # Gradient clipping added
model.compile(optimizer=optimizer,
              loss='mean_squared_error',
              metrics=['mean_absolute_error'])

# Callbacks for optimization
callbacks = [
    EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True, verbose=1),
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6, verbose=1),
    LearningRateScheduler(lr_schedule, verbose=1)
]

# Train the model
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=150,  # Slightly reduced epochs with LR scheduling
    batch_size=64,  # Larger batch size for better computation
    callbacks=callbacks,
    verbose=1
)

# Evaluate the model
val_loss, val_mae = model.evaluate(X_val, y_val, verbose=0)
print(f"Validation MAE: {val_mae:.4f}")

# Predict on the validation set
y_val_pred = model.predict(X_val)
val_mae_score = mean_absolute_error(y_val, y_val_pred)
print(f"Mean Absolute Error on Validation Data: {val_mae_score:.4f}")

# Apply binning to the predictions
y_val_binned = pd.cut(y_val_pred.flatten(), bins=[-float('inf'), 0.6, 1.3, float('inf')], labels=[0, 1, 2])

# Load the test data
temp_test_df = pd.read_csv('test.csv')

# Scale the test data
test_df_encoded_scaled = scaler.transform(test_df_encoded)

# Predict on the test set
test_predictions = model.predict(test_df_encoded_scaled)

# Apply binning to the test predictions
test_binned = pd.cut(test_predictions.flatten(), bins=[-float('inf'), 0.6, 1.3, float('inf')], labels=[0, 1, 2])

# Save predictions to a CSV file
results_df = pd.DataFrame({
    'trip_ID': temp_test_df['trip_ID'],
    'category': test_binned
})

results_df.to_csv('submission_nn_optimized_3.csv', index=False)
print("Optimized Neural Network predictions with bins saved to 'submission_nn_optimized_3.csv'.")



Epoch 1: LearningRateScheduler setting learning rate to 0.001.
Epoch 1/150
[1m159/159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 12ms/step - loss: 1.4066 - mean_absolute_error: 0.8577 - val_loss: 0.4432 - val_mean_absolute_error: 0.4340 - learning_rate: 0.0010

Epoch 2: LearningRateScheduler setting learning rate to 0.001.
Epoch 2/150
[1m159/159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 10ms/step - loss: 0.4999 - mean_absolute_error: 0.4993 - val_loss: 0.3830 - val_mean_absolute_error: 0.3862 - learning_rate: 0.0010

Epoch 3: LearningRateScheduler setting learning rate to 0.001.
Epoch 3/150
[1m159/159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - loss: 0.4073 - mean_absolute_error: 0.4370 - val_loss: 0.3309 - val_mean_absolute_error: 0.3381 - learning_rate: 0.0010

Epoch 4: LearningRateScheduler setting learning rate to 0.001.
Epoch 4/150
[1m159/159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - loss: 0.3674 - mean_abs

In [20]:
pip install optuna

Collecting optuna
  Downloading optuna-4.1.0-py3-none-any.whl.metadata (16 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.14.0-py3-none-any.whl.metadata (7.4 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.6-py3-none-any.whl.metadata (2.9 kB)
Downloading optuna-4.1.0-py3-none-any.whl (364 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m364.4/364.4 kB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.14.0-py3-none-any.whl (233 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.5/233.5 kB[0m [31m14.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Downloading Mako-1.3.6-py3-none-any.whl (78 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: Ma

In [21]:
pip install optuna-integration[tfkeras]

Collecting optuna-integration[tfkeras]
  Downloading optuna_integration-4.1.0-py3-none-any.whl.metadata (12 kB)
Downloading optuna_integration-4.1.0-py3-none-any.whl (97 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m97.4/97.4 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: optuna-integration
Successfully installed optuna-integration-4.1.0


In [None]:
import optuna
from optuna.integration import TFKerasPruningCallback
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from tensorflow.keras.initializers import HeNormal
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error
import pandas as pd

# Define features and target
X = train_df_encoded.drop(columns=['category'])  # Features
y = train_df_encoded['category']  # Target

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=303)

# Scale the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Define Optuna objective function
def objective(trial):
    # Define hyperparameters to tune
    n_units_1 = trial.suggest_int('n_units_1', 128, 512, step=64)  # Layer 1 units
    n_units_2 = trial.suggest_int('n_units_2', 64, 256, step=32)   # Layer 2 units
    dropout_rate = trial.suggest_float('dropout_rate', 0.2, 0.5, step=0.1)
    l2_reg = trial.suggest_float('l2_reg', 1e-5, 1e-3, log=True)   # L2 regularization
    learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-2, log=True)
    batch_size = trial.suggest_int('batch_size', 16, 128, step=16) # Batch size

    # Build the model
    model = Sequential([
        Input(shape=(X_train.shape[1],)),
        Dense(n_units_1, activation='relu', kernel_initializer=HeNormal(), kernel_regularizer=l2(l2_reg)),
        BatchNormalization(),
        Dropout(dropout_rate),
        Dense(n_units_2, activation='relu', kernel_initializer=HeNormal(), kernel_regularizer=l2(l2_reg)),
        BatchNormalization(),
        Dropout(dropout_rate),
        Dense(1, activation='linear', kernel_initializer='glorot_uniform')  # Regression output
    ])

    # Compile the model
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='mean_squared_error',
                  metrics=['mean_absolute_error'])

    # Train the model with early stopping
    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=50,
        batch_size=batch_size,
        callbacks=[TFKerasPruningCallback(trial, 'val_loss')],
        verbose=0
    )

    # Evaluate the model
    val_loss, val_mae = model.evaluate(X_val, y_val, verbose=0)
    return val_mae  # Minimize MAE

# Run the Optuna study
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)

# Output the best hyperparameters
print("Best hyperparameters: ", study.best_params)

# Train the final model with best hyperparameters
best_params = study.best_params
model = Sequential([
    Input(shape=(X_train.shape[1],)),
    Dense(best_params['n_units_1'], activation='relu', kernel_initializer=HeNormal(),
          kernel_regularizer=l2(best_params['l2_reg'])),
    BatchNormalization(),
    Dropout(best_params['dropout_rate']),
    Dense(best_params['n_units_2'], activation='relu', kernel_initializer=HeNormal(),
          kernel_regularizer=l2(best_params['l2_reg'])),
    BatchNormalization(),
    Dropout(best_params['dropout_rate']),
    Dense(1, activation='linear', kernel_initializer='glorot_uniform')
])

model.compile(optimizer=Adam(learning_rate=best_params['learning_rate']),
              loss='mean_squared_error',
              metrics=['mean_absolute_error'])

# Train the optimized model
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=100,
    batch_size=best_params['batch_size'],
    callbacks=[
        EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True, verbose=1)
    ],
    verbose=1
)

# Evaluate the optimized model
val_loss, val_mae = model.evaluate(X_val, y_val, verbose=0)
print(f"Optimized Model Validation MAE: {val_mae:.4f}")

# Save predictions to a CSV file
y_val_pred = model.predict(X_val)
test_predictions = model.predict(scaler.transform(test_df_encoded))

results_df = pd.DataFrame({
    'trip_ID': temp_test_df['trip_ID'],
    'category': pd.cut(test_predictions.flatten(), bins=[-float('inf'), 0.6, 1.3, float('inf')], labels=[0, 1, 2])
})
results_df.to_csv('submission_nn_optimized_optuna.csv', index=False)
print("Predictions saved to 'submission_nn_optimized_optuna.csv'.")


[I 2024-11-28 05:09:08,114] A new study created in memory with name: no-name-d4f77e75-4418-4302-b660-1fd1e30a72c1
[I 2024-11-28 05:09:44,957] Trial 0 finished with value: 0.3428192138671875 and parameters: {'n_units_1': 512, 'n_units_2': 224, 'dropout_rate': 0.30000000000000004, 'l2_reg': 0.0009612830523963407, 'learning_rate': 0.0003695540570307106, 'batch_size': 112}. Best is trial 0 with value: 0.3428192138671875.
[I 2024-11-28 05:10:08,751] Trial 1 finished with value: 0.3376930058002472 and parameters: {'n_units_1': 384, 'n_units_2': 96, 'dropout_rate': 0.2, 'l2_reg': 0.0005416555466629026, 'learning_rate': 0.00034288290055904853, 'batch_size': 96}. Best is trial 1 with value: 0.3376930058002472.
[I 2024-11-28 05:10:31,640] Trial 2 finished with value: 0.34121254086494446 and parameters: {'n_units_1': 512, 'n_units_2': 160, 'dropout_rate': 0.4, 'l2_reg': 1.0300315275292269e-05, 'learning_rate': 0.00033122767660977035, 'batch_size': 96}. Best is trial 1 with value: 0.33769300580024