In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from itertools import product

# Load the data
data = pd.read_csv('jobs_in_data.csv')

# Drop the 'salary_in_usd' column since we're ignoring it
data.drop(columns=['salary_in_usd'], inplace=True)

# Encode categorical variables
data = pd.get_dummies(data)
# Separate features and target
X = data.drop(columns=['salary'])
y = data['salary'] / 1000  # Divide the 'salary' column by 1000 here

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the neural network model
def create_model(learning_rate, dropout):
    model = Sequential([
        Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
        Dense(64, activation='relu'),
        Dense(1, activation='linear')
    ])
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mean_squared_error')
    return model

# Define the hyperparameters to search
learning_rates = [0.01, 0.001, 0.0001]
dropouts = [0, 0.2, 0.5]
epochs = [50, 100, 200]

# Perform grid search
results = []

for lr, dropout_rate, epoch in product(learning_rates, dropouts, epochs):
    model = create_model(lr, dropout_rate)
    model.fit(X_train_scaled, y_train, epochs=epoch, batch_size=32, verbose=0)
    loss = model.evaluate(X_test_scaled, y_test, verbose=0)
    results.append((lr, dropout_rate, epoch, loss))

# Create a DataFrame to display the results
results_df = pd.DataFrame(results, columns=['Learning Rate', 'Dropout Rate', 'Epochs', 'Test Loss'])
print(results_df)

# Find the combination with the lowest test loss
best_combination = results_df.loc[results_df['Test Loss'].idxmin()]
print("\nBest Combination:")
print(best_combination)




    Learning Rate  Dropout Rate  Epochs    Test Loss
0          0.0100           0.0      50  2555.965820
1          0.0100           0.0     100  2646.629883
2          0.0100           0.0     200  2549.389893
3          0.0100           0.2      50  2601.021973
4          0.0100           0.2     100  2615.562744
5          0.0100           0.2     200  2579.971680
6          0.0100           0.5      50  2593.843506
7          0.0100           0.5     100  2717.121338
8          0.0100           0.5     200  2571.627197
9          0.0010           0.0      50  2597.697754
10         0.0010           0.0     100  2630.652344
11         0.0010           0.0     200  2577.537354
12         0.0010           0.2      50  2651.308594
13         0.0010           0.2     100  2643.282471
14         0.0010           0.2     200  2592.799561
15         0.0010           0.5      50  2580.681885
16         0.0010           0.5     100  2609.062256
17         0.0010           0.5     200  265