In [14]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import layers, regularizers

# Load train and test datasets
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')

# Checking information of the train data
train_df.info()

# Initialize LabelEncoder and dictionary to store encoders for each column
label_encoders = {}

# List of categorical columns to encode
categorical_columns = ['model', 'motor_type', 'wheel', 'color', 'status', 'type']

# Apply LabelEncoder to each categorical column for both train and test
for col in categorical_columns:
    label_encoders[col] = LabelEncoder()
    train_df[col] = label_encoders[col].fit_transform(train_df[col])
    test_df[col] = test_df[col].apply(lambda x: label_encoders[col].transform([x])[0] if x in label_encoders[col].classes_ else -1)

# Function to convert running values from 'km' to 'miles'
def convert_running(value):
    if 'km' in value:
        kilometers = int(value.split()[0])
        miles = kilometers * 0.621371
        return f"{miles:.2f} miles"
    elif 'miles' in value:
        return value
    else:
        return value 

# Apply the conversion for both train and test datasets
train_df['running'] = train_df['running'].apply(convert_running)
test_df['running'] = test_df['running'].apply(convert_running)

# Clean and convert 'running' column to numeric values
train_df['running'] = train_df['running'].str.replace('miles', '').str.strip()
test_df['running'] = test_df['running'].str.replace('miles', '').str.strip()
train_df['running'] = pd.to_numeric(train_df['running'], errors='coerce').fillna(0).astype('int64')
test_df['running'] = pd.to_numeric(test_df['running'], errors='coerce').fillna(0).astype('int64')

# Drop duplicates from the training data
train_df = train_df.drop_duplicates()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   id              1 non-null      int64 
 1   original_text   1 non-null      object
 2   rewrite_prompt  1 non-null      object
 3   rewritten_text  1 non-null      object
dtypes: int64(1), object(3)
memory usage: 164.0+ bytes


KeyError: 'model'

In [102]:
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1642 entries, 0 to 1641
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   model         1642 non-null   int32  
 1   year          1642 non-null   int64  
 2   motor_type    1642 non-null   int32  
 3   running       1642 non-null   int64  
 4   wheel         1642 non-null   int32  
 5   color         1642 non-null   int32  
 6   type          1642 non-null   int32  
 7   status        1642 non-null   int32  
 8   motor_volume  1642 non-null   float64
 9   price         1642 non-null   int64  
dtypes: float64(1), int32(6), int64(3)
memory usage: 89.9 KB


In [6]:

# Prepare features and target variable
X = train_df.drop(['price', 'wheel'], axis = 1) # Replace 'target_column' with your actual target column name
y = train_df['price']  # Your target variable

# Split the data into training and validation sets
# X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.2, random_state=42)

KeyError: "['price', 'wheel'] not found in axis"

In [106]:
import tensorflow as tf
print(tf.__version__)

2.17.0


In [17]:
pip install keras

Note: you may need to restart the kernel to use updated packages.


In [27]:
pip install tensorflow


Note: you may need to restart the kernel to use updated packages.


In [37]:
pip install scikeras[tensorflow-cpu]

Collecting scikeras[tensorflow-cpu]
  Downloading scikeras-0.13.0-py3-none-any.whl.metadata (3.1 kB)
Downloading scikeras-0.13.0-py3-none-any.whl (26 kB)
Installing collected packages: scikeras
Successfully installed scikeras-0.13.0
Note: you may need to restart the kernel to use updated packages.




In [108]:
from scikeras.wrappers import KerasRegressor

In [35]:

# from tensorflow.keras.wrappers.scikit_learn import KerasRegressor


ModuleNotFoundError: No module named 'tensorflow.keras.wrappers'

In [127]:
X_train.shape, X_temp.shape, y_train.shape, y_temp.shape

((1306, 8), (327, 8), (1306,), (327,))

In [129]:
X_test, X_val, y_test, y_val = train_test_split( X_temp, X_temp, test_size=0.5, random_state=42)

In [131]:
X_test.shape, X_val.shape, y_test.shape, y_val.shape

((163, 8), (164, 8), (163, 8), (164, 8))

In [133]:
# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_temp_scaled = scaler.transform(X_temp)
X_val_scaled = scaler.transform(X_val)
X_scaled = scaler.transform(test_df.drop(columns=['Id', 'wheel']))  # Adjust as necessary

In [110]:
scaler = StandardScaler()
X = scaler.fit_transform(X)
X_scaled = scaler.transform(test_df.drop(columns=['Id', 'wheel']))

In [11]:
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor


ModuleNotFoundError: No module named 'tensorflow.keras.wrappers'

In [80]:
from sklearn.model_selection import GridSearchCV
from scikeras.wrappers import KerasRegressor
from keras import layers, models
 
def create_model(learning_rate=0.01):
    model = models.Sequential()
    model.add(layers.Dense(64, activation='relu', input_shape=(X.shape[1],)))
    model.add(layers.Dense(1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model
 
model = KerasRegressor(build_fn=create_model, epochs=100, batch_size=10, learning_rate=0.001, verbose=0)
# `KerasRegressor(learning_rate=0.001)
param_grid = {'learning_rate': [0.001, 0.01, 0.1]}
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X, y)
 
print(f'Best: {grid_result.best_score_} using {grid_result.best_params_}')

  X, y = self._initialize(X, y)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Best: -0.24610183085594875 using {'learning_rate': 0.1}


In [134]:
optimizer = keras.optimizers.Adam(learning_rate=0.1)
model.compile(optimizer=optimizer, loss='mean_absolute_error', metrics=['mae'])

AttributeError: 'KerasRegressor' object has no attribute 'compile'

In [140]:
from sklearn.model_selection import RandomizedSearchCV
from scikeras.wrappers import KerasRegressor
from keras import layers, models
 
def create_model(learning_rate=0.01):
    model = keras.Sequential([
    layers.Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.01), input_shape=(X.shape[1],)),
    # layers.Dropout(0.3),
    layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
    # layers.Dropout(0.3),
    layers.Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
    layers.Dense(1)])  # Output layer for regression
    return model
 
model = KerasRegressor(build_fn=create_model, epochs=100, batch_size=10, learning_rate=0.001, verbose=0)
# `KerasRegressor(learning_rate=0.001)
# param_grid = {'batch_size': [10, 20, 40],
#     'epochs': [50, 100],
#     'learning_rate': [0.001, 0.01, 0.1]}
grid = RandomizedSearchCV(estimator=model, param_distributions= {'batch_size': [10, 20, 40],
    'epochs': [50, 100],
    'learning_rate': [0.001, 0.01, 0.1]}, n_jobs=-1, cv=3)
grid_result = grid.fit(X, y)
 
print(f'Best: {grid_result.best_score_} using {grid_result.best_params_}')

ValueError: 
All the 30 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
30 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\Rehana\anaconda3\Lib\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Rehana\anaconda3\Lib\site-packages\scikeras\wrappers.py", line 770, in fit
    self._fit(
  File "C:\Users\Rehana\anaconda3\Lib\site-packages\scikeras\wrappers.py", line 928, in _fit
    self._ensure_compiled_model()
  File "C:\Users\Rehana\anaconda3\Lib\site-packages\scikeras\wrappers.py", line 446, in _ensure_compiled_model
    raise ValueError("You must provide a loss or a compiled model")
ValueError: You must provide a loss or a compiled model


In [148]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
# from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import RandomizedSearchCV

# Function to create the model
def create_model(learning_rate=0.01):
    model = Sequential()
    model.add(Dense(30, activation='relu', input_shape=(X.shape[1],)))  # Ensure correct input shape
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1))  # Output layer
    model.compile(optimizer='adam', loss='mae')  # Ensure model is compiled with loss
    return model

# Create KerasRegressor
model = KerasRegressor(build_fn=create_model, learning_rate=0.01, verbose=0)

# Define the parameter distributions
param_distributions = {
    'batch_size': [10, 20, 40],
    'epochs': [50, 100],
    'learning_rate': [0.001, 0.01, 0.1]
}

# Create the RandomizedSearchCV object
grid = RandomizedSearchCV(estimator=model, param_distributions=param_distributions, n_jobs=-1, cv=3, n_iter=10)
grid_result = grid.fit(X, y)

# Print the best parameters and score
print(f'Best: {grid_result.best_score_} using {grid_result.best_params_}')


  X, y = self._initialize(X, y)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Best: 0.46881758714277044 using {'learning_rate': 0.001, 'epochs': 100, 'batch_size': 10}


In [None]:
from sklearn.model_selection import RandomizedSearchCV

# Define the model
model = KerasRegressor(build_fn=create_model, verbose=0)

# Define the parameter distributions
param_distributions = {
    'batch_size': [10, 20, 40],
    'epochs': [50, 100],
    'learning_rate': [0.001, 0.01, 0.1]
}

# Create the RandomizedSearchCV object
grid = RandomizedSearchCV(estimator=model, param_distributions=param_distributions, n_jobs=-1, cv=3, n_iter=10)
grid_result = grid.fit(x_train_scaled, y_train)

# Print the best parameters and best score
print(f'Best: {grid_result.best_score_} using {grid_result.best_params_}')


In [122]:
from sklearn.model_selection import RandomizedSearchCV
 
param_distributions = {
    'batch_size': [10, 20, 40],
    'epochs': [50, 100],
    'learning_rate': [0.001, 0.01, 0.1]
}

model = KerasRegressor(build_fn=model, epochs=100, batch_size=10, verbose=0)
random_search = RandomizedSearchCV(estimator=model, param_distributions=param_distributions, n_iter=5, cv=3, verbose=1)
random_search_result = random_search.fit(X, y)
 
# print(f'Best: {random_search_result.best_score_} using {random_search_result.best_params_}')

Fitting 3 folds for each of 5 candidates, totalling 15 fits


ValueError: Invalid parameter learning_rate for estimator KerasRegressor.
This issue can likely be resolved by setting this parameter in the KerasRegressor constructor:
`KerasRegressor(learning_rate=0.1)`
Check the list of available parameters with `estimator.get_params().keys()`

In [137]:
import warnings

warnings.filterwarnings('ignore')

In [154]:

# Define the deep learning model with improved architecture
model = keras.Sequential([
    layers.Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.01), input_shape=(X.shape[1],)),
    # layers.Dropout(0.3),
    layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
    # layers.Dropout(0.3),
    layers.Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
    layers.Dense(1)  # Output layer for regression
])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [156]:

# Compile the model with a learning rate scheduler
optimizer = keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='mean_absolute_error', metrics=['mae'])


In [152]:
optimizer = keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='mean_absolute_error', metrics=['mae'])

AttributeError: 'KerasRegressor' object has no attribute 'compile'

In [49]:

# # Fit the model with early stopping
# early_stopping = keras.callbacks.EarlyStopping(monitor='val_mae', patience=10, restore_best_weights=True)


In [158]:
model.fit(X, y,  epochs = 100, batch_size = 10)

Epoch 1/100
[1m165/165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 15790.1768 - mae: 15786.6963
Epoch 2/100
[1m165/165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 3995.0793 - mae: 3972.5393
Epoch 3/100
[1m165/165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 3286.3457 - mae: 3263.3560
Epoch 4/100
[1m165/165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 2978.4805 - mae: 2955.5396
Epoch 5/100
[1m165/165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 3022.5305 - mae: 2999.7568
Epoch 6/100
[1m165/165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 3008.3120 - mae: 2985.6716
Epoch 7/100
[1m165/165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 2882.6902 - mae: 2860.2097
Epoch 8/100
[1m165/165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 2820.1829 - mae: 2797.7700
Epoch 9/100
[1m165/16

<keras.src.callbacks.history.History at 0x1e63f03c080>

In [88]:
model.fit(X, y,
                    epochs=200, batch_size=32, verbose=1)

Epoch 1/200
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 8129.1431 - mae: 8104.6016
Epoch 2/200
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 3731.0955 - mae: 3691.5864
Epoch 3/200
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 3575.1614 - mae: 3537.0552
Epoch 4/200
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 3404.4873 - mae: 3370.9392
Epoch 5/200
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 3533.2515 - mae: 3500.7925
Epoch 6/200
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 3759.2654 - mae: 3728.6338
Epoch 7/200
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 3829.9690 - mae: 3801.7749
Epoch 8/200
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 3356.8799 - mae: 3328.5056
Epoch 9/200
[1m52/52[0m [32m━━━━━━━━━

<keras.src.callbacks.history.History at 0x1e63a536ea0>

In [143]:

model.fit(X_train_scaled, y_train, validation_data=(X_val_scaled, y_val), 
                    epochs=200, batch_size=32, verbose=1)

# callbacks=[early_stopping]

Epoch 1/200
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 15ms/step - loss: 15909.5254 - mae: 15906.9775 - val_loss: 9574.1611 - val_mae: 9571.6182
Epoch 2/200
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 15959.2031 - mae: 15956.2773 - val_loss: 9860.4951 - val_mae: 9855.4814
Epoch 3/200
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 15102.1104 - mae: 15095.7373 - val_loss: 11736.2207 - val_mae: 11724.7109
Epoch 4/200
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 11280.0928 - mae: 11266.3828 - val_loss: 18136.1836 - val_mae: 18114.5742
Epoch 5/200
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 4507.8594 - mae: 4484.5669 - val_loss: 20060.3516 - val_mae: 20036.4375
Epoch 6/200
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 3637.7019 - mae: 3613.7273 - val_loss: 19988.3887 - val_mae: 19964.5527
Epoch

<keras.src.callbacks.history.History at 0x171c5355820>

In [160]:
# Evaluate the model on the validation set
val_loss, val_mae = model.evaluate(X, y)
print(f'Validation Mean Absolute Error: {val_mae:.2f}')

[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 2146.6504 - mae: 2112.3459
Validation Mean Absolute Error: 2099.50


In [64]:
# Evaluate the model on the validation set
val_loss, val_mae = model.evaluate(X_val_scaled, y_val)
print(f'Validation Mean Absolute Error: {val_mae:.2f}')

NameError: name 'X_val_scaled' is not defined

In [162]:

# Make predictions on the test set
y_predict = model.predict(X_scaled)

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step


In [166]:
# Create a DataFrame for the predictions
df_predictions = pd.DataFrame(y_predict, columns=['price'])

# Display predictions
print(df_predictions)



            price
0    19677.423828
1    17716.781250
2    20111.156250
3    15289.390625
4     8173.140625
..            ...
406  25007.714844
407  13668.597656
408  15741.763672
409  19339.289062
410  13782.019531

[411 rows x 1 columns]


In [168]:
combined_df = pd.concat([test_df['Id'], df_predictions], axis=1)

combined_df

Unnamed: 0,Id,price
0,0,19677.423828
1,1,17716.781250
2,2,20111.156250
3,3,15289.390625
4,4,8173.140625
...,...,...
406,406,25007.714844
407,407,13668.597656
408,408,15741.763672
409,409,19339.289062


In [170]:
combined_df.to_csv('foo.csv', index=False)

In [92]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from tensorflow.keras import layers, regularizers, Model, Input, optimizers
from scikeras.wrappers import KerasRegressor
import warnings
 
# Load train and test datasets
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')
 
# Checking information of the train data
train_df.info()
 
# Initialize LabelEncoder and dictionary to store encoders for each column
label_encoders = {}
categorical_columns = ['model', 'motor_type', 'wheel', 'color', 'status', 'type']
 
# Apply LabelEncoder to each categorical column for both train and test
for col in categorical_columns:
    label_encoders[col] = LabelEncoder()
    train_df[col] = label_encoders[col].fit_transform(train_df[col])
    test_df[col] = test_df[col].apply(lambda x: label_encoders[col].transform([x])[0] if x in label_encoders[col].classes_ else -1)
 
# Function to convert running values from 'km' to 'miles'
def convert_running(value):
    if 'km' in value:
        kilometers = int(value.split()[0])
        miles = kilometers * 0.621371
        return f"{miles:.2f} miles"
    elif 'miles' in value:
        return value
    return value
 
# Apply the conversion for both train and test datasets
train_df['running'] = train_df['running'].apply(convert_running)
test_df['running'] = test_df['running'].apply(convert_running)
 
# Clean and convert 'running' column to numeric values
train_df['running'] = pd.to_numeric(train_df['running'].str.replace('miles', '').str.strip(), errors='coerce').fillna(0).astype('int64')
test_df['running'] = pd.to_numeric(test_df['running'].str.replace('miles', '').str.strip(), errors='coerce').fillna(0).astype('int64')
 
# Drop duplicates from the training data
train_df = train_df.drop_duplicates()
 


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1642 entries, 0 to 1641
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   model         1642 non-null   object 
 1   year          1642 non-null   int64  
 2   motor_type    1642 non-null   object 
 3   running       1642 non-null   object 
 4   wheel         1642 non-null   object 
 5   color         1642 non-null   object 
 6   type          1642 non-null   object 
 7   status        1642 non-null   object 
 8   motor_volume  1642 non-null   float64
 9   price         1642 non-null   int64  
dtypes: float64(1), int64(2), object(7)
memory usage: 128.4+ KB


In [94]:
# Prepare features and target variable
X = train_df.drop(['price', 'wheel'], axis=1)
y = train_df['price']
 
# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X) # Scale training data
X_test_scaled = scaler.transform(test_df.drop(columns=['Id', 'wheel'])) # Scale test data
 
warnings.filterwarnings('ignore')


In [74]:

# Function to create the Keras model using Functional API
def create_model(learning_rate=0.01):
    inputs = Input(shape=(X.shape[1],))
    x = layers.Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.01))(inputs)
    x = layers.BatchNormalization()(x) # Add Batch Normalization
    x = layers.Dropout(0.3)(x) # Add Dropout to prevent overfitting
    x = layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.01))(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.3)(x)
    x = layers.Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.01))(x)
    outputs = layers.Dense(1)(x) # Output layer for regression
 
    model = Model(inputs=inputs, outputs=outputs)
    optimizer = optimizers.Adam(learning_rate=learning_rate) # Use Adam optimizer with learning rate as argument
    model.compile(optimizer=optimizer, loss='mae') # Compile with Mean Absolute Error
    return model
 
# Create KerasRegressor
model = KerasRegressor(build_fn=create_model, verbose=0, learning_rate=0.1)
 
# Define the parameter grid for RandomizedSearchCV
param_distributions = {
    'learning_rate': [0.001, 0.01, 0.1], # Update with a range of values you want to try
    'batch_size': [16, 32, 64, 128],
    'epochs': [50, 100, 150]
}
 
# Create the RandomizedSearchCV object
grid = RandomizedSearchCV(estimator=model, param_distributions=param_distributions, n_jobs=-1, cv=3, n_iter=10)
 
# Fit the model
grid_result = grid.fit(X_scaled, y)
 
print(f'Best: {grid_result.best_score_} using {grid_result.best_params_}')
 
# Use the best model for predictions
best_model = grid_result.best_estimator_
best_params = grid.best_params_
print("Best parameters found: ", best_params)
 


Best: 0.6994995276133219 using {'learning_rate': 0.1, 'epochs': 150, 'batch_size': 128}
Best parameters found:  {'learning_rate': 0.1, 'epochs': 150, 'batch_size': 128}


In [59]:
from keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.optimizers.schedules import ExponentialDecay

# Function to create the Keras model using Functional API
def create_model(learning_rate=0.01):
    inputs = Input(shape=(X.shape[1],))
    
    # First Dense layer with regularization and activation
    x = layers.Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.01))(inputs)
    x = layers.BatchNormalization()(x)  # Batch Normalization to stabilize training
    x = layers.Dropout(0.3)(x)  # Dropout for regularization
    
    # Second Dense layer with more units
    x = layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.01))(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.4)(x)  # Increase Dropout rate
    
    # Third Dense layer with regularization
    x = layers.Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.01))(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.4)(x)  # Increase Dropout rate
    
    # Output layer for regression
    outputs = layers.Dense(1)(x)
    
    # Create model
    model = Model(inputs=inputs, outputs=outputs)
    
    # Using Exponential Decay for learning rate schedule
    lr_schedule = ExponentialDecay(initial_learning_rate=learning_rate, decay_steps=10000, decay_rate=0.96)
    optimizer = optimizers.Adam(learning_rate=lr_schedule)
    
    # Compile model with MAE loss
    model.compile(optimizer=optimizer, loss='mae')
    
    return model

# Create KerasRegressor
model = KerasRegressor(build_fn=create_model, verbose=0, learning_rate=0.01)

# Define the parameter grid for RandomizedSearchCV with more parameters
param_distributions = {
    'learning_rate': [0.0001, 0.001, 0.01, 0.1],  # Wider range of learning rates
    'batch_size': [16, 32, 64],
    'epochs': [50, 100, 200],  # Explore higher epochs
}

# Use RandomizedSearchCV for hyperparameter tuning
grid = RandomizedSearchCV(estimator=model, param_distributions=param_distributions, n_jobs=-1, cv=5, n_iter=20)  # Increase n_iter and cv for better search

# Callbacks to improve accuracy
callbacks = [ReduceLROnPlateau(monitor='loss', factor=0.1, patience=10, min_lr=1e-6)]

# Fit the model
grid_result = grid.fit(X_scaled, y, callbacks=callbacks)

# Get best model and parameters
best_model = grid_result.best_estimator_
best_params = grid_result.best_params_

print(f'Best: {grid_result.best_score_} using {best_params}')
print("Best parameters found: ", best_params)

# Use the best model for predictions
y_pred = best_model.predict(X_scaled)


Best: 0.760139548778534 using {'learning_rate': 0.01, 'epochs': 200, 'batch_size': 64}
Best parameters found:  {'learning_rate': 0.01, 'epochs': 200, 'batch_size': 64}


In [102]:
# Create the final model using the best parameters
final_model = create_model(learning_rate=best_params['learning_rate'])
 
# Fit the final model using the entire dataset and the best batch_size and epochs
final_model.fit(X_scaled, y, batch_size=best_params['batch_size'], epochs=best_params['epochs'])

Epoch 1/200
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 16115.4570
Epoch 2/200
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 15930.3398
Epoch 3/200
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 16045.2578 
Epoch 4/200
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 15682.0537
Epoch 5/200
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 15719.2939 
Epoch 6/200
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 15747.2305 
Epoch 7/200
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 15425.9717 
Epoch 8/200
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 15209.1855 
Epoch 9/200
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 15150.0420 
Epoch 10/200
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

<keras.src.callbacks.history.History at 0x2270537a150>

In [105]:
# Evaluate the final model on a test set (assuming you have test data)
test_loss = final_model.evaluate(X_scaled, y)
print("Test loss:", test_loss)
 


[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1950.8011 
Test loss: 1881.4302978515625


In [113]:
# Predict on new data (assuming X_test_scaled is your new input data)
y_predict = final_model.predict(X_test_scaled)
 
# Create a DataFrame for the predictions
df_predictions = pd.DataFrame(y_predict, columns=['price'])
 
# Display predictions
print(df_predictions)


[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
            price
0    17517.896484
1    15889.663086
2    21938.046875
3    14278.369141
4     5364.213379
..            ...
406  23094.277344
407  13176.238281
408  13364.712891
409  16093.551758
410  12776.750977

[411 rows x 1 columns]


In [115]:

# Combine the predictions with the test data IDs
combined_df = pd.concat([test_df['Id'], df_predictions], axis=1)

In [117]:
 
combined_df.to_csv('Rant.csv', index=False)