In [167]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import layers, regularizers

# Load train and test datasets
train_df = pd.read_csv('train 2.csv')
test_df = pd.read_csv('test.csv')

# Checking information of the train data
train_df.info()

# Initialize LabelEncoder and dictionary to store encoders for each column
label_encoders = {}

# List of categorical columns to encode
categorical_columns = ['model', 'motor_type', 'wheel', 'color', 'status', 'type']

# Apply LabelEncoder to each categorical column for both train and test
for col in categorical_columns:
    label_encoders[col] = LabelEncoder()
    train_df[col] = label_encoders[col].fit_transform(train_df[col])
    test_df[col] = test_df[col].apply(lambda x: label_encoders[col].transform([x])[0] if x in label_encoders[col].classes_ else -1)

# Function to convert running values from 'km' to 'miles'
def convert_running(value):
    if 'km' in value:
        kilometers = int(value.split()[0])
        miles = kilometers * 0.621371
        return f"{miles:.2f} miles"
    elif 'miles' in value:
        return value
    else:
        return value 

# Apply the conversion for both train and test datasets
train_df['running'] = train_df['running'].apply(convert_running)
test_df['running'] = test_df['running'].apply(convert_running)

# Clean and convert 'running' column to numeric values
train_df['running'] = train_df['running'].str.replace('miles', '').str.strip()
test_df['running'] = test_df['running'].str.replace('miles', '').str.strip()
train_df['running'] = pd.to_numeric(train_df['running'], errors='coerce').fillna(0).astype('int64')
test_df['running'] = pd.to_numeric(test_df['running'], errors='coerce').fillna(0).astype('int64')

# Drop duplicates from the training data
train_df = train_df.drop_duplicates()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1642 entries, 0 to 1641
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   model         1642 non-null   object 
 1   year          1642 non-null   int64  
 2   motor_type    1642 non-null   object 
 3   running       1642 non-null   object 
 4   wheel         1642 non-null   object 
 5   color         1642 non-null   object 
 6   type          1642 non-null   object 
 7   status        1642 non-null   object 
 8   motor_volume  1642 non-null   float64
 9   price         1642 non-null   int64  
dtypes: float64(1), int64(2), object(7)
memory usage: 128.4+ KB


In [169]:
# Prepare features and target variable
X = train_df.drop(['price', 'wheel'], axis = 1) # Replace 'target_column' with your actual target column name
y = train_df['price']  # Your target variable

In [171]:
from scikeras.wrappers import KerasRegressor

scaler = StandardScaler()
X_feature_scaled = scaler.fit_transform(X)
X_scaled = scaler.transform(test_df.drop(columns=['Id', 'wheel']))

import warnings
warnings.filterwarnings('ignore')

In [173]:
from keras.models import Sequential
from keras.layers import Dense
from keras.regularizers import l2
from scikeras.wrappers import KerasRegressor
from sklearn.model_selection import RandomizedSearchCV

# Define a function to create the model
def create_model(input_dim, l2_reg=0.001):  # Add input_dim as a parameter
    model = Sequential()
    model.add(Dense(128, input_dim=input_dim, activation='relu', kernel_regularizer=l2(l2_reg)))
    model.add(Dense(64, activation='relu', kernel_regularizer=l2(l2_reg)))
    model.add(Dense(1))  # Output layer
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model

# Set the input dimension based on your data
input_dim = X_scaled.shape[1]

# Create the KerasRegressor with the input dimension
model = KerasRegressor(model=create_model, input_dim=input_dim, l2_reg=0.01)  # Default l2_reg value

# Define the hyperparameter grid
param_distributions = {
    'l2_reg': [0.01, 0.001, 0.0001],
    'batch_size': [16, 32, 64],
    'epochs': [50, 100],
}

# Initialize RandomizedSearchCV
grid = RandomizedSearchCV(estimator=model, param_distributions=param_distributions, n_jobs=-1, cv=3, n_iter=10)

# Fit the model with validation set
grid_result = grid.fit(X_feature_scaled, y, callbacks=[early_stopping, reduce_lr])

# Print the best parameters and score
print(f'Best: {grid_result.best_score_} using {grid_result.best_params_}')


Epoch 1/100
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 957us/step - loss: 318120960.0000
Epoch 2/100
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 942us/step - loss: 298202208.0000
Epoch 3/100
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 947us/step - loss: 277265728.0000
Epoch 4/100
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 821us/step - loss: 240112864.0000
Epoch 5/100
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 892us/step - loss: 175440240.0000
Epoch 6/100
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 909us/step - loss: 127504256.0000
Epoch 7/100
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 883us/step - loss: 81587312.0000
Epoch 8/100
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 845us/step - loss: 54125340.0000
Epoch 9/100
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 828us/step - loss: 46141752.

In [65]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from sklearn.model_selection import RandomizedSearchCV
from tensorflow.keras import regularizers, optimizers, callbacks

# Function to create the model
def create_model(learning_rate=0.01):
    model = Sequential()
    model.add(Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.01), input_shape=(X.shape[1],)))
    model.add(BatchNormalization())  # Add Batch Normalization
    model.add(Dropout(0.3))  # Add Dropout to prevent overfitting
    model.add(Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))
    model.add(Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
    model.add(Dense(1))  # Output layer for regression
    optimizer = optimizers.Adam(learning_rate=learning_rate)  # Use Adam optimizer with learning rate as argument
    model.compile(optimizer=optimizer, loss='mae')  # Compile with Mean Absolute Error
    return model

# Create KerasRegressor and set the learning_rate through constructor argument
model = KerasRegressor(build_fn=create_model, verbose=0)

# Define the parameter distributions (excluding learning_rate here)
param_distributions = {
    'batch_size': [16, 32, 64],
    'epochs': [50, 100, 150],

   
}

# Callbacks: Early stopping and learning rate reduction
early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
reduce_lr = callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)

# Create the RandomizedSearchCV object (learning_rate is set within create_model)
grid = RandomizedSearchCV(estimator=model, param_distributions=param_distributions, n_jobs=-1, cv=3, n_iter=10)

# Fit the model with validation set
grid_result = grid.fit(X, y, callbacks=[early_stopping, reduce_lr])

# Print the best parameters and score
print(f'Best: {grid_result.best_score_} using {grid_result.best_params_}')


Best: 0.7317586739857992 using {'epochs': 100, 'batch_size': 64}


In [145]:
 model = Sequential()
model.add(Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.01), input_shape=(X.shape[1],)))
model.add(BatchNormalization())  # Add Batch Normalization
model.add(Dropout(0.3))  # Add Dropout to prevent overfitting
model.add(Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(BatchNormalization())
model.add(Dropout(0.3))
model.add(Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(Dense(1))  # Output layer for regression

In [147]:
optimizer = optimizers.Adam(learning_rate=0.01)  # Use Adam optimizer with learning rate as argument
model.compile(optimizer=optimizer, loss='mae')  # Compile with Mean Absolute Error

In [149]:
model.fit(X, y,  epochs = 100, batch_size = 16)

Epoch 1/100
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - loss: 15958.1553
Epoch 2/100
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 15688.6660
Epoch 3/100
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 15458.8271
Epoch 4/100
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 14570.3516
Epoch 5/100
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 13112.9414
Epoch 6/100
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 10325.1416
Epoch 7/100
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 7999.9316
Epoch 8/100
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 5988.8955
Epoch 9/100
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 5298.1313
Epoch 10/100
[1m103/103[0m [32m━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x1f609478410>

In [153]:
# Evaluate the model on the validation set
val_loss, val_mae = model.evaluate(X, y)
print(f'Validation Mean Absolute Error: {val_mae:.2f}')

[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 925us/step - loss: 2189.8477


TypeError: cannot unpack non-iterable float object

In [155]:
# Make predictions on the test set
y_predict = model.predict(X_scaled)

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step


In [157]:
# Create a DataFrame for the predictions
df_predictions = pd.DataFrame(y_predict, columns=['price'])

# Display predictions
print(df_predictions)



combined_df = pd.concat([test_df['Id'], df_predictions], axis=1)

combined_df



combined_df.to_csv('force.csv', index=False)

            price
0    17310.279297
1    16757.291016
2    19290.808594
3    14280.497070
4     2475.139404
..            ...
406  22002.427734
407  12792.098633
408  13212.690430
409  15971.018555
410  12288.930664

[411 rows x 1 columns]


In [105]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import layers, regularizers

# Load train and test datasets
train_df = pd.read_csv('train 2.csv')
test_df = pd.read_csv('test.csv')

# Checking information of the train data
train_df.info()

# Initialize LabelEncoder and dictionary to store encoders for each column
label_encoders = {}

# List of categorical columns to encode
categorical_columns = ['model', 'motor_type', 'wheel', 'color', 'status', 'type']

# Apply LabelEncoder to each categorical column for both train and test
for col in categorical_columns:
    label_encoders[col] = LabelEncoder()
    train_df[col] = label_encoders[col].fit_transform(train_df[col])
    test_df[col] = test_df[col].apply(lambda x: label_encoders[col].transform([x])[0] if x in label_encoders[col].classes_ else -1)

# Function to convert running values from 'km' to 'miles'
def convert_running(value):
    if 'km' in value:
        kilometers = int(value.split()[0])
        miles = kilometers * 0.621371
        return f"{miles:.2f} miles"
    elif 'miles' in value:
        return value
    else:
        return value 

# Apply the conversion for both train and test datasets
train_df['running'] = train_df['running'].apply(convert_running)
test_df['running'] = test_df['running'].apply(convert_running)

# Clean and convert 'running' column to numeric values
train_df['running'] = train_df['running'].str.replace('miles', '').str.strip()
test_df['running'] = test_df['running'].str.replace('miles', '').str.strip()
train_df['running'] = pd.to_numeric(train_df['running'], errors='coerce').fillna(0).astype('int64')
test_df['running'] = pd.to_numeric(test_df['running'], errors='coerce').fillna(0).astype('int64')

# Drop duplicates from the training data
train_df = train_df.drop_duplicates()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1642 entries, 0 to 1641
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   model         1642 non-null   object 
 1   year          1642 non-null   int64  
 2   motor_type    1642 non-null   object 
 3   running       1642 non-null   object 
 4   wheel         1642 non-null   object 
 5   color         1642 non-null   object 
 6   type          1642 non-null   object 
 7   status        1642 non-null   object 
 8   motor_volume  1642 non-null   float64
 9   price         1642 non-null   int64  
dtypes: float64(1), int64(2), object(7)
memory usage: 128.4+ KB


In [107]:
# Prepare features and target variable
X = train_df.drop(['price', 'wheel'], axis = 1) # Replace 'target_column' with your actual target column name
y = train_df['price']  # Your target variable


In [109]:

from scikeras.wrappers import KerasRegressor

scaler = StandardScaler()
X = scaler.fit_transform(X)
X_scaled = scaler.transform(test_df.drop(columns=['Id', 'wheel']))

import warnings
warnings.filterwarnings('ignore')

In [111]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from sklearn.model_selection import RandomizedSearchCV
from tensorflow.keras import regularizers, optimizers, callbacks
 
# Function to create the model
def create_model(learning_rate=0.01):
    model = Sequential()
    model.add(Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.01), input_shape=(X.shape[1],)))
    model.add(BatchNormalization())  # Add Batch Normalization
    model.add(Dropout(0.3))  # Add Dropout to prevent overfitting
    model.add(Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))
    model.add(Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
    model.add(Dense(1))  # Output layer for regression
    optimizer = optimizers.Adam(learning_rate=learning_rate)  # Use Adam optimizer with learning rate as argument
    model.compile(optimizer=optimizer, loss='mae')  # Compile with Mean Absolute Error
    return model
 
# Create KerasRegressor and set the learning_rate through constructor argument
model = KerasRegressor(build_fn=create_model, verbose=0)
 
# Define the parameter distributions (excluding learning_rate here)
param_distributions = {
    'batch_size': [16, 32, 64],  # Increase the batch size for smoother updates
    'epochs': [50, 100, 150]
}
 
# Callbacks: Early stopping and learning rate reduction
early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
reduce_lr = callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)
 
# Create the RandomizedSearchCV object (learning_rate is set within create_model)
grid = RandomizedSearchCV(estimator=model, param_distributions=param_distributions, n_jobs=-1, cv=3, n_iter=10)
 
# Fit the model with validation set
grid_result = grid.fit(X, y, callbacks=[early_stopping, reduce_lr])
 
# Print the best parameters and score
print(f'Best: {grid_result.best_score_} using {grid_result.best_params_}')

Best: 0.7199750542640686 using {'epochs': 100, 'batch_size': 64}


In [113]:
model = Sequential()
model.add(Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.01), input_shape=(X.shape[1],)))
model.add(BatchNormalization())  # Add Batch Normalization
model.add(Dropout(0.3))  # Add Dropout to prevent overfitting
model.add(Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(BatchNormalization())
model.add(Dropout(0.3))
model.add(Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(Dense(1))  # Output layer for regression

In [115]:
optimizer = optimizers.Adam(learning_rate=0.01)  # Use Adam optimizer with learning rate as argument
model.compile(optimizer=optimizer, loss='mae')  # Compile with Mean Absolute Error

In [129]:
model.fit(X, y,  epochs = 150, batch_size = 64)

Epoch 1/150
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 2634.9043
Epoch 2/150
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 2645.4524 
Epoch 3/150
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 2556.4038 
Epoch 4/150
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 2575.9062 
Epoch 5/150
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 2548.7744 
Epoch 6/150
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 2653.3000 
Epoch 7/150
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 2498.0627
Epoch 8/150
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 2629.1504 
Epoch 9/150
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 2529.3262 
Epoch 10/150
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

<keras.src.callbacks.history.History at 0x1f6089a3020>

In [131]:
# Evaluate the model on the validation set
val_loss, val_mae = model.evaluate(X, y)
print(f'Validation Mean Absolute Error: {val_mae:.2f}')

[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 940us/step - loss: 1907.9863


TypeError: cannot unpack non-iterable float object

In [133]:
# Make predictions on the test set
y_predict = model.predict(X_scaled)

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 


In [135]:

# Create a DataFrame for the predictions
df_predictions = pd.DataFrame(y_predict, columns=['price'])

# Display predictions
print(df_predictions)



combined_df = pd.concat([test_df['Id'], df_predictions], axis=1)

combined_df



combined_df.to_csv('fire.csv', index=False)



            price
0    17765.626953
1    16725.203125
2    23507.261719
3    15149.442383
4     5595.704590
..            ...
406  21125.187500
407  13493.102539
408  13068.635742
409  17024.529297
410  13230.233398

[411 rows x 1 columns]


In [213]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from tensorflow import keras
from tensorflow.keras import layers, regularizers
from scikeras.wrappers import KerasRegressor
import warnings

# Load train and test datasets
train_df = pd.read_csv('train 2.csv')
test_df = pd.read_csv('test.csv')

# Checking information of the train data
train_df.info()

# Initialize LabelEncoder and dictionary to store encoders for each column
label_encoders = {}
categorical_columns = ['model', 'motor_type', 'wheel', 'color', 'status', 'type']

# Apply LabelEncoder to each categorical column for both train and test
for col in categorical_columns:
    label_encoders[col] = LabelEncoder()
    train_df[col] = label_encoders[col].fit_transform(train_df[col])
    test_df[col] = test_df[col].apply(lambda x: label_encoders[col].transform([x])[0] if x in label_encoders[col].classes_ else -1)

# Function to convert running values from 'km' to 'miles'
def convert_running(value):
    if 'km' in value:
        kilometers = int(value.split()[0])
        miles = kilometers * 0.621371
        return f"{miles:.2f} miles"
    elif 'miles' in value:
        return value 
    return value 

# Apply the conversion for both train and test datasets
train_df['running'] = train_df['running'].apply(convert_running)
test_df['running'] = test_df['running'].apply(convert_running)

# Clean and convert 'running' column to numeric values
train_df['running'] = pd.to_numeric(train_df['running'].str.replace('miles', '').str.strip(), errors='coerce').fillna(0).astype('int64')
test_df['running'] = pd.to_numeric(test_df['running'].str.replace('miles', '').str.strip(), errors='coerce').fillna(0).astype('int64')

# Drop duplicates from the training data
train_df = train_df.drop_duplicates()

# Prepare features and target variable
X = train_df.drop(['price', 'wheel'], axis=1)
y = train_df['price']

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)  # Scale training data
X_test_scaled = scaler.transform(test_df.drop(columns=['Id', 'wheel']))  # Scale test data

# Suppress warnings
warnings.filterwarnings('ignore')


from scikeras.wrappers import KerasRegressor
from sklearn.model_selection import RandomizedSearchCV

# Define your model function
def create_model(learning_rate=0.001, num_units=32, num_layers=1):
    model = Sequential()
    model.add(Dense(num_units, activation='relu', input_dim=X_scaled.shape[1]))  # Input layer
    for _ in range(num_layers - 1):  # Additional hidden layers
        model.add(Dense(num_units, activation='relu'))
    model.add(Dense(1))  # Output layer
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mean_squared_error', metrics=['mae'])
    return model

# Create the KerasRegressor
model = KerasRegressor(model=create_model, epochs=100, batch_size=10)

# Define parameter distributions
param_distributions = {
    'model__learning_rate': [0.001, 0.01, 0.1],
    'model__num_units': [32, 64],
    'model__num_layers': [1, 2, 3]
}

# Create the RandomizedSearchCV object
grid = RandomizedSearchCV(estimator=model, param_distributions=param_distributions, n_jobs=-1, cv=3, n_iter=10)

# Fit the model
grid_result = grid.fit(X_scaled, y)

# Print the best parameters and score
print(f'Best: {grid_result.best_score_} using {grid_result.best_params_}')

# Retrieve the best KerasRegressor from the grid search
best_model = grid_result.best_estimator_

# Access the underlying Keras model
keras_model = best_model.model  # This gives you the underlying Keras model

# Evaluate the best model on the training set
val_loss, val_mae = keras_model.evaluate(X_scaled, y, verbose=0)  # Use original X and y for evaluation
print(f'Validation Mean Absolute Error: {val_mae:.2f}')

# Make predictions on the test set
y_predict = keras_model.predict(X_scaled)

# Create a DataFrame for the predictions
df_predictions = pd.DataFrame(y_predict, columns=['price'])

# Combine predictions with test IDs
combined_df = pd.concat([test_df['Id'], df_predictions], axis=1)

# Save predictions to CSV
combined_df.to_csv('fathi.csv', index=False)





# # Function to create the Keras model
# def create_model(learning_rate=0.01):
#     model = keras.Sequential([
#         layers.Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.01), input_shape=(X_scaled.shape[1],)),
#         layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
#         layers.Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
#         layers.Dense(1)  # Output layer for regression
#     ])
#     optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
#     model.compile(optimizer=optimizer, loss='mean_absolute_error', metrics=['mae'])  # Compile with MAE
#     return model

# # Create KerasRegressor
# model = KerasRegressor(build_fn=create_model, verbose=0)

# # Define the parameter distributions for RandomizedSearchCV
# param_distributions = {
#     'batch_size': [10, 20, 40],
#     'epochs': [50, 100],
#     'learning_rate': [0.001, 0.01, 0.1]
# }

# # Create the RandomizedSearchCV object
# grid = RandomizedSearchCV(estimator=model, param_distributions=param_distributions, n_jobs=-1, cv=3, n_iter=10)
# grid_result = grid.fit(X_scaled, y)

# # Print the best parameters and score
# print(f'Best: {grid_result.best_score_} using {grid_result.best_params_}')

# # Use the best model for predictions
# best_model = grid_result.best_estimator_

# # Evaluate the best model on the training set
# val_loss, val_mae = best_model.evaluate(X_scaled, y)
# print(f'Validation Mean Absolute Error: {val_mae:.2f}')

# # Make predictions on the test set
# y_predict = best_model.predict(X_test_scaled)

# # Create a DataFrame for the predictions
# df_predictions = pd.DataFrame(y_predict, columns=['price'])

# # Display predictions
# print(df_predictions)

# # Combine predictions with test IDs
# combined_df = pd.concat([test_df['Id'], df_predictions], axis=1)

# # Save predictions to CSV
# combined_df.to_csv('fathi.csv', index=False)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1642 entries, 0 to 1641
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   model         1642 non-null   object 
 1   year          1642 non-null   int64  
 2   motor_type    1642 non-null   object 
 3   running       1642 non-null   object 
 4   wheel         1642 non-null   object 
 5   color         1642 non-null   object 
 6   type          1642 non-null   object 
 7   status        1642 non-null   object 
 8   motor_volume  1642 non-null   float64
 9   price         1642 non-null   int64  
dtypes: float64(1), int64(2), object(7)
memory usage: 128.4+ KB


ValueError: 
All the 30 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
30 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\admin\anaconda3\Lib\site-packages\sklearn\model_selection\_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\admin\anaconda3\Lib\site-packages\scikeras\wrappers.py", line 770, in fit
    self._fit(
  File "C:\Users\admin\anaconda3\Lib\site-packages\scikeras\wrappers.py", line 925, in _fit
    X, y = self._initialize(X, y)
           ^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\admin\anaconda3\Lib\site-packages\scikeras\wrappers.py", line 862, in _initialize
    self.model_ = self._build_keras_model()
                  ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\admin\anaconda3\Lib\site-packages\scikeras\wrappers.py", line 433, in _build_keras_model
    model = final_build_fn(**build_params)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\admin\AppData\Local\Temp\ipykernel_5592\3879735645.py", line 71, in create_model
NameError: name 'Adam' is not defined


In [275]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from tensorflow import keras
from tensorflow.keras import layers, regularizers
from scikeras.wrappers import KerasRegressor
import warnings

# Load train and test datasets
train_df = pd.read_csv('train 2.csv')
test_df = pd.read_csv('test.csv')

# Checking information of the train data
train_df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1642 entries, 0 to 1641
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   model         1642 non-null   object 
 1   year          1642 non-null   int64  
 2   motor_type    1642 non-null   object 
 3   running       1642 non-null   object 
 4   wheel         1642 non-null   object 
 5   color         1642 non-null   object 
 6   type          1642 non-null   object 
 7   status        1642 non-null   object 
 8   motor_volume  1642 non-null   float64
 9   price         1642 non-null   int64  
dtypes: float64(1), int64(2), object(7)
memory usage: 128.4+ KB


In [277]:
# Initialize LabelEncoder and dictionary to store encoders for each column
label_encoders = {}
categorical_columns = ['model', 'motor_type', 'wheel', 'color', 'status', 'type']

# Apply LabelEncoder to each categorical column for both train and test
for col in categorical_columns:
    label_encoders[col] = LabelEncoder()
    train_df[col] = label_encoders[col].fit_transform(train_df[col])
    test_df[col] = test_df[col].apply(lambda x: label_encoders[col].transform([x])[0] if x in label_encoders[col].classes_ else -1)

# Function to convert running values from 'km' to 'miles'
def convert_running(value):
    if 'km' in value:
        kilometers = int(value.split()[0])
        miles = kilometers * 0.621371
        return f"{miles:.2f} miles"
    elif 'miles' in value:
        return value 
    return value 

# Apply the conversion for both train and test datasets
train_df['running'] = train_df['running'].apply(convert_running)
test_df['running'] = test_df['running'].apply(convert_running)

# Clean and convert 'running' column to numeric values
train_df['running'] = pd.to_numeric(train_df['running'].str.replace('miles', '').str.strip(), errors='coerce').fillna(0).astype('int64')
test_df['running'] = pd.to_numeric(test_df['running'].str.replace('miles', '').str.strip(), errors='coerce').fillna(0).astype('int64')

# Drop duplicates from the training data
# train_df = train_df.drop_duplicates()


In [279]:
# Prepare features and target variable
X = train_df.drop(['price', 'wheel'], axis=1)
y = train_df['price']

In [281]:
# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)  # Scale training data
X_test_scaled = scaler.transform(test_df.drop(columns=['Id', 'wheel']))  # Scale test data

In [289]:
# Suppress warnings
warnings.filterwarnings('ignore')

# Function to create the Keras model
def create_model(learning_rate=0.01):
    model = Sequential()
    model.add(Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.01), input_shape=(X.shape[1],)))
    model.add(BatchNormalization())  # Add Batch Normalization
    model.add(Dropout(0.3))  # Add Dropout to prevent overfitting
    model.add(Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))
    model.add(Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
    model.add(Dense(1))  # Output layer for regression
    optimizer = optimizers.Adam(learning_rate=learning_rate)  # Use Adam optimizer with learning rate as argument
    model.compile(optimizer=optimizer, loss='mae')  # Compile with Mean Absolute Error
    return model

# Create KerasRegressor
model = KerasRegressor(build_fn=create_model, verbose=0)
model

In [291]:
# # Define the parameter distributions for RandomizedSearchCV
# param_distributions = {
# #     'batch_size': [10, 20, 40],
# #     'epochs': [50, 100],
# #     'learning_rate': [0.001, 0.01, 0.1]
# # }

# # # Create the RandomizedSearchCV object
# # grid = RandomizedSearchCV(estimator=model, param_distributions=param_distributions, n_jobs=-1, cv=3, n_iter=10)
# # grid_result = grid.fit(X_scaled, y)

ValueError: Invalid parameter learning_rate for estimator KerasRegressor.
This issue can likely be resolved by setting this parameter in the KerasRegressor constructor:
`KerasRegressor(learning_rate=0.001)`
Check the list of available parameters with `estimator.get_params().keys()`

In [None]:
# print(f'Best: {grid_result.best_score_} using {grid_result.best_params_}')

# # Use the best model for predictions
# best_model = grid_result.best_estimator_

In [297]:
from scikeras.wrappers import KerasRegressor

model = Sequential()
model.add(Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.01), input_shape=(X.shape[1],)))
model.add(BatchNormalization())  # Add Batch Normalization
model.add(Dropout(0.3))  # Add Dropout to prevent overfitting
model.add(Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(BatchNormalization())
model.add(Dropout(0.3))
model.add(Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(Dense(1))  # Output layer for regression
# Create the KerasRegressor with the learning_rate parameter
model = KerasRegressor(model=create_model, learning_rate=0.001)  # Default learning rate can be set here


In [299]:
param_distributions = {
    'learning_rate': [0.001, 0.01, 0.1],  # Update with a range of values you want to try
    'batch_size': [16, 32, 64],
    'epochs': [50, 100]
}
from tensorflow.keras.optimizers import Adam

In [301]:
# Create the RandomizedSearchCV object
grid = RandomizedSearchCV(estimator=model, param_distributions=param_distributions, n_jobs=-1, cv=3, n_iter=10)

# Fit the model
grid_result = grid.fit(X_scaled, y)


Epoch 1/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - loss: 15901.5918
Epoch 2/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 11976.2910
Epoch 3/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 4825.7798
Epoch 4/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 4001.5667
Epoch 5/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 4111.6704
Epoch 6/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 3672.5757
Epoch 7/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 4293.3481
Epoch 8/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 3773.4502
Epoch 9/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 3567.3865
Epoch 10/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0

In [303]:
print(f'Best: {grid_result.best_score_} using {grid_result.best_params_}')

# Use the best model for predictions
best_model = grid_result.best_estimator_

Best: 0.6913153529167175 using {'learning_rate': 0.01, 'epochs': 100, 'batch_size': 32}


In [305]:
best_params = grid.best_params_
print("Best parameters found: ", best_params)


Best parameters found:  {'learning_rate': 0.01, 'epochs': 100, 'batch_size': 32}


In [315]:
# Create the final model using the best parameters
final_model = create_model(learning_rate=best_params['learning_rate'])

# Fit the final model using the entire dataset and the best batch_size and epochs
final_model.fit(X_scaled, y, batch_size=64, epochs=100)


Epoch 1/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 15731.2754
Epoch 2/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 15567.8906 
Epoch 3/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 12823.7988
Epoch 4/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 7504.2974
Epoch 5/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 4504.4937 
Epoch 6/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 4077.6008 
Epoch 7/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 3913.9409 
Epoch 8/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 4189.0957 
Epoch 9/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 3603.7014 
Epoch 10/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0

<keras.src.callbacks.history.History at 0x1f6124742c0>

In [317]:
# Evaluate the final model on a test set (assuming you have test data)
test_loss = final_model.evaluate(X_scaled, y)
print("Test loss:", test_loss)


[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 2058.8628
Test loss: 2016.3404541015625


In [319]:
# Predict on new data (assuming X_new is your new input data)
y_predict = final_model.predict(X_test_scaled)


[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step


In [323]:
# Create a DataFrame for the predictions
df_predictions = pd.DataFrame(y_predict, columns=['price'])

# Display predictions
print(df_predictions)



combined_df = pd.concat([test_df['Id'], df_predictions], axis=1)

combined_df



combined_df.to_csv('finer.csv', index=False)



            price
0    17706.572266
1    16260.050781
2    19795.552734
3    14922.053711
4     5695.276367
..            ...
406  21976.302734
407  14179.119141
408  13992.166992
409  16688.074219
410  13884.369141

[411 rows x 1 columns]


In [325]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from tensorflow import keras
from tensorflow.keras import layers, regularizers
from scikeras.wrappers import KerasRegressor
import warnings
 
# Load train and test datasets
train_df = pd.read_csv('train 2.csv')
test_df = pd.read_csv('test.csv')
 
# Checking information of the train data
train_df.info()
# Initialize LabelEncoder and dictionary to store encoders for each column
label_encoders = {}
categorical_columns = ['model', 'motor_type', 'wheel', 'color', 'status', 'type']
 
# Apply LabelEncoder to each categorical column for both train and test
for col in categorical_columns:
    label_encoders[col] = LabelEncoder()
    train_df[col] = label_encoders[col].fit_transform(train_df[col])
    test_df[col] = test_df[col].apply(lambda x: label_encoders[col].transform([x])[0] if x in label_encoders[col].classes_ else -1)
 
# Function to convert running values from 'km' to 'miles'
def convert_running(value):
    if 'km' in value:
        kilometers = int(value.split()[0])
        miles = kilometers * 0.621371
        return f"{miles:.2f} miles"
    elif 'miles' in value:
        return value 
    return value
 
# Apply the conversion for both train and test datasets
train_df['running'] = train_df['running'].apply(convert_running)
test_df['running'] = test_df['running'].apply(convert_running)
 
# Clean and convert 'running' column to numeric values
train_df['running'] = pd.to_numeric(train_df['running'].str.replace('miles', '').str.strip(), errors='coerce').fillna(0).astype('int64')
test_df['running'] = pd.to_numeric(test_df['running'].str.replace('miles', '').str.strip(), errors='coerce').fillna(0).astype('int64')
 
# Drop duplicates from the training data
# train_df = train_df.drop_duplicates()
 
# Prepare features and target variable
X = train_df.drop(['price', 'wheel'], axis=1)
y = train_df['price']
 
# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)  # Scale training data
X_test_scaled = scaler.transform(test_df.drop(columns=['Id', 'wheel']))  # Scale test data
 
warnings.filterwarnings('ignore')
 
# Function to create the Keras model
def create_model(learning_rate=0.01):
    model = Sequential()
    model.add(Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.01), input_shape=(X.shape[1],)))
    model.add(BatchNormalization())  # Add Batch Normalization
    model.add(Dropout(0.3))  # Add Dropout to prevent overfitting
    model.add(Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))
    model.add(Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
    model.add(Dense(1))  # Output layer for regression
    optimizer = optimizers.Adam(learning_rate=learning_rate)  # Use Adam optimizer with learning rate as argument
    model.compile(optimizer=optimizer, loss='mae')  # Compile with Mean Absolute Error
    return model
 
# Create KerasRegressor
model = KerasRegressor(build_fn=create_model, verbose=0)
model
 
from scikeras.wrappers import KerasRegressor
 
model = Sequential()
model.add(Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.01), input_shape=(X.shape[1],)))
model.add(BatchNormalization())  # Add Batch Normalization
model.add(Dropout(0.3))  # Add Dropout to prevent overfitting
model.add(Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(BatchNormalization())
model.add(Dropout(0.3))
model.add(Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(Dense(1))  # Output layer for regression
# Create the KerasRegressor with the learning_rate parameter
model = KerasRegressor(model=create_model, learning_rate=0.001)  # Default learning rate can be set here

param_distributions = {
    'learning_rate': [0.001, 0.01, 0.1],  # Update with a range of values you want to try
    'batch_size': [16, 32, 64],
    'epochs': [50, 100]
}
from tensorflow.keras.optimizers import Adam
 
# Create the RandomizedSearchCV object
grid = RandomizedSearchCV(estimator=model, param_distributions=param_distributions, n_jobs=-1, cv=3, n_iter=10)
 
# Fit the model
grid_result = grid.fit(X_scaled, y)

print(f'Best: {grid_result.best_score_} using {grid_result.best_params_}')
 
# Use the best model for predictions
best_model = grid_result.best_estimator_
 
best_params = grid.best_params_
print("Best parameters found: ", best_params)
 
# Create the final model using the best parameters
final_model = create_model(learning_rate=best_params['learning_rate'])
 


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1642 entries, 0 to 1641
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   model         1642 non-null   object 
 1   year          1642 non-null   int64  
 2   motor_type    1642 non-null   object 
 3   running       1642 non-null   object 
 4   wheel         1642 non-null   object 
 5   color         1642 non-null   object 
 6   type          1642 non-null   object 
 7   status        1642 non-null   object 
 8   motor_volume  1642 non-null   float64
 9   price         1642 non-null   int64  
dtypes: float64(1), int64(2), object(7)
memory usage: 128.4+ KB
Epoch 1/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - loss: 15911.1621
Epoch 2/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 12224.5723
Epoch 3/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - loss: 5007.6572
E

In [327]:
# Fit the final model using the entire dataset and the best batch_size and epochs
final_model.fit(X_scaled, y, batch_size=32, epochs=100)
 
# Evaluate the final model on a test set (assuming you have test data)
test_loss = final_model.evaluate(X_scaled, y)
print("Test loss:", test_loss)
 
# Predict on new data (assuming X_new is your new input data)
y_predict = final_model.predict(X_test_scaled)

# Create a DataFrame for the predictions
df_predictions = pd.DataFrame(y_predict, columns=['price'])
 
# Display predictions
print(df_predictions)
 
 
combined_df = pd.concat([test_df['Id'], df_predictions], axis=1)
 
combined_df
 
 
combined_df.to_csv('finers.csv', index=False)

Epoch 1/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 2684.6985
Epoch 2/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 2821.5295
Epoch 3/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 2700.0127
Epoch 4/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 2812.4099
Epoch 5/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 2788.9656
Epoch 6/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 2722.8638
Epoch 7/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 2735.0737
Epoch 8/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 2737.6084
Epoch 9/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 2628.7551
Epoch 10/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s

In [329]:
from sklearn.model_selection import RandomizedSearchCV
import warnings
warnings.filterwarnings('ignore')

# Define the parameter grid for RandomizedSearchCV
param_distributions = {
    'activation': ['relu', 'tanh', 'sigmoid', 'linear', 'LeakyReLU'],  # Activation functions to try
    'batch_size': [16, 32, 64],
    'epochs': [50, 100],
    'learning_rate': [0.001, 0.01, 0.1]
}

# Wrap the Keras model in scikit-learn wrapper
model = KerasRegressor(model=create_model, verbose=0)

# Set up the RandomizedSearchCV
grid = RandomizedSearchCV(estimator=model, param_distributions=param_distributions, n_jobs=-1, cv=3, n_iter=10)

# Fit the model and find the best activation function and parameters
grid_result = grid.fit(X_scaled, y)

# Print the best parameters
print(f'Best score: {grid_result.best_score_}')
print(f'Best parameters: {grid_result.best_params_}')


ValueError: Invalid parameter learning_rate for estimator KerasRegressor.
This issue can likely be resolved by setting this parameter in the KerasRegressor constructor:
`KerasRegressor(learning_rate=0.1)`
Check the list of available parameters with `estimator.get_params().keys()`

In [331]:
from tensorflow.keras import Sequential, layers, regularizers
from tensorflow.keras.optimizers import Adam
from scikeras.wrappers import KerasRegressor
from sklearn.model_selection import RandomizedSearchCV
from functools import partial

# Function to create the Keras model with an activation parameter
def create_model(activation='relu', learning_rate=0.01):
    model = Sequential()
    model.add(layers.Dense(256, activation=activation, kernel_regularizer=regularizers.l2(0.01), input_shape=(X_scaled.shape[1],)))
    model.add(layers.BatchNormalization())
    model.add(layers.Dropout(0.3))
    
    model.add(layers.Dense(128, activation=activation, kernel_regularizer=regularizers.l2(0.01)))
    model.add(layers.BatchNormalization())
    model.add(layers.Dropout(0.3))
    
    model.add(layers.Dense(64, activation=activation, kernel_regularizer=regularizers.l2(0.01)))
    model.add(layers.Dense(1))  # Output layer for regression
    
    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='mae')
    
    return model

# Parameter grid (excluding learning_rate, passed via functools.partial)
param_distributions = {
    'activation': ['relu', 'tanh', 'sigmoid', 'linear', 'LeakyReLU'],  # Activation functions to try
    'batch_size': [16, 32, 64],
    'epochs': [50, 100]
}

# Use functools.partial to pass learning_rate to KerasRegressor
model = KerasRegressor(model=partial(create_model, learning_rate=0.01), verbose=0)

# Set up the RandomizedSearchCV
grid = RandomizedSearchCV(estimator=model, param_distributions=param_distributions, n_jobs=-1, cv=3, n_iter=10)

# Fit the model and find the best activation function and parameters
grid_result = grid.fit(X_scaled, y)

# Print the best parameters
print(f'Best score: {grid_result.best_score_}')
print(f'Best parameters: {grid_result.best_params_}')


ValueError: Invalid parameter activation for estimator KerasRegressor.
This issue can likely be resolved by setting this parameter in the KerasRegressor constructor:
`KerasRegressor(activation=relu)`
Check the list of available parameters with `estimator.get_params().keys()`

In [333]:
from tensorflow.keras import Sequential, layers, regularizers
from tensorflow.keras.optimizers import Adam
from scikeras.wrappers import KerasRegressor
from sklearn.model_selection import RandomizedSearchCV
from functools import partial

# Function to create the Keras model with an activation parameter
def create_model(activation='relu', learning_rate=0.01):
    model = Sequential()
    model.add(layers.Dense(256, activation=activation, kernel_regularizer=regularizers.l2(0.01), input_shape=(X_scaled.shape[1],)))
    model.add(layers.BatchNormalization())
    model.add(layers.Dropout(0.3))
    
    model.add(layers.Dense(128, activation=activation, kernel_regularizer=regularizers.l2(0.01)))
    model.add(layers.BatchNormalization())
    model.add(layers.Dropout(0.3))
    
    model.add(layers.Dense(64, activation=activation, kernel_regularizer=regularizers.l2(0.01)))
    model.add(layers.Dense(1))  # Output layer for regression
    
    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='mae')
    
    return model

# Parameter grid (excluding learning_rate, passed via functools.partial)
param_distributions = {
    'activation': ['relu', 'tanh', 'sigmoid', 'linear', 'LeakyReLU'],  # Activation functions to try
    'batch_size': [16, 32, 64],
    'epochs': [50, 100]
}

# Use functools.partial to pass learning_rate and set activation as a default
model = KerasRegressor(model=partial(create_model, learning_rate=0.01), activation='relu', verbose=0)

# Set up the RandomizedSearchCV
grid = RandomizedSearchCV(estimator=model, param_distributions=param_distributions, n_jobs=-1, cv=3, n_iter=10)

# Fit the model and find the best activation function and parameters
grid_result = grid.fit(X_scaled, y)

# Print the best parameters
print(f'Best score: {grid_result.best_score_}')
print(f'Best parameters: {grid_result.best_params_}')


Best score: 0.7072226603825887
Best parameters: {'epochs': 100, 'batch_size': 32, 'activation': 'relu'}


In [345]:
activation='relu'
model = Sequential()
model.add(layers.Dense(256, activation=activation, kernel_regularizer=regularizers.l2(0.01), input_shape=(X_scaled.shape[1],)))
model.add(layers.BatchNormalization())
model.add(layers.Dropout(0.3))

model.add(layers.Dense(128, activation=activation, kernel_regularizer=regularizers.l2(0.01)))
model.add(layers.BatchNormalization())
model.add(layers.Dropout(0.3))

model.add(layers.Dense(64, activation=activation, kernel_regularizer=regularizers.l2(0.01)))
model.add(layers.Dense(1))

In [348]:

# Compile the model with a learning rate scheduler
optimizer = keras.optimizers.Adam(learning_rate=0.01)
model.compile(optimizer=optimizer, loss='mean_absolute_error', metrics=['mae'])


model.fit(X, y,  epochs = 100, batch_size = 32)



Epoch 1/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - loss: 4576.0132 - mae: 4339.6592
Epoch 2/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 4336.6880 - mae: 4151.2681
Epoch 3/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 4276.8696 - mae: 4122.7295
Epoch 4/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 4201.6377 - mae: 4068.1428
Epoch 5/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 4312.8818 - mae: 4194.1655
Epoch 6/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 4173.8442 - mae: 4065.8933
Epoch 7/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 4186.8872 - mae: 4086.6133
Epoch 8/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 4376.4458 - mae: 4281.5918
Epoch 9/100
[1m52/52[0m [32m━━━━━━━━━

<keras.src.callbacks.history.History at 0x1f60ed78320>

In [335]:
# Create the final model using the best parameters
final_model = create_model(learning_rate=0.01)

# Fit the final model using the entire dataset and the best batch_size and epochs
final_model.fit(X_scaled, y, batch_size=32, epochs=100)

Epoch 1/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 15752.4814
Epoch 2/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 11834.4746
Epoch 3/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 4810.0483
Epoch 4/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 4432.3496
Epoch 5/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 4266.5742
Epoch 6/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 4435.3252
Epoch 7/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 3516.0261
Epoch 8/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 4116.8882
Epoch 9/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 3832.4683
Epoch 10/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0

<keras.src.callbacks.history.History at 0x1f618be41a0>

In [350]:
# Predict on new data (assuming X_new is your new input data)
y_predict = final_model.predict(X_test_scaled)

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 


In [352]:
# Create a DataFrame for the predictions
df_predictions = pd.DataFrame(y_predict, columns=['price'])
 
# Display predictions
print(df_predictions)
 
 
combined_df = pd.concat([test_df['Id'], df_predictions], axis=1)
 
combined_df
 
 
combined_df.to_csv('winer.csv', index=False)

            price
0    17884.533203
1    16864.273438
2    21443.730469
3    15267.827148
4     5269.961914
..            ...
406  23502.531250
407  13476.540039
408  13968.190430
409  17083.126953
410  13389.291016

[411 rows x 1 columns]
