In [1]:
import joblib
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
# Load the cleaned dataset. low_memory=False stops pandas from parsing the
# file in chunks, so each column's dtype is inferred from the whole file.
df = pd.read_csv(
    filepath_or_buffer='Dataset/dataset_cleaned1.csv',
    low_memory=False,
)

# Display the available column names as this cell's output.
df.columns

Index(['Maker', 'Model', 'Number_of_Cylinders', 'Engine_Type',
       'Engine_Horse_Power', 'Engine_Horse_Power_RPM', 'Transmission',
       'Mixed_Fuel_Consumption_per_100_km_l', 'Fuel_Tank_Capacity',
       'Acceleration_0_to_100_Km', 'Max_Speed_Km_per_Hour', 'Fuel_Grade',
       'Year', 'Type_of_Car'],
      dtype='object')

In [3]:
# Input features used to predict fuel consumption. The order matters: the
# fitted scaler and the model both see the columns in exactly this order.
selected_columns = [
    'Number_of_Cylinders',
    'Engine_Type',
    'Engine_Horse_Power',
    'Engine_Horse_Power_RPM',
    'Transmission',
    'Acceleration_0_to_100_Km',
    'Fuel_Grade',
]
X = df.loc[:, selected_columns]

# Regression target (assumed to be the mixed-cycle fuel consumption column).
y = df.loc[:, 'Mixed_Fuel_Consumption_per_100_km_l']

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both splits so no test information leaks into training.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [4]:
# Build, compile and train the fuel-consumption regression model.

# Seed Python, NumPy and TensorFlow in one call so weight initialisation and
# batch shuffling are repeatable, in line with the seeded train/test split.
tf.keras.utils.set_random_seed(42)

# Small fully connected network: two hidden ReLU layers followed by a single
# linear output unit, since the target is a continuous value (regression).
# The input width is declared with an explicit Input object instead of the
# deprecated `input_shape=` argument on the first Dense layer.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(X_train_scaled.shape[1],)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1),
])

# Optimise mean squared error and additionally track mean absolute error.
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

# Take the callback from tf.keras, the same package the model is built with.
# Mixing in the standalone `keras` package can pair the model with a callback
# from a different Keras implementation, depending on installed versions.
# Training stops once val_loss has not improved for 10 consecutive epochs,
# and the weights from the best epoch are restored.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

# Train for at most 50 epochs; validation_split holds out 20% of the training
# rows to compute the val_loss that early stopping monitors.
history = model.fit(
    X_train_scaled,
    y_train,
    epochs=50,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stopping],
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [5]:
# Score the model on the held-out test split. evaluate() returns the loss
# first (MSE, as compiled) followed by the compiled metrics (MAE).
loss, mae = model.evaluate(x=X_test_scaled, y=y_test)

# Report both error measures, one per line.
print(
    f'Mean Squared Error on Test Set: {loss}',
    f'Mean Absolute Error on Test Set: {mae}',
    sep='\n',
)

Mean Squared Error on Test Set: 1.2467138767242432
Mean Absolute Error on Test Set: 0.8225128650665283


In [6]:
# Persist the trained artefacts so they can be reloaded without retraining.
# (joblib is imported with the rest of the dependencies in the imports cell.)

# Save the model to an HDF5 file.
# NOTE(review): '.h5' is Keras' legacy format; the file name is kept unchanged
# so that any existing loader of 'model_dinova1.h5' keeps working.
model.save('model_dinova1.h5')

# Save the fitted scaler to a joblib file; the same scaling must be applied
# to new inputs before they are passed to the saved model.
joblib.dump(scaler, 'scaler_dinova.joblib')

['scaler_dinova.joblib']