In [1]:
!pip install kagglehub



In [2]:
import kagglehub

# Download latest version
kagglehub.dataset_download("deepcontractor/car-price-prediction-challenge")



Downloading from https://www.kaggle.com/api/v1/datasets/download/deepcontractor/car-price-prediction-challenge?dataset_version_number=1...


100%|██████████| 429k/429k [00:00<00:00, 1.08MB/s]

Extracting files...





'/root/.cache/kagglehub/datasets/deepcontractor/car-price-prediction-challenge/versions/1'

In [4]:
data_path = "/root/.cache/kagglehub/datasets/deepcontractor/car-price-prediction-challenge/versions/1/car_price_prediction.csv"

In [5]:
import pandas as pd
data = pd.read_csv(data_path)
data.head()

Unnamed: 0,ID,Price,Levy,Manufacturer,Model,Prod. year,Category,Leather interior,Fuel type,Engine volume,Mileage,Cylinders,Gear box type,Drive wheels,Doors,Wheel,Color,Airbags
0,45654403,13328,1399,LEXUS,RX 450,2010,Jeep,Yes,Hybrid,3.5,186005 km,6.0,Automatic,4x4,04-May,Left wheel,Silver,12
1,44731507,16621,1018,CHEVROLET,Equinox,2011,Jeep,No,Petrol,3.0,192000 km,6.0,Tiptronic,4x4,04-May,Left wheel,Black,8
2,45774419,8467,-,HONDA,FIT,2006,Hatchback,No,Petrol,1.3,200000 km,4.0,Variator,Front,04-May,Right-hand drive,Black,2
3,45769185,3607,862,FORD,Escape,2011,Jeep,Yes,Hybrid,2.5,168966 km,4.0,Automatic,4x4,04-May,Left wheel,White,0
4,45809263,11726,446,HONDA,FIT,2014,Hatchback,Yes,Petrol,1.3,91901 km,4.0,Automatic,Front,04-May,Left wheel,Silver,4


In [6]:
data.columns.tolist()

['ID',
 'Price',
 'Levy',
 'Manufacturer',
 'Model',
 'Prod. year',
 'Category',
 'Leather interior',
 'Fuel type',
 'Engine volume',
 'Mileage',
 'Cylinders',
 'Gear box type',
 'Drive wheels',
 'Doors',
 'Wheel',
 'Color',
 'Airbags']

In [7]:
target = data["Price"]

In [8]:
data = data.drop(columns=["ID", "Price"], axis = 1)

In [9]:
data['Car Age'] = 2025 - data['Prod. year']
data = data.drop("Prod. year", axis=1)

In [10]:
data.columns.tolist()

['Levy',
 'Manufacturer',
 'Model',
 'Category',
 'Leather interior',
 'Fuel type',
 'Engine volume',
 'Mileage',
 'Cylinders',
 'Gear box type',
 'Drive wheels',
 'Doors',
 'Wheel',
 'Color',
 'Airbags',
 'Car Age']

In [11]:
data["Leather interior"] = data["Leather interior"].map({"Yes": 1, "No": 0})

In [12]:
import numpy as np

data['Levy'] = data['Levy'].replace('-', np.nan).astype(float)
data['Mileage'] = data['Mileage'].str.replace(' km', '').str.replace(',', '').astype(float)
data['Engine volume'] = data['Engine volume'].str.extract(r'([\d.]+)').astype(float)


In [13]:
numerical_cols = ['Levy', 'Mileage', 'Cylinders', 'Engine volume', 'Airbags', 'Car Age']
categorical_cols = ['Manufacturer', 'Model', 'Category', 'Fuel type',
                    'Gear box type', 'Drive wheels', 'Doors', 'Wheel', 'Color']

In [14]:
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder

numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])

categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent', fill_value='missing')),
    ('encoder', OneHotEncoder(handle_unknown='ignore'))
])

In [15]:
from sklearn.compose import ColumnTransformer

preprocessor = ColumnTransformer(transformers=[
    ('num', numeric_transformer, numerical_cols),
    ('cat', categorical_transformer, categorical_cols)
])

In [16]:
X_ready = preprocessor.fit_transform(data)

In [17]:
y = target.values

#Multi-Layer Perceptron

In [None]:
import tensorflow as tf

if hasattr(X_ready, "toarray"):
    X_ready = X_ready.toarray()

X_tf = tf.convert_to_tensor(X_ready, dtype=tf.float32)
y_tf = tf.convert_to_tensor(y.reshape(-1, 1), dtype=tf.float32)


In [None]:
import time
from sklearn.metrics import mean_squared_error
import numpy as np

def train_mlp(X, y, device = '/CPU: 0', epochs = 20, batch_size = 32):
  with tf.device(device):
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(128, activation = 'relu', input_shape = (X.shape[1],)),
        tf.keras.layers.Dense(64, activation = 'relu'),
        tf.keras.layers.Dense(1)
    ])

    model.compile(optimizer = 'adam', loss = 'mse')

    print(f"Trainng on {device}...")
    start_time = time.time()

    histroy = model.fit(X, y, epochs = epochs, batch_size = batch_size, verbose = 0)

    train_time = time.time() - start_time

    preds  = model.predict(X)

    rmse = np.sqrt(mean_squared_error(y.numpy(), preds))

    #Inference time (Average over 100 predictions)

    start_inf = time.time()
    for _ in range(100):
      model.predict(X[:1])
    inf_time = (time.time() - start_inf) / 100


    print(f" Train Time: {train_time: .2f}s | RMSE: {rmse:.2f} | Inference Time: {inf_time*1000:.2f} ms/sample " )

    return train_time, rmse, inf_time

In [None]:
train_mlp(X_tf, y_tf, device='/CPU:0')

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Trainng on /CPU:0...
[1m602/602[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0

(77.84911942481995, np.float64(189894.60049195713), 0.11201799392700196)

In [None]:
train_mlp(X_tf, y_tf, device='/GPU:0')

Trainng on /GPU:0...
[1m602/602[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 168ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[

(31.672447204589844, np.float64(190017.56280933612), 0.07541166543960572)

#TabNet

In [18]:
!pip install pytorch-tabnet

Collecting pytorch-tabnet
  Downloading pytorch_tabnet-4.1.0-py3-none-any.whl.metadata (15 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.3->pytorch-tabnet)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.3->pytorch-tabnet)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.3->pytorch-tabnet)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.3->pytorch-tabnet)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.3->pytorch-tabnet)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 

In [19]:
import numpy as np
import pandas as pd
from pytorch_tabnet.tab_model import TabNetRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import torch
import time

In [20]:
if hasattr(X_ready, 'toarray'):
    X_ready = X_ready.toarray()

In [21]:
X_train, X_test, y_train, y_test = train_test_split(
    X_ready, y, test_size=0.2, random_state=42
)

In [22]:
X_train = X_train.astype(np.float32)
X_test = X_test.astype(np.float32)
y_train = y_train.astype(np.float32).reshape(-1, 1)
y_test = y_test.astype(np.float32).reshape(-1, 1)

In [25]:
from tqdm import tqdm

def benchmark_tabnet(X_train, y_train, X_test, y_test, device_name='cpu'):
    print(f"\n Training TabNet on {device_name.upper()}...")

    model = TabNetRegressor(
        device_name=device_name,
        n_d=64, n_a=64,
        n_steps=5,
        gamma=1.5,
        lambda_sparse=1e-4,
        optimizer_fn=torch.optim.Adam,
        optimizer_params=dict(lr=2e-2),
        verbose=1
    )

    start_time = time.time()

    model.fit(
        X_train=X_train, y_train=y_train,
        eval_set=[(X_test, y_test)],
        eval_metric=['rmse'],
        max_epochs=10,
        patience=20,
        batch_size=1024,
        virtual_batch_size=128
    )

    train_time = time.time() - start_time

    preds = model.predict(X_test)
    rmse = np.sqrt(mean_squared_error(y_test, preds))

    print("Benchmarking inference speed...")
    start_inf = time.time()
    for _ in tqdm(range(100), desc="Running inference"):
        model.predict(X_test[:1])
    inf_time = (time.time() - start_inf) / 100

    print(f"Train Time: {train_time:.2f}s | RMSE: {rmse:.2f} | Inference Time: {inf_time*1000:.2f} ms/sample")
    return train_time, rmse, inf_time


In [26]:
cpu_results = benchmark_tabnet(X_train, y_train, X_test, y_test, device_name='cpu')

if torch.cuda.is_available():
    gpu_results = benchmark_tabnet(X_train, y_train, X_test, y_test, device_name='cuda')
else:
    print("⚠️ GPU not available — skipping GPU benchmark")


 Training TabNet on CPU...




epoch 0  | loss: 45766006952.53333| val_0_rmse: 24486.40178|  0:00:33s
epoch 1  | loss: 45764408806.4| val_0_rmse: 24446.23816|  0:01:05s
epoch 2  | loss: 45757555074.13333| val_0_rmse: 24337.97099|  0:01:37s
epoch 3  | loss: 45750540637.86667| val_0_rmse: 24140.18393|  0:02:09s
epoch 4  | loss: 45734541595.73333| val_0_rmse: 23716.94011|  0:02:41s
epoch 5  | loss: 45711758346.66666| val_0_rmse: 23060.21336|  0:03:13s
epoch 6  | loss: 45680832089.6| val_0_rmse: 22557.69563|  0:03:45s
epoch 7  | loss: 45651345465.6| val_0_rmse: 22262.04735|  0:04:17s
epoch 8  | loss: 45601573107.2| val_0_rmse: 21768.45424|  0:04:48s
epoch 9  | loss: 45559891688.53333| val_0_rmse: 20724.22119|  0:05:20s
Stop training because you reached max_epochs = 10 with best_epoch = 9 and best_val_0_rmse = 20724.22119




Benchmarking inference speed...


Running inference: 100%|██████████| 100/100 [00:01<00:00, 69.11it/s]


Train Time: 343.20s | RMSE: 20724.22 | Inference Time: 14.50 ms/sample

 Training TabNet on CUDA...
epoch 0  | loss: 45766100625.06667| val_0_rmse: 24487.52564|  0:00:02s
epoch 1  | loss: 45764777555.2| val_0_rmse: 24456.55217|  0:00:04s
epoch 2  | loss: 45758825211.73333| val_0_rmse: 24388.12432|  0:00:05s
epoch 3  | loss: 45752256366.93333| val_0_rmse: 24195.54868|  0:00:07s
epoch 4  | loss: 45738323129.6| val_0_rmse: 23931.12785|  0:00:08s
epoch 5  | loss: 45717411756.8| val_0_rmse: 23228.7601|  0:00:10s
epoch 6  | loss: 45689437781.33334| val_0_rmse: 22886.46692|  0:00:11s
epoch 7  | loss: 45658691686.4| val_0_rmse: 21710.41409|  0:00:13s
epoch 8  | loss: 45600995509.33334| val_0_rmse: 20969.64892|  0:00:14s
epoch 9  | loss: 45562696253.86667| val_0_rmse: 20011.24804|  0:00:15s
Stop training because you reached max_epochs = 10 with best_epoch = 9 and best_val_0_rmse = 20011.24804




Benchmarking inference speed...


Running inference: 100%|██████████| 100/100 [00:01<00:00, 68.87it/s]

Train Time: 21.46s | RMSE: 20011.25 | Inference Time: 14.55 ms/sample





#RNN

In [None]:
import kagglehub

# Download latest version
kagglehub.dataset_download("rakibulhasanshaon69/the-verdict-txt")



'/kaggle/input/the-verdict-txt'

In [None]:
import tensorflow as tf
import numpy as np
import time
import os
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# ✅ Step 1: Load and clean verdict text file
with open("/kaggle/input/the-verdict-txt/the-verdict.txt", "r", encoding="utf-8") as file:
    text_data = file.read().lower().replace('\n', ' ').replace('\r', ' ')

# ✅ Step 2: Tokenization & sequence generation
tokenizer = Tokenizer()
tokenizer.fit_on_texts([text_data])
total_words = len(tokenizer.word_index) + 1

# Create input sequences
tokens = tokenizer.texts_to_sequences([text_data])[0]
input_sequences = []
for i in range(1, len(tokens)):
    input_sequences.append(tokens[:i+1])

# Pad sequences and split into X and y
max_len = max([len(seq) for seq in input_sequences])
input_sequences = pad_sequences(input_sequences, maxlen=max_len, padding='pre')

X = input_sequences[:, :-1]
y = input_sequences[:, -1]

# ✅ Step 3: Define benchmark function
def benchmark_rnn(X, y, device_name="/CPU:0"):
    print(f"\n🚀 Training RNN on {device_name.upper()}...")

    with tf.device(device_name):
        model = tf.keras.Sequential([
            tf.keras.layers.Embedding(total_words, 64, input_length=X.shape[1]),
            tf.keras.layers.LSTM(64),
            tf.keras.layers.Dense(total_words, activation='softmax')
        ])
        model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

        # Train
        start_time = time.time()
        model.fit(X, y, epochs=100, verbose=0)
        train_time = time.time() - start_time

        # Inference time (single sample repeated)
        start_inf = time.time()
        for _ in range(100):
            model.predict(np.expand_dims(X[0], axis=0), verbose=0)
        inf_time = (time.time() - start_inf) / 100

        # Evaluate
        _, acc = model.evaluate(X, y, verbose=0)

        # Model size in MB
        model.save("temp_rnn_model.h5")
        model_size_mb = os.path.getsize("temp_rnn_model.h5") / 1e6
        os.remove("temp_rnn_model.h5")

        print(f"✅ Train Time: {train_time:.2f}s | Accuracy: {acc:.2%} | Inference Time: {inf_time*1000:.2f} ms/sample | Model Size: {model_size_mb:.2f} MB")
        return train_time, acc, inf_time, model_size_mb

# ✅ Step 4: Run benchmarks
cpu_results = benchmark_rnn(X, y, device_name="/CPU:0")

if tf.config.list_physical_devices('GPU'):
    gpu_results = benchmark_rnn(X, y, device_name="/GPU:0")
else:
    print("⚠️ No GPU available — skipping GPU benchmark.")



🚀 Training RNN on /CPU:0...




✅ Train Time: 1992.60s | Accuracy: 84.47% | Inference Time: 124.79 ms/sample | Model Size: 2.13 MB

🚀 Training RNN on /GPU:0...




✅ Train Time: 1931.52s | Accuracy: 87.30% | Inference Time: 138.21 ms/sample | Model Size: 2.13 MB
