In [None]:
import os
import pandas as pd

# Load the CarDekho dataset into a Pandas dataframe.
# NOTE(review): the column names used below (Car_Name, Fuel_Type, Year, ...)
# are assumed to match this CSV's header -- confirm against the actual file.
df = pd.read_csv('Car details v3.csv')

# Drop columns that are not used as model inputs
df = df.drop(['Car_Name', 'Seller_Type'], axis=1)

# Convert categorical variables into numerical ones using one-hot encoding
df = pd.get_dummies(df, columns=['Fuel_Type', 'Transmission'])

# Convert year of manufacture into age of the car
current_year = 2023
df['Age'] = current_year - df['Year']
df = df.drop(['Year'], axis=1)

# Split the data into training and testing sets
from sklearn.model_selection import train_test_split

X = df.drop(['Selling_Price'], axis=1)
y = df['Selling_Price']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the mileage and engine displacement features.
# The mean/std come from the training split only and are then applied to both
# splits, so no information about the test rows leaks into the training data.
# (`df` and `X` keep the raw, unscaled values.)
scaled_columns = ['Mileage', 'Engine']
train_mean = X_train[scaled_columns].mean()
train_std = X_train[scaled_columns].std()

# Work on copies so the assignments below do not write into slices of `X`.
X_train = X_train.copy()
X_test = X_test.copy()
X_train[scaled_columns] = (X_train[scaled_columns] - train_mean) / train_std
X_test[scaled_columns] = (X_test[scaled_columns] - train_mean) / train_std

# Import the determined.ai library and create a Trial
import determined as det
from determined.experimental import Determined

# Imported next to the class so it does not depend on imports made in other cells.
import numpy as np
import tensorflow as tf
from determined import keras as det_keras


class MyTrial(det_keras.TFKerasTrial):
    """Keras trial that trains a small dense regressor on the car-price features.

    Reads the notebook-level ``X_train`` / ``X_test`` / ``y_train`` / ``y_test``
    splits and the ``learning_rate`` hyperparameter from the trial context.

    NOTE(review): written against the ``determined.keras.TFKerasTrial``
    interface -- confirm it is available in the installed Determined version.
    NOTE(review): relies on the module-level data splits existing wherever the
    trial is instantiated -- confirm this holds when the trial runs remotely.
    """

    def __init__(self, context):
        """Keep the trial context; every method below reads it via ``self.context``."""
        self.context = context

    def build_model(self):
        """Build, wrap, and compile the network (64 -> 32 -> 1 dense layers).

        Returns:
            The compiled Keras model, wrapped by the trial context.
        """
        # Input width follows the number of feature columns in the training split.
        inputs = tf.keras.Input(shape=(X_train.shape[1],))

        # Two ReLU hidden layers followed by a single linear output for regression.
        hidden = tf.keras.layers.Dense(units=64, activation='relu')(inputs)
        hidden = tf.keras.layers.Dense(units=32, activation='relu')(hidden)
        outputs = tf.keras.layers.Dense(units=1)(hidden)

        model = tf.keras.Model(inputs=inputs, outputs=outputs)
        # The model and optimizer are wrapped by the context before compile().
        model = self.context.wrap_model(model)

        optimizer = tf.keras.optimizers.Adam(
            learning_rate=self.context.get_hparam('learning_rate'))
        optimizer = self.context.wrap_optimizer(optimizer)

        model.compile(optimizer=optimizer, loss='mean_squared_error')
        return model

    def build_training_data_loader(self):
        """Return the training split as an ``(x, y)`` pair of float32 arrays.

        NOTE(review): assumes Determined batches array inputs itself using the
        per-slot batch size -- confirm against the TFKerasTrial documentation.
        """
        return (np.asarray(X_train, dtype='float32'),
                np.asarray(y_train, dtype='float32'))

    def build_validation_data_loader(self):
        """Return the held-out split as an ``(x, y)`` pair of float32 arrays."""
        return (np.asarray(X_test, dtype='float32'),
                np.asarray(y_test, dtype='float32'))

import time

# Seconds to pause between two trial-state polls.
POLL_INTERVAL_SECONDS = 5

# Define the hyperparameters to search over
# NOTE(review): ExperimentHyperparameters, Constant, schedulers, Model,
# CheckpointPolicy, IntervalUnit and TrialState could not be matched to the
# public `determined` package -- confirm these names exist before running.
hyperparameters = det.ExperimentHyperparameters(
    hyperparameter_defaults={
        'learning_rate': det.Constant(value=0.001),
        'batch_size': det.Constant(value=64)
    })

# Define the experiment configuration
# NOTE(review): 'data' embeds the in-memory feature frames directly in the
# config -- verify that the experiment API accepts non-serializable values.
config = {
    'description': 'CarDekho price prediction',
    'data': {
        'train': X_train,
        'val': X_test
    },
    'hyperparameters': hyperparameters,
    'searcher': 'single',
    'scheduler': det.schedulers.SingleStepLRScheduler(
        step_size=20, gamma=0.1
    ),
    'model': det.Model(
        model_def=MyTrial,
        checkpoint_policy=det.CheckpointPolicy(
            frequency=det.IntervalUnit.EPOCH,
            interval=1
        ),
    ),
}

# Start the experiment. One client is created and reused for every request
# instead of constructing a new `Determined()` for each call.
client = Determined()
experiment_id = client.create_experiment(config=config)

# Wait for the experiment to complete, sleeping between polls so the loop does
# not hammer the server with back-to-back requests.
# NOTE(review): the loop only exits on COMPLETED; a trial that ends in a
# failed/cancelled state would keep it spinning -- add those states once the
# real enum is confirmed.
trial = client.get_experiment(experiment_id).top_trial()
while trial.state != det.TrialState.COMPLETED:
    time.sleep(POLL_INTERVAL_SECONDS)
    trial = client.get_trial(trial_id=trial.id)


In [None]:
# Imported here so the cell runs on a fresh kernel: StandardScaler is otherwise
# first imported in a later cell, which makes Restart & Run All fail here.
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Scale the features using StandardScaler: the statistics are fitted on the
# training split only and then applied unchanged to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Model architecture, kept as a YAML document inside a string.
# NOTE(review): the input shape hard-codes 7 features -- confirm it matches
# the number of columns in X_train.
model_def = """
    model:
        type: feedforward
        module:
            type: torch
            path: torch_nn.py
            args:
                hidden_size: 256
                num_layers: 2
                dropout: 0.1
                activation: relu
        input_ports:
            features:
                type: typed_csv
                shape: !tuple [null, 7]
        output_ports:
            price:
                type: float
"""

# Experiment configuration, assembled section by section.
# NOTE(review): several keys (data_layer, optimization, execution, max_steps)
# do not look like Determined's documented experiment config -- verify.
config = dict(
    # Fixed training hyperparameters.
    hyperparameters=dict(
        global_batch_size=64,
        learning_rate=0.001,
        num_epochs=50,
        patience=3,
    ),
    # A single configuration, scored on validation loss.
    searcher=dict(name="single", metric="val_loss", max_steps=50),
    scheduling_unit="epoch",
    min_checkpoint_period="00:01:00",
    min_validation_period="00:01:00",
    # Both data entries share one layout, so they are generated from
    # (key, features, targets) triples.
    data_layer=dict(
        name="torch",
        args={
            split_key: dict(type="numpy", x=features, y=targets)
            for split_key, features, targets in (
                ("train_data", X_train, y_train),
                ("val_data", X_test, y_test),
            )
        },
    ),
    model=dict(name="torch_feedforward", definition=model_def),
    optimization=dict(name="adam", args={}),
    execution=dict(num_training_units="epoch", gpu=True),
)

# Create an experiment in Determined
# NOTE(review): `determinedsys` (here) and `determined` (in the evaluation
# below) are not bound by any import visible in this notebook -- only the
# alias `det` is. Confirm where these names are supposed to come from.
experiment = determinedsys.create_experiment(config)

# Train the model: one fit/evaluate round per searcher step, reporting the
# validation loss back to the experiment each time.
for i in range(config["searcher"]["max_steps"]):
    trial = experiment.get_trial()
    model = trial.get_model()
    model.fit(trial.get_batch_size("global_batch_size"), trial.get_data_layer())
    experiment.trial_close(trial, {"val_loss": model.evaluate(trial.get_data_layer())})

# Evaluate the best model on the test set
best_checkpoint = experiment.top_checkpoint()
best_model = experiment.get_model_from_checkpoint(best_checkpoint)
# X_test was already standardized with `scaler.transform` right after the
# split. Transforming it again would scale the test features twice, so they
# would no longer match what the model was trained on.
X_test_scaled = X_test
test_loss = best_model.evaluate(determined.TorchData(context=experiment.d.context, data=(X_test_scaled, y_test)))
print(f"Test loss: {test_loss}")


In [None]:
import determined as det
import determined.keras as detk
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


# Load the dataset
df = pd.read_csv('car_data.csv')

# Preprocess the data: the first column is the target, the rest are features.
# NOTE(review): assumes the price column comes first in car_data.csv -- confirm.
y = df.iloc[:, 0].values
features = df.iloc[:, 1:]

# One-hot encode the categorical features.
# Encoding the DataFrame slice directly keeps the CSV's own column labels and
# dtypes, so it works for any number of feature columns. The previous round
# trip through `.values` relabelled the data with four hard-coded names and
# raised unless the file had exactly four feature columns.
# Numeric columns pass through unchanged; dtype=float gives a numeric matrix.
X = pd.get_dummies(features, dtype=float).values

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Scale the features using StandardScaler (fit on the training split only)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Define the model architecture
def build_model(hparams):
    """Assemble and compile the feed-forward price regressor.

    Args:
        hparams: Mapping queried with ``.get`` for ``'hidden_size'``
            (default 128), ``'dropout'`` (default 0.2) and
            ``'learning_rate'`` (default 0.001).

    Returns:
        A compiled ``Sequential`` model: two Dense(relu) + Dropout pairs
        followed by a single-unit Dense output, trained with MSE loss and
        reporting MAE.
    """
    layers = tf.keras.layers

    # Two identical hidden stages, each a ReLU dense layer plus dropout.
    stack = []
    for _ in range(2):
        stack.append(layers.Dense(hparams.get('hidden_size', 128), activation='relu'))
        stack.append(layers.Dropout(hparams.get('dropout', 0.2)))

    # Single linear unit for the regression output.
    stack.append(layers.Dense(1))

    network = tf.keras.models.Sequential(stack)
    adam = tf.keras.optimizers.Adam(learning_rate=hparams.get('learning_rate', 0.001))
    network.compile(optimizer=adam, loss='mse', metrics=['mae'])
    return network

# Define the experiment configuration: search ranges, searcher settings, the
# in-memory data splits, and how the model is built and fitted.
# NOTE(review): ConfigRange, LogConfigRange, TrialContext(hparams=...),
# ExperimentConfig and det.create_experiment could not be matched to the
# public `determined` package -- confirm these names exist before running.
config = {
    # Values passed as `hparams` to build_model.
    'hyperparameters': {
        'hidden_size': det.ConfigRange(128, 512, 128),
        'dropout': det.ConfigRange(0.1, 0.5, 0.1),
        'learning_rate': det.LogConfigRange(1e-4, 1e-2)
    },
    # NOTE(review): 'single' is combined with 'max_trials': 3 and with ranged
    # hyperparameters -- presumably a multi-trial searcher was intended; verify.
    'searcher': {
        'name': 'single',
        'max_steps': 50,
        'max_trials': 3
    },
    # Standardized arrays from the split above; the held-out split doubles as
    # the validation set.
    'data': {
        'train': {
            'x': X_train,
            'y': y_train
        },
        'validation': {
            'x': X_test,
            'y': y_test
        }
    },
    'model': {
        'model_class': build_model,
        'fit': {
            'epochs': 50,
            # NOTE(review): refers to a 'batch_size' hyperparameter, but no
            # such key is declared under 'hyperparameters' above.
            'batch_size': det.TrialContext(hparams='batch_size')
        }
    }
}

# Create a Determined experiment from the sections of `config`.
experiment_config = det.ExperimentConfig(
    experiment_name='car_price_prediction',
    description='Predict the price of a used car based on its features',
    hyperparameters=config['hyperparameters'],
    searcher=config['searcher'],
    data=config['data'],
    model=config['model']
)
experiment = det.create_experiment(experiment_config)

# Train the model
# NOTE(review): `det.keras.wrappers.training_loop` could not be matched to the
# public `determined` package -- confirm the decorator exists.
@det.keras.wrappers.training_loop
def train(model, context):
    """Fit `model` on the trial's training dataset.

    NOTE(review): `steps_per_epoch` is given the per-slot *batch size*, which
    is not a step count -- this looks unintended; confirm.
    NOTE(review): 'fit.epochs' is looked up as a hyperparameter, but the epoch
    count is declared under config['model']['fit'], not under
    config['hyperparameters'] -- confirm the lookup resolves.
    """
    model.fit(context.trial.train_dataset, epochs=context.get_hparam('fit.epochs'),
              steps_per_epoch=context.get_per_slot_batch_size())

# Called with no arguments although `train` declares (model, context);
# presumably the decorator supplies them -- verify.
train()

# Evaluate the best model on the test set
# NOTE(review): top_checkpoint / get_model_from_checkpoint are called on the
# object returned by det.create_experiment -- confirm that object offers them.
best_checkpoint = experiment.top_checkpoint()
model = experiment.get_model_from_checkpoint(best_checkpoint)
# X_test is the standardized array produced by scaler.transform earlier in
# this cell. The same split is also passed as the validation data in `config`,
# so these numbers are not from an untouched hold-out set.
# Two values are unpacked because build_model compiles with one loss ('mse')
# and one metric ('mae').
test_loss, test_mae = model.evaluate(x=X_test, y=y_test)
print(f'Test loss: {test_loss:.2f}, Test MAE: {test_mae:.2f}')


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
car_data = pd.read_csv('car_data.csv')

# Drop the name column. A new frame is returned instead of mutating in place,
# so the step does not modify the frame that was loaded above.
car_data = car_data.drop(columns='name')

# One-hot encode the fuel type, dropping the first level as the baseline.
# Assigning the get_dummies() result to the single 'fuel_type' column only
# works when exactly one dummy column is produced (two fuel types); with more
# categories the assignment fails. Encoding through `columns=` handles any
# number of fuel types and yields 'fuel_type_<value>' indicator columns.
car_data = pd.get_dummies(car_data, columns=['fuel_type'], drop_first=True)

# One-hot encode the remaining categorical variables
car_data = pd.get_dummies(car_data, columns=['seller_type', 'transmission', 'owner'])

# Split the data into train and test sets
from sklearn.model_selection import train_test_split

X = car_data.drop('selling_price', axis=1)
y = car_data['selling_price']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features: the scaler learns its statistics from the training
# split only, and the same fitted scaler is then applied to both splits.
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler().fit(X_train)
X_train_scaled, X_test_scaled = (scaler.transform(split) for split in (X_train, X_test))

# Assemble the regression network one layer at a time
import tensorflow as tf

model = tf.keras.models.Sequential()
# The first hidden layer declares the input width (one unit per feature column).
model.add(tf.keras.layers.Dense(128, activation='relu', input_shape=[X_train.shape[1]]))
model.add(tf.keras.layers.Dense(64, activation='relu'))
# Single linear output unit for the predicted price.
model.add(tf.keras.layers.Dense(1))

# Mean squared error loss, optimized with Adam at its default settings
model.compile(loss='mse', optimizer='adam')

# Fit on the scaled training split; the scaled test split is passed as the
# validation data for every epoch.
model.fit(X_train_scaled, y_train, validation_data=(X_test_scaled, y_test), epochs=100)

# Evaluate the model on the scaled test split
from sklearn.metrics import r2_score, mean_squared_error

y_pred = model.predict(X_test_scaled)
print('R^2 Score:', r2_score(y_test, y_pred))
print('MSE:', mean_squared_error(y_test, y_pred))

# Save the model.
# The path carries an explicit `.keras` extension because current Keras
# releases reject `Model.save()` paths without a recognised extension.
model.save('car_price_prediction_model.keras')
