In [45]:
import joblib
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset

In [46]:
class NeuralNetwork(nn.Module):
    """Fully connected network with one hidden layer: Linear -> ReLU -> Linear."""

    def __init__(self, input_size, hidden_size, output_size):
        """Create the two linear layers and the activation between them.

        Args:
            input_size: Number of features in each input vector.
            hidden_size: Width of the hidden layer.
            output_size: Number of values produced per input vector.
        """
        super().__init__()
        # The attribute names define the state_dict keys ("fc1.*", "fc2.*"),
        # so they must stay as they are for saved weights to load.
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Return ``fc2(relu(fc1(x)))`` for the input tensor ``x``."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

In [47]:
def scale_numerical_input(year, mileage, scaler):
    """Scale one (year, mileage) pair using the given scaler's ``transform``.

    Args:
        year: Value for the "year" feature.
        mileage: Raw mileage; it is divided by 1000 before being scaled.
        scaler: Object exposing ``transform``. It receives a one-row
            DataFrame with the columns "year" and "mileage", in that order.

    Returns:
        dict: ``{"year": ..., "mileage": ...}`` holding the scaled values.
    """
    feature_names = ["year", "mileage"]
    raw_row = pd.DataFrame([{"year": year, "mileage": mileage / 1000}])
    scaled_frame = pd.DataFrame(scaler.transform(raw_row), columns=feature_names)
    # Single input row -> take the first (only) row of the scaled matrix.
    scaled_year, scaled_mileage = scaled_frame.to_numpy()[0]
    return {"year": scaled_year, "mileage": scaled_mileage}

In [48]:
def transform_raw_input_to_df(brand, model, engine, year, mileage, df, sc):
    """Build the feature vector for a single car described by raw values.

    The vector has one entry per column of ``df`` except ``'price'``. Every
    entry starts at zero; the one-hot entries ``brand_<brand>``,
    ``model_<model>`` and ``engine_<engine>`` are set to 1, and ``year`` /
    ``mileage`` receive the values returned by ``scale_numerical_input``.

    Args:
        brand: Category label; ``'brand_' + brand`` must be a column of ``df``.
        model: Category label; ``'model_' + model`` must be a column of ``df``.
        engine: Category label; ``'engine_' + engine`` must be a column of ``df``.
        year: Raw year, forwarded to ``scale_numerical_input``.
        mileage: Raw mileage, forwarded to ``scale_numerical_input``.
        df: DataFrame whose columns (minus ``'price'``) define the vector layout.
        sc: Scaler forwarded to ``scale_numerical_input``.

    Returns:
        pandas.Series: float64 values indexed by the feature column names.

    Raises:
        KeyError: If ``df`` has no ``'price'`` column.
        ValueError: If a required feature column is absent from ``df``.
    """
    feature_columns = df.columns.drop('price')

    one_hot_columns = ['brand_' + brand, 'model_' + model, 'engine_' + engine]
    required_columns = one_hot_columns + ['year', 'mileage']
    # Assigning to a label that is not in the index would silently append a
    # new entry and change the vector length, so reject unknown labels here.
    missing = [column for column in required_columns if column not in feature_columns]
    if missing:
        raise ValueError(
            f"Feature column(s) {missing} not found in `df`; "
            "setting them would change the length of the feature vector."
        )

    user_input = pd.Series(0.0, index=feature_columns)

    # Handling categoricals
    user_input[one_hot_columns] = 1.0

    # Handling numericals
    scaled_nums = scale_numerical_input(year, mileage, sc)
    user_input["mileage"] = scaled_nums["mileage"]
    user_input["year"] = scaled_nums["year"]

    return user_input

In [49]:
def init_model(weights, model_class, input_size=972, hidden_size=128, output_size=1):
    """Instantiate ``model_class`` and load a saved state dict into it.

    Args:
        weights: Path (or file-like object) accepted by ``torch.load`` that
            holds the state dict.
        model_class: Callable invoked as
            ``model_class(input_size, hidden_size, output_size)``.
        input_size: First constructor argument; defaults to 972.
        hidden_size: Second constructor argument; defaults to 128.
        output_size: Third constructor argument; defaults to 1.

    Returns:
        The model instance with the loaded parameters. ``.eval()`` is not
        called here; callers switch modes themselves.

    Raises:
        RuntimeError: Propagated from ``load_state_dict`` when the stored
            keys or shapes do not match the constructed model.
    """
    model_eval = model_class(input_size, hidden_size, output_size)
    # NOTE(review): torch.load unpickles the file - only load checkpoints
    # from trusted sources.
    # map_location="cpu" lets a checkpoint saved on a GPU be loaded on a
    # machine without one.
    state_dict = torch.load(weights, map_location="cpu")
    model_eval.load_state_dict(state_dict)
    return model_eval

In [50]:
# Load each cleaned dataset together with the scaler saved under the same
# suffix (presumably one pair per missing-value imputation strategy, judging
# by the file names - TODO confirm).
# NOTE(review): joblib.load unpickles the file; only load trusted files.
df_mean, sc_mean = pd.read_csv("data_cleaned_mean.csv"), joblib.load('scaler_mean.save')

df_knn, sc_knn = pd.read_csv("data_cleaned_knn.csv"), joblib.load('scaler_knn.save')

df_median, sc_median = pd.read_csv("data_cleaned_median.csv"), joblib.load('scaler_median.save')

df_most_freq, sc_most_freq = pd.read_csv("data_cleaned_most_freq.csv"), joblib.load('scaler_most_freq.save')

In [51]:
# Build the network and load the parameters stored in 'model_v_0.0.1_knn.pth'.
model = init_model(weights='model_v_0.0.1_knn.pth', model_class=NeuralNetwork)

In [39]:
# Encode one example car using the column layout of df_knn and its scaler.
data = transform_raw_input_to_df("Toyota", "Avensis", "2.0D", 2007, 332000, df_knn, sc_knn)

model.eval()

with torch.no_grad():
    # Hand torch a plain float32 array rather than the label-indexed Series,
    # so the conversion does not depend on how the Series is indexed.
    inputs = torch.tensor(data.to_numpy(dtype="float32"), dtype=torch.float32)
    predictions = model(inputs).numpy()

In [40]:
# Display the model output computed in the previous cell.
predictions

array([3400.8894], dtype=float32)

In [68]:
target_column = 'price'

# Features and target as NumPy arrays; the target is reshaped to a single
# column so each row holds one value.
X = df_median.drop(columns=[target_column]).values
y = df_median[target_column].values.reshape(-1, 1)

# NOTE(review): assumes the model under evaluation was trained with the same
# split parameters; otherwise these "test" rows may have been seen in
# training - confirm.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

batch_size = 64
# Evaluation does not benefit from shuffling; a fixed order keeps runs
# reproducible.
test_data_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [69]:
# Evaluate the model loaded from 'model_v_0.0.1_median.pth' on the test loader.
model = init_model('model_v_0.0.1_median.pth', NeuralNetwork)
criterion = nn.MSELoss()

model.eval()

prediction_batches = []
actual_batches = []

# No gradients are needed for evaluation.
with torch.no_grad():
    for inputs, actuals in test_data_loader:
        predictions = model(inputs)
        prediction_batches.append(predictions.numpy())
        actual_batches.append(actuals.numpy())

all_predictions = np.concatenate(prediction_batches)
all_actuals = np.concatenate(actual_batches)

# One loss over the whole test set, so a smaller final batch is not
# over-weighted the way an average of per-batch losses would be.
overall_mse = criterion(torch.tensor(all_predictions), torch.tensor(all_actuals))
print(f'Overall Mean Squared Error on Test Set: {overall_mse.item():.4f}')

Overall Mean Squared Error on Test Set: 26190218.0000


In [None]:
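# Overall test-set MSE noted for the other model variants
# (presumably from the same evaluation as above - TODO confirm):
# MEAN  => 24973720.0000
# KNN => 25123904.0000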