In [None]:
#IMPORTING LIBRARIES
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score

In [None]:
#LOADING THE DATASET & DISPLAYING IT
# The encoding is passed explicitly; presumably the CSV contains characters that are
# not valid under pandas' default UTF-8 decoding -- TODO confirm against the file.
dataset = pd.read_csv(
    filepath_or_buffer='Car_Purchasing_Data.csv',
    encoding='ISO-8859-1',
)
dataset

In [None]:
#VISUALIZING THE DATASET
# Draws seaborn's pair plot for the loaded frame to eyeball pairwise relationships.
sns.pairplot(data=dataset)

In [None]:
#SEPARATING THE TARGET (y) FROM THE FEATURES (X)
# y is the dependent variable, 'Car Purchase Amount'.
y = dataset['Car Purchase Amount']

# X keeps every other column except the unnecessary ones ('Customer Name' and
# 'Customer e-mail'); 'Car Purchase Amount' is dropped too because it is the target.
X = dataset.drop(
    columns=['Customer Name', 'Customer e-mail', 'Car Purchase Amount'],
)

#OPTIONAL CHECKS - uncomment to inspect the features/target and their shapes
#X
#y
#X.shape
#y.shape

In [None]:
#ONE HOT ENCODING CATEGORICAL DATA - 'Country' column
# One-hot encoding replaces 'Country' with one indicator column per distinct value
# (the original note reports the column count growing from 6 to 216); every other
# column is passed through unchanged.
# handle_unknown='ignore' makes ct.transform() encode a country that was not present
# at fit time as an all-zero indicator block instead of raising, so the fitted
# transformer can be reused on new customers.
# NOTE: X is rebound from a DataFrame to the transformer's array output here, so this
# cell cannot be re-run without first re-running the cell that builds X.
ct = ColumnTransformer(
    transformers=[
        ('encoder', OneHotEncoder(sparse_output=False, handle_unknown='ignore'), ['Country']),
    ],
    remainder='passthrough',
)
X = ct.fit_transform(X)

In [None]:
#APPLYING FEATURE SCALING TO NORMALIZE THE VALUES
# The scalers learn their min/max from the TRAINING rows only. Fitting them on the
# whole dataset (as before) leaks test-set statistics into the preprocessing and
# makes the test metrics optimistic.
#
# The training rows are recovered by splitting the row indices with exactly the same
# parameters as the train/test split cell (test_size=0.2, random_state=0). For a
# given sample count and random_state, train_test_split yields the same partition,
# so these are the rows that later become X_train / y_train.
# Keep these two parameters in sync with the split cell.
train_rows, _ = train_test_split(np.arange(X.shape[0]), test_size=0.2, random_state=0)

# MinMaxScaler expects 2-D input, so the target is reshaped into a single column.
y_column = y.values.reshape(-1, 1)

sc_x = MinMaxScaler().fit(X[train_rows])
sc_y = MinMaxScaler().fit(y_column[train_rows])

# Every row (train and test) is transformed with the training-fitted scalers; test
# values may therefore fall slightly outside [0, 1], which is expected.
X = sc_x.transform(X)
y = sc_y.transform(y_column)

In [None]:
#VISUALIZING THE DATASET AFTER FEATURE SCALING
# Prints the scaled feature matrix, then the scaled target, each starting on its own line.
print(X, y, sep='\n')

In [None]:
#SPLITTING THE DATASET INTO TRAINING AND TESTING SETS
# 80% of the rows go to training and 20% to testing; random_state is fixed so the
# same partition is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [None]:
#BUILDING THE ARTIFICIAL NEURAL NETWORK (ANN) MODEL
# Start from an empty Sequential model; the layers are stacked onto it one by one below.
ann = tf.keras.Sequential()

In [None]:
#ADDING THE INPUT LAYER AND FIRST HIDDEN LAYER
# The input width is read from the training matrix instead of being hard-coded to 216,
# so the model still builds if the number of one-hot encoded columns changes
# (for example with a different set of countries in the data).

ann.add(tf.keras.layers.Dense(units=32, activation='relu', input_dim=X_train.shape[1]))

In [None]:
#ADDING THE SECOND HIDDEN LAYER
# Fully connected layer with 16 units and ReLU activation.
ann.add(tf.keras.layers.Dense(16, activation='relu'))

In [None]:
#ADDING THE THIRD HIDDEN LAYER
# Fully connected layer with 32 units and ReLU activation.
ann.add(tf.keras.layers.Dense(32, activation='relu'))

In [None]:
#ADDING THE FOURTH HIDDEN LAYER
# Fully connected layer with 32 units and ReLU activation.
ann.add(tf.keras.layers.Dense(32, activation='relu'))

In [None]:
#ADDING THE OUTPUT LAYER
# A single unit with linear activation: the network emits one continuous value
# (the scaled purchase amount) per input row.
ann.add(tf.keras.layers.Dense(1, activation='linear'))

In [None]:
#CHECKING THE MODEL SUMMARY
# Prints the layers stacked so far; useful to confirm the architecture before compiling.

ann.summary()

In [None]:
#COMPILING THE ANN
# Mean squared error is the loss being minimised by the Adam optimizer; mean absolute
# error is tracked as an additional metric during training.
ann.compile(
    loss='mean_squared_error',
    optimizer='adam',
    metrics=['mae'],
)

In [None]:
#TRAINING THE ANN ON THE TRAINING SET
# 500 epochs in mini-batches of 5 rows; validation_split=0.2 sets aside 20% of the
# training data for validation. The returned History object is kept in `output`.
output = ann.fit(
    x=X_train,
    y=y_train,
    epochs=500,
    batch_size=5,
    validation_split=0.2,
    verbose=1,
)

In [None]:
#EVALUATING THE MODEL
#These are the names of the values recorded by the fit method after every epoch
#(the plotting cell below reads 'loss' and 'val_loss' from this dictionary)

output.history.keys()

In [None]:
#PLOTTING THE TRAINING AND VALIDATION LOSS RECORDED BY THE FIT METHOD
# One curve per series, each labelled directly so the legend picks the names up.
plt.plot(output.history['loss'], label='Training Loss')
plt.plot(output.history['val_loss'], label='Validation Loss')
plt.legend()
plt.title('Model Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')


In [None]:
#CALCULATING THE MEAN ABSOLUTE ERROR AND R2 VALUE

# Model output for the full test set (still in the scaled target space)
y_pred = ann.predict(X_test)

# Undo the target scaling on both the true and the predicted values so the metrics
# are computed on the original target values rather than the scaled ones
y_test_original = sc_y.inverse_transform(y_test.reshape(-1, 1))
y_pred_original = sc_y.inverse_transform(y_pred.reshape(-1, 1))

# Score the predictions against the true values
r2 = r2_score(y_test_original, y_pred_original)
mae = mean_absolute_error(y_test_original, y_pred_original)

print(f"MAE: {mae!s}\nR² Score: {r2!s}")

In [None]:
#PREDICTING NEW VALUE

# Preparing the raw input data.
# X_new must be a DataFrame holding the same raw feature columns that `ct` was fitted
# on, i.e. every column of `dataset` except the three dropped before encoding.
# Previously X_new was never defined (a stray literal stood in its place), so this
# cell failed with a NameError.
# The first row of the dataset is used as a ready-made one-row template; overwrite
# its values with the customer you want to score, e.g.
#   X_new.loc[:, 'Country'] = '<country name>'
X_new = (
    dataset
    .drop(['Customer Name', 'Customer e-mail', 'Car Purchase Amount'], axis=1)
    .iloc[[0]]  # double brackets keep a one-row DataFrame rather than a Series
    .copy()
)

# Encoding the Country column with the same ColumnTransformer (ct) fitted during training
X_new_encoded = ct.transform(X_new)

# Applying the same feature scaler (sc_x) fitted during training
X_new_scaled = sc_x.transform(X_new_encoded)

# Predicting the scaled value
y_pred_new = ann.predict(X_new_scaled)

# The prediction is in the scaled target space, so it is mapped back to the original
# value with the same target scaler (sc_y) fitted during training.
# NOTE: this rebinds y_pred_original, which the evaluation cell also assigns.
y_pred_original = sc_y.inverse_transform(y_pred_new.reshape(-1, 1))

print(f"Expected Purchase Amount: {y_pred_original[0][0]}")