### Step 1: Importing required Modules

In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
import numpy as np
import tensorflow as tf
import warnings
warnings.filterwarnings("ignore")

### Step 2: Importing dataset

In [2]:
ds = pd.read_csv("car_purchasing.csv", encoding="ISO-8859-1")
ds.head()

Unnamed: 0,customer name,customer e-mail,country,gender,age,annual Salary,credit card debt,net worth,car purchase amount
0,Martina Avila,cubilia.Curae.Phasellus@quisaccumsanconvallis.edu,Bulgaria,0,41.85172,62812.09301,11609.38091,238961.2505,35321.45877
1,Harlan Barnes,eu.dolor@diam.co.uk,Belize,0,40.870623,66646.89292,9572.957136,530973.9078,45115.52566
2,Naomi Rodriquez,vulputate.mauris.sagittis@ametconsectetueradip...,Algeria,1,43.152897,53798.55112,11160.35506,638467.1773,42925.70921
3,Jade Cunningham,malesuada@dignissim.com,Cook Islands,1,58.271369,79370.03798,14426.16485,548599.0524,67422.36313
4,Cedric Leach,felis.ullamcorper.viverra@egetmollislectus.net,Brazil,1,57.313749,59729.1513,5358.712177,560304.0671,55915.46248


### Step 3: checking null values and describing the dataset 

In [3]:
ds.shape

(500, 9)

In [4]:
ds.isnull().sum()

customer name          0
customer e-mail        0
country                0
gender                 0
age                    0
annual Salary          0
credit card debt       0
net worth              0
car purchase amount    0
dtype: int64

In [5]:
ds.describe()

Unnamed: 0,gender,age,annual Salary,credit card debt,net worth,car purchase amount
count,500.0,500.0,500.0,500.0,500.0,500.0
mean,0.506,46.241674,62127.239608,9607.645049,431475.713625,44209.799218
std,0.500465,7.978862,11703.378228,3489.187973,173536.75634,10773.178744
min,0.0,20.0,20000.0,100.0,20000.0,9000.0
25%,0.0,40.949969,54391.977195,7397.515792,299824.1959,37629.89604
50%,1.0,46.049901,62915.497035,9655.035568,426750.12065,43997.78339
75%,1.0,51.612263,70117.862005,11798.867487,557324.478725,51254.709517
max,1.0,70.0,100000.0,20000.0,1000000.0,80000.0


### Step 4: Encoding the catagorical values into Numerical values

In [6]:
ds.columns

Index(['customer name', 'customer e-mail', 'country', 'gender', 'age',
       'annual Salary', 'credit card debt', 'net worth',
       'car purchase amount'],
      dtype='object')

In [7]:
ds = ds.drop(columns=['customer name', 'customer e-mail', 'gender','age'])
ds.head()


Unnamed: 0,country,annual Salary,credit card debt,net worth,car purchase amount
0,Bulgaria,62812.09301,11609.38091,238961.2505,35321.45877
1,Belize,66646.89292,9572.957136,530973.9078,45115.52566
2,Algeria,53798.55112,11160.35506,638467.1773,42925.70921
3,Cook Islands,79370.03798,14426.16485,548599.0524,67422.36313
4,Brazil,59729.1513,5358.712177,560304.0671,55915.46248


In [8]:
le = LabelEncoder()

In [9]:
ds['country'].unique()

array(['Bulgaria', 'Belize', 'Algeria', 'Cook Islands', 'Brazil',
       'Liberia', 'Syria', 'Czech Republic', 'Armenia', 'Somalia',
       'Sint Maarten', 'Greenland', 'Nicaragua', 'Palestine, State of',
       'United Arab Emirates', 'Gabon', 'Tokelau', 'Portugal', 'Chad',
       'Iraq', 'Sudan', 'Angola', 'Nigeria', 'Madagascar', 'Macedonia',
       'Oman', 'Colombia', 'Namibia', 'Denmark', 'Dominican Republic',
       'Costa Rica', 'Botswana', 'Saint Pierre and Miquelon', 'France',
       'Senegal', 'Cambodia', 'Suriname', 'Turkey', 'Ethiopia',
       'French Guiana', 'Bhutan', 'Georgia', 'Mozambique', 'Nauru',
       'Timor-Leste', 'Djibouti', 'Argentina', 'Taiwan', 'Åland Islands',
       'Turks and Caicos Islands', 'Samoa', 'Vanuatu', 'Mongolia',
       'Sri Lanka', 'Guinea-Bissau', 'Egypt', 'Slovenia', 'Benin',
       'Cape Verde', 'Paraguay', 'Laos', 'Iceland', 'Viet Nam',
       'Marshall Islands', 'Germany', 'Kyrgyzstan', 'Mauritius', 'Gambia',
       'Reunion', 'Jordan', 'S

In [10]:
ds['country'] = le.fit_transform(ds['country'])

In [11]:
print("Encoded values for countries:")
for index, label in enumerate(le.classes_):
    print(f"{label} -> {index}")

Encoded values for countries:
Afghanistan -> 0
Algeria -> 1
American Samoa -> 2
Andorra -> 3
Angola -> 4
Anguilla -> 5
Antarctica -> 6
Argentina -> 7
Armenia -> 8
Aruba -> 9
Australia -> 10
Austria -> 11
Bahamas -> 12
Bahrain -> 13
Bangladesh -> 14
Belarus -> 15
Belgium -> 16
Belize -> 17
Benin -> 18
Bermuda -> 19
Bhutan -> 20
Bolivia -> 21
Bonaire, Sint Eustatius and Saba -> 22
Bosnia and Herzegovina -> 23
Botswana -> 24
Bouvet Island -> 25
Brazil -> 26
Bulgaria -> 27
Cambodia -> 28
Cameroon -> 29
Canada -> 30
Cape Verde -> 31
Cayman Islands -> 32
Central African Republic -> 33
Chad -> 34
Chile -> 35
China -> 36
Christmas Island -> 37
Cocos (Keeling) Islands -> 38
Colombia -> 39
Congo (Brazzaville) -> 40
Cook Islands -> 41
Costa Rica -> 42
Croatia -> 43
Curaçao -> 44
Czech Republic -> 45
Denmark -> 46
Djibouti -> 47
Dominican Republic -> 48
Ecuador -> 49
Egypt -> 50
El Salvador -> 51
Equatorial Guinea -> 52
Ethiopia -> 53
Falkland Islands -> 54
Faroe Islands -> 55
France -> 56
French 

In [12]:
import pickle

In [13]:
with open('label_encoder.pkl', 'wb') as file:
    pickle.dump(le, file)

In [14]:
ds['country']

0       27
1       17
2        1
3       41
4       26
      ... 
495    128
496    208
497    144
498     24
499    209
Name: country, Length: 500, dtype: int64

In [15]:
ds.head()

Unnamed: 0,country,annual Salary,credit card debt,net worth,car purchase amount
0,27,62812.09301,11609.38091,238961.2505,35321.45877
1,17,66646.89292,9572.957136,530973.9078,45115.52566
2,1,53798.55112,11160.35506,638467.1773,42925.70921
3,41,79370.03798,14426.16485,548599.0524,67422.36313
4,26,59729.1513,5358.712177,560304.0671,55915.46248


### Step 5: Splitting the whole data into training and testing data

In [16]:
X = ds.drop(columns = "car purchase amount")
y = ds["car purchase amount"]

In [17]:
X.head()

Unnamed: 0,country,annual Salary,credit card debt,net worth
0,27,62812.09301,11609.38091,238961.2505
1,17,66646.89292,9572.957136,530973.9078
2,1,53798.55112,11160.35506,638467.1773
3,41,79370.03798,14426.16485,548599.0524
4,26,59729.1513,5358.712177,560304.0671


In [18]:
print(X.dtypes)

country               int64
annual Salary       float64
credit card debt    float64
net worth           float64
dtype: object


In [19]:
y.head()

0    35321.45877
1    45115.52566
2    42925.70921
3    67422.36313
4    55915.46248
Name: car purchase amount, dtype: float64

In [20]:
X.head()

Unnamed: 0,country,annual Salary,credit card debt,net worth
0,27,62812.09301,11609.38091,238961.2505
1,17,66646.89292,9572.957136,530973.9078
2,1,53798.55112,11160.35506,638467.1773
3,41,79370.03798,14426.16485,548599.0524
4,26,59729.1513,5358.712177,560304.0671


### Step 6: Using StandedScalar for Enhancing Model Performance and spliting the whole data into training and testing

In [21]:
ss = StandardScaler()
X = ss.fit_transform(X)

In [22]:
print(X)

[[-1.25451191  0.05857619  0.57427133 -1.11046945]
 [-1.42027285  0.38657041 -0.0099515   0.57392937]
 [-1.68549034 -0.71236095  0.4454518   1.19397625]
 ...
 [ 0.68489103  0.57832106  0.28802326  1.92114437]
 [-1.30424019 -1.05333541  1.26384749 -0.54019026]
 [ 1.76233711 -0.06470949 -0.06205457  0.18153098]]


In [23]:
from sklearn.model_selection import train_test_split

In [24]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=20)

In [25]:
X_train.shape

(400, 4)

### Step 7: Defining an Artificial Neural Network 

In [26]:
model = tf.keras.models.Sequential()

In [27]:
model.add(tf.keras.layers.Dense(units=4, activation='relu')) #input layer
model.add(tf.keras.layers.Dense(units=128, activation='relu'))   # Hidden layer 1
model.add(tf.keras.layers.Dense(units = 64, activation='relu'))  # Hidden layer 2
model.add(tf.keras.layers.Dense(units = 32, activation='relu'))  # Hidden layer 3
model.add(tf.keras.layers.Dense(units=1, activation='linear'))  # output layer

### Step 8: Compiling Network

In [28]:
model.compile(loss='mse',optimizer='adam' , metrics=['mae', 'mean_squared_logarithmic_error'])


### Step 9: Fitting Network

In [29]:
#fitting the model
model.fit(X_train, y_train, epochs=30, batch_size=6, verbose=True)

Epoch 1/30
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 2000724224.0000 - mae: 43223.8320 - mean_squared_logarithmic_error: 96.2476 
Epoch 2/30
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 2107764608.0000 - mae: 44539.2148 - mean_squared_logarithmic_error: 40.0298
Epoch 3/30
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1869168896.0000 - mae: 41824.2070 - mean_squared_logarithmic_error: 12.0897  
Epoch 4/30
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1242111488.0000 - mae: 33549.9492 - mean_squared_logarithmic_error: 2.7695  
Epoch 5/30
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 378128416.0000 - mae: 16421.9648 - mean_squared_logarithmic_error: 0.3515
Epoch 6/30
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 192265344.0000 - mae: 11176.7510 - mean_squared_logarithmic_error:

<keras.src.callbacks.history.History at 0x1d4d2d7f970>

### Step 10: Model Evaluating

In [30]:
loss, mae, msle = model.evaluate(X_test, y_test)

# Print all the evaluation results in one line
print(f'Mean Squared Error (MSE): {loss} | Mean Absolute Error (MAE): {mae} | Mean Squared Logarithmic Error (MSLE): {msle}')

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 58369020.0000 - mae: 6126.6748 - mean_squared_logarithmic_error: 0.0299 
Mean Squared Error (MSE): 52007540.0 | Mean Absolute Error (MAE): 5845.08935546875 | Mean Squared Logarithmic Error (MSLE): 0.026224084198474884


### Step 11: Saving model

In [31]:
model.save("car_purchase_rate.h5") 



### Step 12: Predicting Model

In [32]:
country = "Bulgaria"
annual_salary = 62812.09301
credit_card_debt = 11609.24798
net_worth = 238961.2505

In [33]:
input_data = pd.DataFrame({
        'country': [country],
        'annual_salary': [annual_salary],
        'credit_card_debt': [credit_card_debt],
        'networth': [net_worth]
    })

In [34]:
input_data['country'] = le.transform([country])[0]

In [35]:
prediction = model.predict(input_data)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step


In [36]:
scaled_prediction = prediction[0].item() / 100000

In [37]:
print(f"The estimated car purchase rate is: ${scaled_prediction:,.4f}")

The estimated car purchase rate is: $33,504.8755
