### Step 1: Importing required Modules

In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
import numpy as np
import tensorflow as tf
import warnings
warnings.filterwarnings("ignore")

### Step 2: Importing dataset

In [2]:
# Read the customer records from the CSV in the working directory; the file is
# decoded as ISO-8859-1 rather than the default UTF-8.
csv_path = "car_purchasing.csv"
ds = pd.read_csv(csv_path, encoding="ISO-8859-1")
ds.head()

Unnamed: 0,customer name,customer e-mail,country,gender,age,annual Salary,credit card debt,net worth,car purchase amount
0,Martina Avila,cubilia.Curae.Phasellus@quisaccumsanconvallis.edu,Bulgaria,0,41.85172,62812.09301,11609.38091,238961.2505,35321.45877
1,Harlan Barnes,eu.dolor@diam.co.uk,Belize,0,40.870623,66646.89292,9572.957136,530973.9078,45115.52566
2,Naomi Rodriquez,vulputate.mauris.sagittis@ametconsectetueradip...,Algeria,1,43.152897,53798.55112,11160.35506,638467.1773,42925.70921
3,Jade Cunningham,malesuada@dignissim.com,Cook Islands,1,58.271369,79370.03798,14426.16485,548599.0524,67422.36313
4,Cedric Leach,felis.ullamcorper.viverra@egetmollislectus.net,Brazil,1,57.313749,59729.1513,5358.712177,560304.0671,55915.46248


### Step 3: Checking for null values and describing the dataset

In [3]:
# Dimensions of the loaded frame as (rows, columns).
ds.shape

(500, 9)

In [4]:
# Count missing entries per column (isna is the canonical name; isnull is its alias).
ds.isna().sum()

customer name          0
customer e-mail        0
country                0
gender                 0
age                    0
annual Salary          0
credit card debt       0
net worth              0
car purchase amount    0
dtype: int64

In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
ds.describe()

Unnamed: 0,gender,age,annual Salary,credit card debt,net worth,car purchase amount
count,500.0,500.0,500.0,500.0,500.0,500.0
mean,0.506,46.241674,62127.239608,9607.645049,431475.713625,44209.799218
std,0.500465,7.978862,11703.378228,3489.187973,173536.75634,10773.178744
min,0.0,20.0,20000.0,100.0,20000.0,9000.0
25%,0.0,40.949969,54391.977195,7397.515792,299824.1959,37629.89604
50%,1.0,46.049901,62915.497035,9655.035568,426750.12065,43997.78339
75%,1.0,51.612263,70117.862005,11798.867487,557324.478725,51254.709517
max,1.0,70.0,100000.0,20000.0,1000000.0,80000.0


### Step 4: Encoding the categorical values into numerical values

In [6]:
# List the column labels before deciding which ones to drop.
ds.columns

Index(['customer name', 'customer e-mail', 'country', 'gender', 'age',
       'annual Salary', 'credit card debt', 'net worth',
       'car purchase amount'],
      dtype='object')

In [7]:
# Remove the identifier columns (name, e-mail) together with gender and age,
# leaving country and the three financial columns plus the target.
# NOTE(review): gender and age are already numeric in this dataset — confirm
# that excluding them from the model inputs is intentional.
unused_columns = ['customer name', 'customer e-mail', 'gender', 'age']
ds = ds.drop(columns=unused_columns)
ds.head()


Unnamed: 0,country,annual Salary,credit card debt,net worth,car purchase amount
0,Bulgaria,62812.09301,11609.38091,238961.2505,35321.45877
1,Belize,66646.89292,9572.957136,530973.9078,45115.52566
2,Algeria,53798.55112,11160.35506,638467.1773,42925.70921
3,Cook Islands,79370.03798,14426.16485,548599.0524,67422.36313
4,Brazil,59729.1513,5358.712177,560304.0671,55915.46248


In [8]:
# Encoder used below to map each country name to an integer code.
le = LabelEncoder()

In [9]:
# Distinct country names present in the data (the values that will be encoded).
ds['country'].unique()

array(['Bulgaria', 'Belize', 'Algeria', 'Cook Islands', 'Brazil',
       'Liberia', 'Syria', 'Czech Republic', 'Armenia', 'Somalia',
       'Sint Maarten', 'Greenland', 'Nicaragua', 'Palestine, State of',
       'United Arab Emirates', 'Gabon', 'Tokelau', 'Portugal', 'Chad',
       'Iraq', 'Sudan', 'Angola', 'Nigeria', 'Madagascar', 'Macedonia',
       'Oman', 'Colombia', 'Namibia', 'Denmark', 'Dominican Republic',
       'Costa Rica', 'Botswana', 'Saint Pierre and Miquelon', 'France',
       'Senegal', 'Cambodia', 'Suriname', 'Turkey', 'Ethiopia',
       'French Guiana', 'Bhutan', 'Georgia', 'Mozambique', 'Nauru',
       'Timor-Leste', 'Djibouti', 'Argentina', 'Taiwan', 'Åland Islands',
       'Turks and Caicos Islands', 'Samoa', 'Vanuatu', 'Mongolia',
       'Sri Lanka', 'Guinea-Bissau', 'Egypt', 'Slovenia', 'Benin',
       'Cape Verde', 'Paraguay', 'Laos', 'Iceland', 'Viet Nam',
       'Marshall Islands', 'Germany', 'Kyrgyzstan', 'Mauritius', 'Gambia',
       'Reunion', 'Jordan', 'S

In [10]:
# Replace each country name with its integer code; the fitted classes are kept
# on `le` so the mapping can be printed and saved afterwards.
# NOTE(review): the codes follow the sorted order of the names, so the network
# will see an arbitrary numeric ordering between countries — confirm this is
# acceptable, or consider a one-hot / embedding representation.
ds['country'] = le.fit_transform(ds['country'])

In [11]:
# Show the name -> integer mapping learned by the encoder; a class's position in
# `le.classes_` is the code assigned to it.
print("Encoded values for countries:")
country_codes = dict(zip(le.classes_, range(len(le.classes_))))
for name, code in country_codes.items():
    print(f"{name} -> {code}")

Encoded values for countries:
Afghanistan -> 0
Algeria -> 1
American Samoa -> 2
Andorra -> 3
Angola -> 4
Anguilla -> 5
Antarctica -> 6
Argentina -> 7
Armenia -> 8
Aruba -> 9
Australia -> 10
Austria -> 11
Bahamas -> 12
Bahrain -> 13
Bangladesh -> 14
Belarus -> 15
Belgium -> 16
Belize -> 17
Benin -> 18
Bermuda -> 19
Bhutan -> 20
Bolivia -> 21
Bonaire, Sint Eustatius and Saba -> 22
Bosnia and Herzegovina -> 23
Botswana -> 24
Bouvet Island -> 25
Brazil -> 26
Bulgaria -> 27
Cambodia -> 28
Cameroon -> 29
Canada -> 30
Cape Verde -> 31
Cayman Islands -> 32
Central African Republic -> 33
Chad -> 34
Chile -> 35
China -> 36
Christmas Island -> 37
Cocos (Keeling) Islands -> 38
Colombia -> 39
Congo (Brazzaville) -> 40
Cook Islands -> 41
Costa Rica -> 42
Croatia -> 43
Curaçao -> 44
Czech Republic -> 45
Denmark -> 46
Djibouti -> 47
Dominican Republic -> 48
Ecuador -> 49
Egypt -> 50
El Salvador -> 51
Equatorial Guinea -> 52
Ethiopia -> 53
Falkland Islands -> 54
Faroe Islands -> 55
France -> 56
French 

In [12]:
import pickle

In [13]:
# Serialise the fitted encoder to label_encoder.pkl in the working directory,
# presumably so the same country -> code mapping can be reloaded at prediction time.
with open('label_encoder.pkl', 'wb') as file:
    pickle.dump(le, file)

In [14]:
# Inspect the country column after encoding (now integer codes).
ds['country']

0       27
1       17
2        1
3       41
4       26
      ... 
495    128
496    208
497    144
498     24
499    209
Name: country, Length: 500, dtype: int64

In [15]:
# Preview the frame with the encoded country column in place.
ds.head()

Unnamed: 0,country,annual Salary,credit card debt,net worth,car purchase amount
0,27,62812.09301,11609.38091,238961.2505,35321.45877
1,17,66646.89292,9572.957136,530973.9078,45115.52566
2,1,53798.55112,11160.35506,638467.1773,42925.70921
3,41,79370.03798,14426.16485,548599.0524,67422.36313
4,26,59729.1513,5358.712177,560304.0671,55915.46248


### Step 5: Separating the features (X) from the target (y)

In [16]:
# The purchase amount is the regression target; every remaining column is a feature.
target_column = "car purchase amount"
y = ds[target_column]
X = ds.drop(columns=[target_column])

In [17]:
# Preview the feature frame (target column removed).
X.head()

Unnamed: 0,country,annual Salary,credit card debt,net worth
0,27,62812.09301,11609.38091,238961.2505
1,17,66646.89292,9572.957136,530973.9078
2,1,53798.55112,11160.35506,638467.1773
3,41,79370.03798,14426.16485,548599.0524
4,26,59729.1513,5358.712177,560304.0671


In [18]:
# Confirm every feature column is numeric before scaling.
print(X.dtypes)

country               int64
annual Salary       float64
credit card debt    float64
net worth           float64
dtype: object


In [19]:
# Preview the target series (car purchase amount).
y.head()

0    35321.45877
1    45115.52566
2    42925.70921
3    67422.36313
4    55915.46248
Name: car purchase amount, dtype: float64

### Step 6: Using StandardScaler to enhance model performance and splitting the data into training and testing sets

In [20]:
# Fit the scaler on the training rows only, so the mean/variance used for
# standardisation carry no information from the held-out test rows. Fitting on
# the whole of X (as this cell did before) leaks test-set statistics into
# training and makes the test MAE optimistic.
#
# The index split below uses the same test_size and random_state as the
# train_test_split call on X and y later in this section, so it selects exactly
# the rows that end up in X_train. Keep the two calls in sync.
train_rows, test_rows = train_test_split(
    np.arange(len(X)), test_size=0.2, random_state=42
)
ss = StandardScaler()
ss.fit(X.iloc[train_rows])

# X becomes a NumPy array holding every row standardised with the training-set
# statistics; the later split then partitions it into X_train / X_test.
X = ss.transform(X)

In [21]:
from sklearn.model_selection import train_test_split

In [22]:
# Hold out 20% of the rows for testing; random_state pins the shuffle so the
# partition is the same on every run. X is the scaled NumPy array at this point.
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=42)

In [23]:
# Sanity-check the training matrix size: (training rows, feature count).
X_train.shape

(400, 4)

### Step 7: Defining an Artificial Neural Network 

In [24]:
# Seed the Python, NumPy and TensorFlow random generators before any layer is
# created, so that weight initialisation and batch shuffling (and therefore the
# reported MAE) are repeatable across a fresh Restart & Run All.
tf.keras.utils.set_random_seed(42)

# Empty sequential container; layers are appended in the next cell.
model = tf.keras.models.Sequential()

In [25]:
# Fully connected ReLU layers of decreasing width after the first expansion,
# followed by one linear unit that emits the predicted purchase amount.
# Note: no separate Input layer is declared, so the first Dense(4) is itself a
# trainable layer applied to the features rather than a pass-through input layer.
for width in (4, 128, 64, 32):
    model.add(tf.keras.layers.Dense(units=width, activation='relu'))
model.add(tf.keras.layers.Dense(units=1))  # regression output, no activation

### Step 8: Compiling Network

In [26]:
# Optimise mean squared error with Adam, and also report mean absolute error,
# which is in the same units as the purchase amount.
model.compile(
    optimizer='adam',
    loss='mean_squared_error',
    metrics=['mean_absolute_error'],
)

### Step 9: Fitting Network

With batch_size=6 and epochs=30 I get a mean absolute error of around 5500 (the cell below trains with batch_size=5).

In [27]:
# Train for 30 epochs with mini-batches of 5 samples; the targets are the raw
# (unscaled) purchase amounts, which is why the early loss values are so large.
model.fit(x=X_train, y=y_train, batch_size=5, epochs=30)

Epoch 1/30
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 2127972608.0000 - mean_absolute_error: 44713.9336
Epoch 2/30
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 2036771584.0000 - mean_absolute_error: 43836.8633
Epoch 3/30
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 1833474560.0000 - mean_absolute_error: 41245.3516
Epoch 4/30
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 940760256.0000 - mean_absolute_error: 26660.0996
Epoch 5/30
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 420984064.0000 - mean_absolute_error: 16787.7363
Epoch 6/30
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 337615680.0000 - mean_absolute_error: 15225.6211
Epoch 7/30
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 283525152.0000 - mean_absolute_error: 13991.5391
Epoch 8/30

<keras.src.callbacks.history.History at 0x1bbed25ed40>

### Step 10: Evaluating the model

In [28]:
# Score the held-out rows; evaluate returns the loss (MSE) followed by the
# compiled metric (MAE), in that order.
loss, mae = model.evaluate(x=X_test, y=y_test)
print(f'Mean Absolute Error (MAE): {mae}')

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 58057932.0000 - mean_absolute_error: 6145.6582  
Mean Absolute Error (MAE): 6076.09326171875


### Step 11: Saving model

In [29]:
# Write the trained network to car_purchase_rate.h5 in the working directory
# (the .h5 extension selects Keras' HDF5 file format).
# NOTE(review): the fitted StandardScaler `ss` is not saved anywhere in the
# cells shown here; inputs must be standardised with it before calling this
# model — confirm it is persisted elsewhere alongside label_encoder.pkl.
model.save("car_purchase_rate.h5") 

