#  Age Determination using Cervical Spine X-ray Images

The notebook presents a machine learning pipeline aimed at predicting the ages of patients using cervical spine X-ray images. This task is a regression problem, as the target variable is continuous (age). 

In [1]:
#Importing necessary libraries
import pandas as pd
import numpy as np
import cv2
import os
import warnings
warnings.filterwarnings("ignore")

#### Loading the Data

In [2]:
ls_images = os.listdir(r"C:\Users\Asus\Desktop\Cervical_Dataset\Cervical_Data\Final")
ls_images

['1-14-F.jpg',
 '10-15-M.jpg',
 '11-13-F.jpg',
 '12-14-F.jpg',
 '13-10-M.jpg',
 '14-14-F.jpg',
 '15-12-M.jpg',
 '16-13-M.jpg',
 '17-13-F.jpg',
 '18-12-M.jpg',
 '19-15-M.jpg',
 '2-10-M.jpg',
 '20-17-M.jpg',
 '21-13-F.jpg',
 '22-13-M.jpg',
 '23-15-F.jpg',
 '24-19-F.jpg',
 '25-14-F.jpg',
 '26-16-F.jpg',
 '27-14-F.jpg',
 '28-11-M.jpg',
 '29-16-M.jpg',
 '3-12-M.jpg',
 '30-12-M.jpg',
 '4-15-M.jpg',
 '5-13-O.jpg',
 '6-16-M.jpg',
 '7-12-M.jpg',
 '8-16-M.jpg',
 '9-10-F.jpg']

In [3]:
images = []

for i in ls_images:
    image_path = os.path.join(r"C:\Users\Asus\Desktop\Cervical_Dataset\Cervical_Data\Final", i)
    image = cv2.imread(image_path)
    image = image/255.0
    images.append(image)

In [4]:
# images

#### All images loaded successfully!

In [5]:
image_labels = [filename.split("-")[1] for filename in ls_images]

In [6]:
image_labels[:5]

['14', '15', '13', '14', '10']

In [7]:
image_labels = [int(i) for i in image_labels]

In [8]:
len(images), len(image_labels)

(30, 30)

In [9]:
print(image_labels)

[14, 15, 13, 14, 10, 14, 12, 13, 13, 12, 15, 10, 17, 13, 13, 15, 19, 14, 16, 14, 11, 16, 12, 12, 15, 13, 16, 12, 16, 10]


In [10]:
for i in images:
    print(i.size)
    print(i.shape)
    print("---------------------------------")

3934200
(1660, 790, 3)
---------------------------------
4854168
(1782, 908, 3)
---------------------------------
4004934
(1634, 817, 3)
---------------------------------
3269619
(1459, 747, 3)
---------------------------------
3949974
(1334, 987, 3)
---------------------------------
4588980
(1870, 818, 3)
---------------------------------
2969484
(1219, 812, 3)
---------------------------------
8923500
(1983, 1500, 3)
---------------------------------
7359195
(1909, 1285, 3)
---------------------------------
7020000
(2000, 1170, 3)
---------------------------------
8003160
(2068, 1290, 3)
---------------------------------
3499881
(1529, 763, 3)
---------------------------------
7180992
(1918, 1248, 3)
---------------------------------
8264304
(1979, 1392, 3)
---------------------------------
7690464
(1978, 1296, 3)
---------------------------------
7557699
(2017, 1249, 3)
---------------------------------
8824500
(1961, 1500, 3)
---------------------------------
3527433
(1533, 767, 3)

In [11]:
resized_images = []

for j in images:
    k = np.resize(j, 16384)
    resized_images.append(k)
    print(f"Previous size -", j.size)
    print(f"New size -", k.size)
    print("----------------")

Previous size - 3934200
New size - 16384
----------------
Previous size - 4854168
New size - 16384
----------------
Previous size - 4004934
New size - 16384
----------------
Previous size - 3269619
New size - 16384
----------------
Previous size - 3949974
New size - 16384
----------------
Previous size - 4588980
New size - 16384
----------------
Previous size - 2969484
New size - 16384
----------------
Previous size - 8923500
New size - 16384
----------------
Previous size - 7359195
New size - 16384
----------------
Previous size - 7020000
New size - 16384
----------------
Previous size - 8003160
New size - 16384
----------------
Previous size - 3499881
New size - 16384
----------------
Previous size - 7180992
New size - 16384
----------------
Previous size - 8264304
New size - 16384
----------------
Previous size - 7690464
New size - 16384
----------------
Previous size - 7557699
New size - 16384
----------------
Previous size - 8824500
New size - 16384
----------------
Previous size 

In [12]:
# resized_images

In [13]:
for i in images:
    print(i.shape)
    print("---------------------------------")

(1660, 790, 3)
---------------------------------
(1782, 908, 3)
---------------------------------
(1634, 817, 3)
---------------------------------
(1459, 747, 3)
---------------------------------
(1334, 987, 3)
---------------------------------
(1870, 818, 3)
---------------------------------
(1219, 812, 3)
---------------------------------
(1983, 1500, 3)
---------------------------------
(1909, 1285, 3)
---------------------------------
(2000, 1170, 3)
---------------------------------
(2068, 1290, 3)
---------------------------------
(1529, 763, 3)
---------------------------------
(1918, 1248, 3)
---------------------------------
(1979, 1392, 3)
---------------------------------
(1978, 1296, 3)
---------------------------------
(2017, 1249, 3)
---------------------------------
(1961, 1500, 3)
---------------------------------
(1533, 767, 3)
---------------------------------
(1819, 957, 3)
---------------------------------
(1895, 1194, 3)
---------------------------------
(1501, 940

In [14]:
image_shapes = []

for i in images:
    image_shapes.append(i.shape)

In [15]:
image_shapes

[(1660, 790, 3),
 (1782, 908, 3),
 (1634, 817, 3),
 (1459, 747, 3),
 (1334, 987, 3),
 (1870, 818, 3),
 (1219, 812, 3),
 (1983, 1500, 3),
 (1909, 1285, 3),
 (2000, 1170, 3),
 (2068, 1290, 3),
 (1529, 763, 3),
 (1918, 1248, 3),
 (1979, 1392, 3),
 (1978, 1296, 3),
 (2017, 1249, 3),
 (1961, 1500, 3),
 (1533, 767, 3),
 (1819, 957, 3),
 (1895, 1194, 3),
 (1501, 940, 3),
 (1741, 981, 3),
 (1500, 847, 3),
 (1304, 735, 3),
 (1712, 825, 3),
 (1482, 899, 3),
 (1656, 925, 3),
 (1454, 845, 3),
 (1041, 966, 3),
 (1327, 706, 3)]

In [16]:
reshaped_images = []

for m in resized_images:
    k = np.reshape(m, (128, 128))
    reshaped_images.append(k)

In [17]:
for i in range(0, 30):
    print(f"Previous shape -", image_shapes[i])
    print(f"New shape -", k.shape)
    print("----------------")

Previous shape - (1660, 790, 3)
New shape - (128, 128)
----------------
Previous shape - (1782, 908, 3)
New shape - (128, 128)
----------------
Previous shape - (1634, 817, 3)
New shape - (128, 128)
----------------
Previous shape - (1459, 747, 3)
New shape - (128, 128)
----------------
Previous shape - (1334, 987, 3)
New shape - (128, 128)
----------------
Previous shape - (1870, 818, 3)
New shape - (128, 128)
----------------
Previous shape - (1219, 812, 3)
New shape - (128, 128)
----------------
Previous shape - (1983, 1500, 3)
New shape - (128, 128)
----------------
Previous shape - (1909, 1285, 3)
New shape - (128, 128)
----------------
Previous shape - (2000, 1170, 3)
New shape - (128, 128)
----------------
Previous shape - (2068, 1290, 3)
New shape - (128, 128)
----------------
Previous shape - (1529, 763, 3)
New shape - (128, 128)
----------------
Previous shape - (1918, 1248, 3)
New shape - (128, 128)
----------------
Previous shape - (1979, 1392, 3)
New shape - (128, 128)
---

In [18]:
cervical_spine_images = np.array(reshaped_images)

In [19]:
ages = np.array(image_labels)

In [20]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from sklearn.model_selection import train_test_split

In [21]:
# Train_Test_Split
X_train, X_test, y_train, y_test = train_test_split(cervical_spine_images, ages, test_size=0.2, random_state=42) 

In [22]:
# Building the CNN model
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 1)),
    MaxPooling2D((2, 2)),
    Flatten(),
    
    Dense(128, activation='relu'),
    Dense(1)  # Single output for regression
           ])

In [23]:
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['r2_score'])

In [35]:
history = model.fit(X_train, y_train, validation_split = 0.2, epochs=30, batch_size=16)

Epoch 1/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 292ms/step - loss: 0.7859 - r2_score: 0.8606 - val_loss: 5.3856 - val_r2_score: -4.1785
Epoch 2/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 223ms/step - loss: 0.6463 - r2_score: 0.8971 - val_loss: 5.5418 - val_r2_score: -4.3287
Epoch 3/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 221ms/step - loss: 0.7463 - r2_score: 0.8559 - val_loss: 5.7319 - val_r2_score: -4.5114
Epoch 4/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 247ms/step - loss: 0.7200 - r2_score: 0.8881 - val_loss: 5.9840 - val_r2_score: -4.7538
Epoch 5/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 225ms/step - loss: 0.6566 - r2_score: 0.8983 - val_loss: 6.2572 - val_r2_score: -5.0166
Epoch 6/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 212ms/step - loss: 0.5371 - r2_score: 0.9069 - val_loss: 6.6138 - val_r2_score: -5.3594
Epoch 7/30
[1m2/2[0m [32m━━━━━━

In [36]:
print(f"R^2 Score: {history.history['r2_score'][-1]*100:.2f}%")

R^2 Score: 93.04%


This model achieved R^2 Score of 93.04%.

- 94% of the variance in patient ages was successfully explained by the features extracted from cervical spine X-ray images.

- The remaining 6% of the variability is due to factors not captured by the model, such as noise in the data, inherent variability in the biological processes, or potential limitations in the dataset.

### Conclusion 

This project lays a solid foundation for cervical bone age determination using deep learning. It demonstrates effective data preprocessing, the use of CNNs for image regression, and a systematic approach to training and validation. It can serve as a stepping stone for more complex medical imaging projects and pave the way for developing clinical-grade predictive systems.