In [18]:
import pandas as pd
import numpy as np
import os

# Dataset locations: the aligned CelebA image directory and the label CSV
img_folder = r"D:\Data science\datasets\Hair\img_align_celeba"
attr_csv = r"D:\Data science\datasets\Hair\hair_length_labels.csv"

# Read the per-image label table
df = pd.read_csv(attr_csv)

# Binary target: 1 where hair_length is exactly 'long', 0 for any other value
df['label'] = df['hair_length'].eq('long').astype('int64')

print(df.head())


     image_id hair_length  label
0  000001.jpg       short      0
1  000002.jpg        long      1
2  000003.jpg       short      0
3  000004.jpg       short      0
4  000005.jpg        long      1


In [1]:
import pandas as pd

# Reload the label table and report how many rows fall into each class
attr_csv = r"D:\Data science\datasets\Hair\hair_length_labels.csv"
df = pd.read_csv(attr_csv)

class_counts = df['hair_length'].value_counts()
print(class_counts)


hair_length
short    106185
long      96414
Name: count, dtype: int64


In [1]:
import pandas as pd

attr_csv = r"D:\Data science\datasets\Hair\hair_length_labels.csv"
df = pd.read_csv(attr_csv)

# Separate classes
df_long = df[df['hair_length'] == 'long']
df_short = df[df['hair_length'] == 'short']

# Sample up to 5000 rows per class. Capping at the class size keeps
# DataFrame.sample from raising when a class holds fewer rows than requested.
df_long_sampled = df_long.sample(n=min(5000, len(df_long)), random_state=42)
df_short_sampled = df_short.sample(n=min(5000, len(df_short)), random_state=42)

# Combine and shuffle
df_small = pd.concat([df_long_sampled, df_short_sampled]).sample(frac=1, random_state=42)

# Binary target computed column-wise: 1 for 'long', 0 for any other value
df_small['label'] = df_small['hair_length'].eq('long').astype('int64')

print("Sampled dataset size:", df_small.shape)
print(df_small['hair_length'].value_counts())


Sampled dataset size: (10000, 3)
hair_length
short    5000
long     5000
Name: count, dtype: int64


In [2]:
from tensorflow.keras.preprocessing import image
import numpy as np
import os
from tqdm import tqdm

IMG_SIZE = 128
img_folder = r"D:\Data science\datasets\Hair\img_align_celeba"

X = []
y = []
missing_files = []  # image_ids listed in df_small whose file is absent on disk

# Walk the two needed columns directly instead of iterrows(): same row order,
# without building a Series object for every row.
rows = zip(df_small['image_id'], df_small['label'])
for image_id, label in tqdm(rows, total=len(df_small)):
    img_path = os.path.join(img_folder, image_id)
    if not os.path.exists(img_path):
        missing_files.append(image_id)
        continue
    img = image.load_img(img_path, target_size=(IMG_SIZE, IMG_SIZE))
    X.append(image.img_to_array(img) / 255.0)  # scale pixel values by 1/255
    y.append(label)

X = np.array(X)
y = np.array(y)

# Surface skipped files instead of silently shrinking the dataset
if missing_files:
    print(f"Skipped {len(missing_files)} missing image(s), e.g. {missing_files[:5]}")

print("Images shape:", X.shape)
print("Labels shape:", y.shape)


100%|███████████████████████████████████████████████████████████████████████████| 10000/10000 [00:25<00:00, 392.44it/s]


Images shape: (10000, 128, 128, 3)
Labels shape: (10000,)


In [3]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Random augmentation applied on the fly to each batch drawn from the generator
datagen = ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True
)
# No datagen.fit(X): fit() only computes the statistics needed by
# featurewise_center, featurewise_std_normalization and zca_whitening.
# None of those options is enabled here, so the call was an unnecessary pass over X.


In [4]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Input

# Three conv/pool stages followed by a dense head with dropout and a single
# sigmoid unit for the binary short/long decision.
model = Sequential([
    # Declare the input shape with an explicit Input layer; passing input_shape=
    # to the first Conv2D makes Keras emit a UserWarning when the layer is built.
    Input(shape=(IMG_SIZE, IMG_SIZE, 3)),

    Conv2D(32, (3,3), activation='relu'),
    MaxPooling2D(2,2),

    Conv2D(64, (3,3), activation='relu'),
    MaxPooling2D(2,2),

    Conv2D(128, (3,3), activation='relu'),
    MaxPooling2D(2,2),

    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [5]:
# Train for 20 epochs on augmented batches of 32 drawn from the in-memory arrays
augmented_batches = datagen.flow(X, y, batch_size=32)
history = model.fit(augmented_batches, epochs=20)


  self._warn_if_super_not_called()


Epoch 1/20
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 217ms/step - accuracy: 0.5928 - loss: 0.6798
Epoch 2/20
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 210ms/step - accuracy: 0.7107 - loss: 0.5687
Epoch 3/20
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 210ms/step - accuracy: 0.7350 - loss: 0.5353
Epoch 4/20
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 211ms/step - accuracy: 0.7572 - loss: 0.5191
Epoch 5/20
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 209ms/step - accuracy: 0.7760 - loss: 0.4775
Epoch 6/20
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 211ms/step - accuracy: 0.7897 - loss: 0.4553
Epoch 7/20
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 209ms/step - accuracy: 0.8077 - loss: 0.4358
Epoch 8/20
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 210ms/step - accuracy: 0.8222 - loss: 0.4185
Epoch 9/20
[1m3

In [21]:
# Write the current `model` to an HDF5 file inside the dataset folder
saved_model_path = r"D:\Data science\datasets\Hair\hair_length.h5"
model.save(saved_model_path)
print("✅ Model saved successfully")




✅ Model saved successfully


In [8]:
from tensorflow.keras.models import load_model

# NOTE(review): the save cell in this notebook writes hair_length.h5, whereas
# this cell reads hair_length_cnn.h5 — confirm this is the intended file.
model_path = r"D:\Data science\datasets\Hair\hair_length_cnn.h5"
# Rebinds the global `model` to the network restored from disk
model = load_model(model_path)
print("✅ Model loaded successfully")




✅ Model loaded successfully


In [19]:
# Spot-check the classifier on three images from the CelebA folder.
# This cell does not import `os` or define `predict_hair_length`; both must
# already exist in the kernel before it runs.
celeba_dir = r"D:\Data science\datasets\Hair\img_align_celeba"
test_images = ["000001.jpg", "000002.jpg", "000003.jpg"]

for img_name in test_images:
    img_path = os.path.join(celeba_dir, img_name)
    prediction = predict_hair_length(img_path)
    print(img_name, "->", prediction)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
000001.jpg -> Long Hair
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
000002.jpg -> Long Hair
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
000003.jpg -> Short Hair


In [20]:
from tensorflow.keras.preprocessing import image
import numpy as np
import os

IMG_SIZE = 128  # side length (pixels) images are resized to before prediction

def predict_hair_length(img_path):
    """Classify one image file as long or short hair using the global `model`.

    Args:
        img_path: Path to the image file on disk.

    Returns:
        "Image not found" if the path does not exist; otherwise "Long Hair"
        when the model's output for the image is above 0.5, else "Short Hair".
    """
    if os.path.exists(img_path):
        loaded = image.load_img(img_path, target_size=(IMG_SIZE, IMG_SIZE))
        scaled = image.img_to_array(loaded) / 255.0
        # Add a leading batch axis so a single image can be passed to predict()
        batch = np.expand_dims(scaled, axis=0)
        score = model.predict(batch)[0][0]
        if score > 0.5:
            return "Long Hair"
        return "Short Hair"
    return "Image not found"

# Smoke test: run the predictor on one image from the dataset folder
test_img = r"D:\Data science\datasets\Hair\img_align_celeba\000001.jpg"
single_result = predict_hair_length(test_img)
print("Prediction:", single_result)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
Prediction: Long Hair


In [1]:
import tensorflow as tf
# Alias the loader via the tf.keras namespace so later cells can call load_model(...) directly
load_model = tf.keras.models.load_model




In [3]:
# Relative path: resolved against the kernel's current working directory
hair_model = load_model("hair_length.h5")



In [5]:
from tensorflow.keras.models import load_model
import numpy as np
import cv2
from PIL import Image

# Load your trained model
model = load_model("hair_length.h5")

# ---- Test 1: confirm model loads ----
model.summary()

# ---- Test 2: test on an actual image ----
# Replace with your image path
img_path = "harsha.jpg"

# Read and preprocess image. The context manager closes the file even if
# decoding fails; `img` is the converted, resized copy that outlives it.
with Image.open(img_path) as raw_img:
    img = raw_img.convert("RGB").resize((128, 128))

# Scale pixel values by 1/255 and add a leading batch axis for predict()
img_arr = np.expand_dims(np.array(img) / 255.0, axis=0)

# Predict: single sigmoid output for the single image in the batch
pred = model.predict(img_arr)[0][0]

# Convert to label (scores above 0.5 are reported as long hair)
label = "LONG HAIR" if pred > 0.5 else "SHORT HAIR"
print(f"Prediction: {label} (score={pred:.3f})")




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 126ms/step
Prediction: SHORT HAIR (score=0.172)


In [7]:
import tensorflow as tf

# Load old h5 model in original environment
# (uses the standalone `keras` loader and rebinds any `load_model` already defined in the kernel)
from keras.models import load_model
old_model = load_model("hair_length.h5")

# Save in TF Keras format
# Written under a new filename, so hair_length.h5 itself is left untouched
tf.keras.models.save_model(old_model, "hair_length_tf.h5")




In [9]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from sklearn.model_selection import train_test_split
from PIL import Image
import os

# -------------------
# Paths
# -------------------
# Directory of aligned CelebA images and the CSV of per-image hair-length labels.
# (The same two assignments are repeated in the following cell.)
dataset_dir = r"D:\Data science\datasets\Hair\img_align_celeba"
csv_path = r"D:\Data science\datasets\Hair\hair_length_labels.csv"

In [13]:
# hair_length_train_jupyter.ipynb

import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from sklearn.model_selection import train_test_split
from PIL import Image
import os

# -------------------
# Paths: image directory and label CSV
# -------------------
csv_path = r"D:\Data science\datasets\Hair\hair_length_labels.csv"
dataset_dir = r"D:\Data science\datasets\Hair\img_align_celeba"

# -------------------
# Load the label table
# -------------------
df = pd.read_csv(csv_path)

# Column headers may carry stray whitespace; normalise them before indexing
df.columns = df.columns.str.strip()

# Encode the string labels as integers (short -> 0, long -> 1)
label_codes = {'short': 0, 'long': 1}
df['hair_length'] = df['hair_length'].map(label_codes)
print("Unique labels:", df['hair_length'].unique())

# -------------------
# Draw at most 5000 rows from each class
# -------------------
long_rows = df[df['hair_length'] == 1]
short_rows = df[df['hair_length'] == 0]
long_count = len(long_rows)
short_count = len(short_rows)

df_long = long_rows.sample(min(5000, long_count), random_state=42)
df_short = short_rows.sample(min(5000, short_count), random_state=42)

# Stack long rows first, then short rows, with a fresh 0..n-1 index
df_sample = pd.concat([df_long, df_short]).reset_index(drop=True)
print("Total images used:", len(df_sample))

Unique labels: [0 1]
Total images used: 10000


In [14]:
# Hold out 20% for validation, stratified on the label column so both
# partitions keep the same class proportions.
train_df, val_df = train_test_split(
    df_sample,
    test_size=0.2,
    stratify=df_sample['hair_length'],
    random_state=42,
)

# -------------------
# Image preprocessing
# -------------------
IMG_SIZE = (128, 128)

def preprocess_image(img_name):
    """Load one image from `dataset_dir` as an RGB array scaled by 1/255.

    Args:
        img_name: File name of the image, joined onto the global `dataset_dir`.

    Returns:
        A NumPy array built from the image after conversion to RGB and
        resizing to `IMG_SIZE`, with every pixel value divided by 255.0.
    """
    img_path = os.path.join(dataset_dir, img_name)
    # Context manager closes the file even if decoding fails part-way;
    # the converted/resized copy is independent of the opened handle.
    with Image.open(img_path) as raw:
        rgb = raw.convert("RGB").resize(IMG_SIZE)
    return np.array(rgb) / 255.0

# -------------------
# Data generator
# -------------------
def data_generator(df, batch_size=32):
    """Yield (images, labels) batches from `df` forever, reshuffling every pass.

    Args:
        df: DataFrame with an 'image_id' column (file names handed to
            preprocess_image) and a 'hair_length' column (labels).
        batch_size: Maximum rows per batch; the last batch of a pass may be
            smaller when len(df) is not a multiple of batch_size.

    Yields:
        Tuple (X, y): X stacks the preprocessed images of the batch and y holds
        the matching 'hair_length' values.

    Raises:
        ValueError: If `df` is empty or `batch_size` is not positive. Because
            this is a generator, the error surfaces on the first next() call.
    """
    n = len(df)
    # With no rows (or a negative step) the inner loop never yields, and the
    # surrounding `while True` would spin forever; fail loudly instead.
    if n == 0:
        raise ValueError("data_generator received an empty DataFrame")
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    while True:
        # Fresh random order for each pass; the caller's frame is left untouched
        shuffled = df.sample(frac=1).reset_index(drop=True)
        for start in range(0, n, batch_size):
            batch_df = shuffled.iloc[start:start + batch_size]
            X = np.array([preprocess_image(img) for img in batch_df['image_id']])
            y = np.array(batch_df['hair_length'])
            yield X, y

In [15]:
BATCH_SIZE = 32
train_gen = data_generator(train_df, BATCH_SIZE)
val_gen = data_generator(val_df, BATCH_SIZE)

# data_generator emits ceil(len(df) / batch_size) batches per pass (the last
# one may be partial). Ceiling division makes one epoch / one validation run
# consume exactly one full pass; floor division dropped the partial batch and
# let successive validation runs drift out of step with the generator.
steps_per_epoch = -(-len(train_df) // BATCH_SIZE)
validation_steps = -(-len(val_df) // BATCH_SIZE)

# -------------------
# Build CNN Model
# -------------------
from tensorflow.keras.layers import Input

# Three conv/pool stages, then a dense head ending in one sigmoid unit for the
# binary label.
model = Sequential([
    # Declare the input shape with an explicit Input layer; passing input_shape=
    # to the first Conv2D makes Keras emit a UserWarning when the layer is built.
    Input(shape=(128,128,3)),
    Conv2D(32, (3,3), activation='relu'),
    MaxPooling2D(2,2),
    Conv2D(64, (3,3), activation='relu'),
    MaxPooling2D(2,2),
    Conv2D(128, (3,3), activation='relu'),
    MaxPooling2D(2,2),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [16]:
EPOCHS = 20

# Each epoch draws steps_per_epoch batches from train_gen, then evaluates on
# validation_steps batches from val_gen.
history = model.fit(
    train_gen,
    epochs=EPOCHS,
    steps_per_epoch=steps_per_epoch,
    validation_data=val_gen,
    validation_steps=validation_steps,
)

Epoch 1/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 195ms/step - accuracy: 0.5888 - loss: 0.6952 - val_accuracy: 0.7782 - val_loss: 0.4934
Epoch 2/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 171ms/step - accuracy: 0.8083 - loss: 0.4431 - val_accuracy: 0.8382 - val_loss: 0.4002
Epoch 3/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 168ms/step - accuracy: 0.8437 - loss: 0.3678 - val_accuracy: 0.8357 - val_loss: 0.3822
Epoch 4/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 180ms/step - accuracy: 0.8623 - loss: 0.3301 - val_accuracy: 0.8476 - val_loss: 0.3682
Epoch 5/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 169ms/step - accuracy: 0.8710 - loss: 0.3037 - val_accuracy: 0.8440 - val_loss: 0.3696
Epoch 6/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 160ms/step - accuracy: 0.8838 - loss: 0.2685 - val_accuracy: 0.8425 - val_loss: 0.4141
Epoch 7/20

In [17]:
# Write the current `model` to an HDF5 file in the working directory
vscode_model_file = "hair_length_vscode.h5"
model.save(vscode_model_file)
print("Model saved successfully as hair_length_vscode.h5")



Model saved successfully as hair_length_vscode.h5


In [18]:
import sys
# Show which Python interpreter this kernel is running on
print(sys.executable)


D:\anaconda\python.exe
