In [None]:
!pip install tensorflow opencv-python-headless
import os

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model



In [None]:
# Read the train and test label tables (train first, then test)
train_df, test_df = (
    pd.read_csv(csv_name) for csv_name in ('train_labels.csv', 'test_labels.csv')
)

# Turn an image path into a model-ready tensor
def preprocess_image(image_path):
    """Read a JPEG from disk and prepare it for MobileNetV2.

    Args:
        image_path: Path of the JPEG file to read.

    Returns:
        The image decoded to 3 channels, resized to 224x224 and passed
        through the MobileNetV2 `preprocess_input` transform.
    """
    raw_bytes = tf.io.read_file(image_path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, (224, 224))
    return tf.keras.applications.mobilenet_v2.preprocess_input(resized)

# Create datasets
def _labeled_image_ds(labels_df, image_dir, batch_size=32):
    """Build a batched (image, label) dataset from a labels table.

    Args:
        labels_df: Table with a 'Pothole number' column and a 'Bags used '
            column (the header carries one trailing space).
        image_dir: Directory holding the images, named 'p<Pothole number>.jpg'.
        batch_size: Number of examples per batch.

    Returns:
        A tf.data.Dataset yielding (preprocessed image batch, label batch),
        in the same row order as `labels_df`.
    """
    image_paths = labels_df['Pothole number'].apply(lambda n: f'{image_dir}/p{n}.jpg')
    ds = tf.data.Dataset.from_tensor_slices((image_paths, labels_df['Bags used ']))
    # Decode images in parallel; element order is still preserved by default.
    ds = ds.map(
        lambda path, label: (preprocess_image(path), label),
        num_parallel_calls=tf.data.AUTOTUNE,
    )
    # Prefetch so image decoding overlaps with the consumer of the dataset.
    return ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)


# Train and test share the exact same pipeline; only the image folder differs
train_ds = _labeled_image_ds(train_df, './train_images')
test_ds = _labeled_image_ds(test_df, './test_images')

In [None]:
# Build a regression model on top of an ImageNet-pretrained MobileNetV2.
# input_shape is pinned to the 224x224 RGB tensors produced by
# preprocess_image instead of being left unspecified, matching how the
# model is constructed in the later cells of this notebook.
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
x = GlobalAveragePooling2D()(base_model.output)
x = Dense(128, activation='relu')(x)
# Single linear unit: the model predicts one continuous value per image
output = Dense(1)(x)
model = Model(inputs=base_model.input, outputs=output)

# Freeze the base_model layers
for layer in base_model.layers:
    layer.trainable = False

  base_model = MobileNetV2(weights='imagenet', include_top=False)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
# Read the train and test label tables (train first, then test)
train_df, test_df = (
    pd.read_csv(csv_name) for csv_name in ('train_labels.csv', 'test_labels.csv')
)

# Turn an image path into a model-ready tensor
def preprocess_image(image_path):
    """Read a JPEG from disk and prepare it for MobileNetV2.

    Args:
        image_path: Path of the JPEG file to read.

    Returns:
        The image decoded to 3 channels, resized to 224x224 and passed
        through the MobileNetV2 `preprocess_input` transform.
    """
    raw_bytes = tf.io.read_file(image_path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, (224, 224))
    return tf.keras.applications.mobilenet_v2.preprocess_input(resized)

# Function to create a dataset
def create_dataset(dataframe, is_train=True, shuffle=True, batch_size=32):
    """Build a batched, prefetched tf.data pipeline of preprocessed images.

    Args:
        dataframe: Table with a 'Pothole number' column and, when `is_train`
            is True, a 'Bags used ' label column (one trailing space in the
            header, as used by the other cells of this notebook).
        is_train: If True, read images from /content/train_images and yield
            (image, label) pairs; otherwise read from /content/test_images
            and yield images only.
        shuffle: Whether to shuffle the examples.
        batch_size: Number of examples per batch.

    Returns:
        A tf.data.Dataset of batches.
    """
    image_dir = '/content/train_images' if is_train else '/content/test_images'
    image_paths = dataframe['Pothole number'].apply(lambda n: f'{image_dir}/p{n}.jpg')

    if is_train:
        # Label header is 'Bags used ' with a single trailing space.
        ds = tf.data.Dataset.from_tensor_slices((image_paths, dataframe['Bags used ']))
    else:
        ds = tf.data.Dataset.from_tensor_slices(image_paths)

    # Shuffle the small path/label records before decoding, so the shuffle
    # buffer holds strings rather than full 224x224 image tensors.
    if shuffle:
        ds = ds.shuffle(buffer_size=len(dataframe))

    if is_train:
        ds = ds.map(
            lambda path, label: (preprocess_image(path), label),
            num_parallel_calls=tf.data.AUTOTUNE,
        )
    else:
        ds = ds.map(preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)

    return ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)

# Verify file existence
def verify_files(dataframe, is_train=True):
    """Return the expected image paths that do not exist on disk.

    Args:
        dataframe: Table with a 'Pothole number' column.
        is_train: Look under /content/train_images when True, otherwise
            under /content/test_images.

    Returns:
        List of missing file paths in row order; empty when nothing is missing.
    """
    folder = 'train' if is_train else 'test'
    # Iterate over the column itself rather than DataFrame.iterrows():
    # iterrows packs each row into a single Series, which upcasts integer
    # pothole numbers to float when another column is float and produces
    # file names such as 'p101.0.jpg'.
    expected_paths = (
        f'/content/{folder}_images/p{number}.jpg'
        for number in dataframe['Pothole number']
    )
    return [path for path in expected_paths if not os.path.exists(path)]

# Abort early if any image referenced by the label tables is absent on disk
train_missing = verify_files(train_df, is_train=True)
test_missing = verify_files(test_df, is_train=False)

if len(train_missing) + len(test_missing) > 0:
    print("Missing files:", "\n".join(train_missing + test_missing), sep="\n")
    raise FileNotFoundError("Some image files are missing. Please check the file paths.")

# Hold out the last 20% of rows for validation.
# The split is positional (first 80% / remaining rows), not randomized.
train_size = int(len(train_df) * 0.8)
val_df = train_df[train_size:]
train_df = train_df[:train_size]

# Build the pipelines: only the training set is shuffled
train_ds = create_dataset(train_df, is_train=True, shuffle=True)
val_ds = create_dataset(val_df, is_train=True, shuffle=False)
test_ds = create_dataset(test_df, is_train=False, shuffle=False)

# Assemble the network: ImageNet-pretrained MobileNetV2 backbone plus a small regression head
base_model = MobileNetV2(input_shape=(224, 224, 3), include_top=False, weights='imagenet')
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(128, activation='relu')(x)
output = Dense(1)(x)
model = Model(inputs=base_model.input, outputs=output)

# Mark every backbone layer as non-trainable
for layer in base_model.layers:
    layer.trainable = False

# Mean-squared-error loss, Adam optimizer
model.compile(loss='mse', optimizer='adam')

# Fit for 10 epochs, reporting loss on the validation split each epoch
history = model.fit(train_ds, validation_data=val_ds, epochs=10)

# Run the fitted model over the test images
predictions = model.predict(test_ds)

# Pair each test pothole number with its predicted value
results_df = pd.DataFrame(
    data={
        'Pothole number': test_df['Pothole number'],
        'Predicted Bags': predictions.flatten(),
    }
)

# Write the table to disk without the index column
results_df.to_csv('predictions.csv', index=False)

print("Training completed and predictions saved to 'predictions.csv'")

Missing files:
/content/train_images/p101.0.jpg
/content/train_images/p102.0.jpg
/content/train_images/p106.0.jpg
/content/train_images/p107.0.jpg
/content/train_images/p109.0.jpg
/content/train_images/p110.0.jpg
/content/train_images/p111.0.jpg
/content/train_images/p112.0.jpg
/content/train_images/p113.0.jpg
/content/train_images/p115.0.jpg
/content/train_images/p116.0.jpg
/content/train_images/p117.0.jpg
/content/train_images/p118.0.jpg
/content/train_images/p119.0.jpg
/content/train_images/p120.0.jpg
/content/train_images/p121.0.jpg
/content/train_images/p122.0.jpg
/content/train_images/p123.0.jpg
/content/train_images/p124.0.jpg
/content/train_images/p125.0.jpg
/content/train_images/p126.0.jpg
/content/train_images/p127.0.jpg
/content/train_images/p128.0.jpg
/content/train_images/p129.0.jpg
/content/train_images/p132.0.jpg
/content/train_images/p133.0.jpg
/content/train_images/p134.0.jpg
/content/train_images/p135.0.jpg
/content/train_images/p136.0.jpg
/content/train_images/p138.0

FileNotFoundError: Some image files are missing. Please check the file paths.

In [None]:
# Read the train and test label tables (train first, then test)
train_df, test_df = (
    pd.read_csv(csv_name) for csv_name in ('train_labels.csv', 'test_labels.csv')
)

# Turn an image path into a model-ready tensor
def preprocess_image(image_path):
    """Read a JPEG from disk and prepare it for MobileNetV2.

    Args:
        image_path: Path of the JPEG file to read.

    Returns:
        The image decoded to 3 channels, resized to 224x224 and passed
        through the MobileNetV2 `preprocess_input` transform.
    """
    raw_bytes = tf.io.read_file(image_path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, (224, 224))
    return tf.keras.applications.mobilenet_v2.preprocess_input(resized)

# Function to create a dataset
def create_dataset(dataframe, is_train=True, shuffle=True, batch_size=32):
    """Build a batched, prefetched tf.data pipeline of preprocessed images.

    Args:
        dataframe: Table with an 'image_path' column and, when `is_train` is
            True, a 'Bags used ' label column (one trailing space in the header).
        is_train: If True, yield (image, label) pairs; otherwise images only.
        shuffle: Whether to shuffle the examples.
        batch_size: Number of examples per batch.

    Returns:
        A tf.data.Dataset of batches.
    """
    if is_train:
        ds = tf.data.Dataset.from_tensor_slices((
            dataframe['image_path'],
            dataframe['Bags used '],  # Note the extra space at the end
        ))
    else:
        ds = tf.data.Dataset.from_tensor_slices(dataframe['image_path'])

    # Shuffle the small path/label records before decoding, so the shuffle
    # buffer holds strings rather than full 224x224 image tensors.
    if shuffle:
        ds = ds.shuffle(buffer_size=len(dataframe))

    if is_train:
        ds = ds.map(
            lambda path, label: (preprocess_image(path), label),
            num_parallel_calls=tf.data.AUTOTUNE,
        )
    else:
        ds = ds.map(preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)

    return ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)

# Function to filter out missing files
def filter_missing_files(dataframe, is_train=True):
    """Return the rows whose image file exists, with an 'image_path' column added.

    The input frame is not modified: the path column is built on a new frame,
    so calling this function has no side effect on the caller's data.

    Args:
        dataframe: Table with a 'Pothole number' column.
        is_train: Look under /content/train_images when True, otherwise
            under /content/test_images.

    Returns:
        A new DataFrame holding only the rows whose image exists on disk,
        including the derived 'image_path' column. The number of dropped
        rows is printed.
    """
    folder = 'train_images' if is_train else 'test_images'
    with_paths = dataframe.assign(
        image_path=dataframe['Pothole number'].apply(lambda n: f'/content/{folder}/p{n}.jpg')
    )
    file_exists = with_paths['image_path'].apply(os.path.exists)
    filtered_df = with_paths[file_exists]
    print(f"Removed {len(dataframe) - len(filtered_df)} entries due to missing files.")
    return filtered_df

# Drop label rows whose image is absent on disk (also adds the 'image_path' column)
train_df = filter_missing_files(train_df, is_train=True)
test_df = filter_missing_files(test_df, is_train=False)

# Hold out the last 20% of the remaining rows for validation.
# The split is positional (first 80% / remaining rows), not randomized.
train_size = int(len(train_df) * 0.8)
val_df = train_df[train_size:]
train_df = train_df[:train_size]

# Build the pipelines: only the training set is shuffled
train_ds = create_dataset(train_df, is_train=True, shuffle=True)
val_ds = create_dataset(val_df, is_train=True, shuffle=False)
test_ds = create_dataset(test_df, is_train=False, shuffle=False)

# Assemble the network: ImageNet-pretrained MobileNetV2 backbone plus a small regression head
base_model = MobileNetV2(input_shape=(224, 224, 3), include_top=False, weights='imagenet')
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(128, activation='relu')(x)
output = Dense(1)(x)
model = Model(inputs=base_model.input, outputs=output)

# Mark every backbone layer as non-trainable
for layer in base_model.layers:
    layer.trainable = False

# Mean-squared-error loss, Adam optimizer
model.compile(loss='mse', optimizer='adam')

# Fit for 10 epochs, reporting loss on the validation split each epoch
history = model.fit(train_ds, validation_data=val_ds, epochs=10)

# Run the fitted model over the test images
predictions = model.predict(test_ds)

# Pair each test pothole number with its predicted value
results_df = pd.DataFrame(
    data={
        'Pothole number': test_df['Pothole number'],
        'Predicted Bags': predictions.flatten(),
    }
)

# Write the table to disk without the index column
results_df.to_csv('predictions.csv', index=False)

print("Training completed and predictions saved to 'predictions.csv'")

Removed 255 entries due to missing files.
Removed 0 entries due to missing files.
Epoch 1/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 3s/step - loss: 3.5564 - val_loss: 0.9644
Epoch 2/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 2s/step - loss: 1.3973 - val_loss: 0.9191
Epoch 3/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 2s/step - loss: 0.7552 - val_loss: 0.8741
Epoch 4/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 2s/step - loss: 0.4841 - val_loss: 0.8702
Epoch 5/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 2s/step - loss: 0.3212 - val_loss: 0.7991
Epoch 6/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 2s/step - loss: 0.3044 - val_loss: 0.7359
Epoch 7/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 2s/step - loss: 0.2618 - val_loss: 0.7192
Epoch 8/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 2s/step - loss: 