In [None]:
import pandas as pd 
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
from PIL import Image
import os 
import plotly.express as px 
import tensorflow as tf
from tensorflow.keras import layers, models

In [None]:
# Read the competition's train and test tables, then print a short preview of each.
train_csv, test_csv = (
    pd.read_csv(csv_file)
    for csv_file in (
        r"/kaggle/input/csiro-biomass/train.csv",
        r'/kaggle/input/csiro-biomass/test.csv',
    )
)
train_preview = train_csv.head()
test_preview = test_csv.head()
print(f"Train data :\n{train_preview}")
print(f"\nTest data :\n{test_preview}")

## Understanding data 

In [None]:
# Horizontal bar chart of the number of rows per species, smallest count first.
species_counts = train_csv["Species"].value_counts().sort_values()
species_counts.plot.barh(title='count species', color="orange")
plt.show()

**Two species are highly represented: `Ryegrass_Clover` and, to a lesser extent, `Ryegrass`.**


In [None]:
# Root folder of the dataset; image paths from the CSV are joined onto it.
img_path="/kaggle/input/csiro-biomass/"
unique_img_path=train_csv["image_path"].unique().tolist()
num_images=6

# Preview the first `num_images` distinct images.
selected_images = unique_img_path[:num_images]

# Two-column grid with enough rows to hold every preview (rounded up).
cols = 2
rows = (num_images + 1) // 2
fig, axes = plt.subplots(nrows=rows, ncols=cols, figsize=(10, rows * 4))
axes = axes.flatten()
for idx, ax in enumerate(axes):
    if idx < len(selected_images):
        img_name = selected_images[idx]
        path = os.path.join(img_path, img_name)
        img = mpimg.imread(path)
        ax.imshow(img)
        ax.set_title(f"Name: {img_name}", fontsize=10)
    # Hide ticks and frame on every panel, including panels left without an image.
    ax.axis("off")
plt.suptitle("Sample Images from CSIRO Biomass Dataset", fontsize=16, fontweight='bold', y=1.02)
plt.tight_layout()
plt.show()


In [None]:
# Show the first five rows of the training table.
train_csv.head(n=5)

In [None]:
# Horizontal bar chart of the number of rows per state.
state_counts = train_csv["State"].value_counts()
state_counts.plot.barh()
plt.show()

In [None]:
# Number of rows available for each target name.
train_csv["target_name"].value_counts()

In [None]:
# Line plot of the raw target values in row order.
train_csv["target"].plot(color="orange")

In [None]:
# Columns that are not used by the image model; drop them from the working table.
metadata_cols = ["Sampling_Date","State","Species","Pre_GSHH_NDVI","Height_Ave_cm"]
train_test = train_csv.drop(columns=metadata_cols)
train_test.head()

In [None]:
# (rows, columns) of the table after dropping the metadata columns.
train_test.shape

In [None]:
# Reshape from long format (one row per image/target pair) to wide format:
# one row per image_path and one column per target_name. pivot_table
# aggregates repeated (image_path, target_name) pairs with its default, the mean.
pivoted = (
    train_test
    .pivot_table(values="target", index="image_path", columns="target_name")
    .reset_index()
)


In [None]:
# Every column of the wide table except the leading image_path key is a regression target.
target_cols = pivoted.columns.drop("image_path")
target_cols

In [None]:
# Display the wide table (one row per image, one column per target).
pivoted

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20 % of the images for validation; the fixed seed makes the split repeatable.
train_df, test_df = train_test_split(
    pivoted,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Display the training part of the split.
train_df

In [None]:
import numpy as np


def load_images(relative_paths, root=img_path, size=(128, 128)):
    """Load images from disk into a single array.

    Each file is opened, resized to ``size``, converted to RGB and turned
    into a NumPy array, in that order.

    Args:
        relative_paths: iterable of image paths relative to ``root``.
        root: folder the relative paths are joined onto.
        size: (width, height) passed to ``Image.resize``.

    Returns:
        np.ndarray stacking one array per image, in input order.
    """
    loaded = []
    for rel_path in relative_paths:
        # The context manager closes the file handle as soon as the pixels
        # have been copied into the array, so handles do not pile up while
        # iterating over the whole dataset.
        with Image.open(os.path.join(root, rel_path)) as image:
            loaded.append(np.array(image.resize(size).convert("RGB")))
    return np.array(loaded)


# Pixel arrays for the training and validation parts of the split.
x_train = load_images(train_df.image_path)
x_test = load_images(test_df.image_path)
    


In [None]:
# Target matrices: one row per image, one column per entry of target_cols.
y_train = train_df[target_cols].to_numpy()
y_test = test_df[target_cols].to_numpy()

print("Train shape:", x_train.shape)
print("Test shape:", x_test.shape)
print("Targets:", target_cols)

In [None]:
def augment_images(images, pixel_max=255.0):
    """Return a randomly augmented float32 copy of a batch of images.

    Every image independently receives a random horizontal flip, a random
    brightness shift and a random contrast change.

    Args:
        images: iterable of image arrays convertible to float32 tensors,
            with pixel values on the scale ``[0, pixel_max]``.
        pixel_max: largest valid pixel value. The brightness shift is
            expressed as a fraction of it and the result is clipped to
            ``[0, pixel_max]``.

    Returns:
        np.ndarray holding one augmented float32 image per input image.
    """
    augmented = []
    for img in images:
        img_tensor = tf.convert_to_tensor(img, dtype=tf.float32)
        img_tensor = tf.image.random_flip_left_right(img_tensor)
        # random_brightness adds the sampled delta directly to the pixel
        # values, so the +/-10 % range has to be given on the data's own
        # scale; a bare 0.1 is imperceptible on 0-255 pixels.
        img_tensor = tf.image.random_brightness(img_tensor, max_delta=0.1 * pixel_max)
        img_tensor = tf.image.random_contrast(img_tensor, 0.8, 1.2)
        # Brightness/contrast changes can leave the valid pixel range.
        img_tensor = tf.clip_by_value(img_tensor, 0.0, pixel_max)
        augmented.append(img_tensor.numpy())
    return np.array(augmented)

# Replaces the training images with one augmented version of each.
# Re-running this cell augments the already augmented data again.
x_train = augment_images(x_train)



In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
import tensorflow.keras.backend as K

# Small CNN regressor: two conv/pool stages, one dense layer, and a linear
# output with one unit per target column.
# NOTE(review): images are fed to the network as-is, with no rescaling layer;
# if the inputs are raw 0-255 pixels, consider normalising them (the learning
# rate would then need retuning).
model = models.Sequential([
    layers.Input(shape=(128,128,3)),
    layers.Conv2D(32, (3,3), activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(64, (3,3), activation='relu'),
    layers.MaxPooling2D(),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(len(target_cols), activation='linear')  # regression output
])

# Both callbacks watch val_rmse. Early stopping must wait longer than the
# learning-rate scheduler; with equal patience, training stops in the very
# epoch where the rate would first be lowered, so the reduction never helps.
LR_PATIENCE = 3
EARLY_STOP_PATIENCE = 8

callback = keras.callbacks.EarlyStopping(
    monitor='val_rmse',
    patience=EARLY_STOP_PATIENCE,
    restore_best_weights=True,
    mode='min'
)
lr_reduce = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_rmse',
    factor=0.5,
    patience=LR_PATIENCE,
    mode='min',
    verbose=1
)


def r2_metric(y_true, y_pred):
    """Coefficient of determination over everything passed in one call.

    All elements of ``y_true`` / ``y_pred`` are pooled: the total sum of
    squares uses a single mean over every value, not a per-target mean.
    Epsilon in the denominator avoids division by zero when ``y_true`` is
    constant.
    """
    ss_res = K.sum(K.square(y_true - y_pred))
    ss_tot = K.sum(K.square(y_true - K.mean(y_true)))
    return (1 - ss_res/(ss_tot + K.epsilon()))


model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-5),
    loss='mse',
    metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse'), r2_metric]
)

# The held-out split (x_test, y_test) serves as validation data.
history=model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=100,callbacks=[callback,lr_reduce],batch_size=16)


In [None]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

In [None]:
# Show the first five rows of the competition test table.
test_csv.head(n=5)

In [None]:
# Number of rows in the competition test table.
test_csv.shape[0]

In [None]:
# Load one 128x128 RGB array per row of test_csv, in row order, so that the
# predictions can later be aligned with test_csv by position.
test_t_data=[]

for i in test_csv.image_path:
    # The context manager closes the file handle once the pixels are copied.
    with Image.open(img_path + i) as image:
        test_t_data.append(np.array(image.resize((128,128)).convert("RGB")))
test_t_data=np.array(test_t_data)
print("\n lenght of test data :",len(test_t_data))

In [None]:
# Show the first five rows of the competition test table again.
test_csv.head(n=5)

In [None]:
# Predict all targets for every loaded test image.
preds = model.predict(test_t_data, batch_size=16)
# One column per target, in the same order the model was trained on.
pred_df = pd.DataFrame(data=preds, columns=target_cols)
# Attach the image path of each test_csv row next to its predictions
# (rows are matched on the index).
result_df = pd.concat(objs=[test_csv["image_path"], pred_df], axis="columns")


In [None]:
# Back to long format: one row per (image_path, target_name) pair with its predicted value.
melted = pd.melt(
    result_df,
    id_vars=['image_path'],
    value_vars=target_cols,
    var_name='target_name',
    value_name='target',
)

In [None]:
# Keep exactly one row per (image_path, target_name) pair. De-duplicating on
# the key columns, rather than on every column, does not depend on repeated
# predictions for the same image being bit-identical floats, so the
# submission cannot end up with duplicate sample ids.
melted = melted.drop_duplicates(subset=["image_path", "target_name"])

In [None]:
# Build the submission key "<image id>__<target name>"; the image id is the
# "ID<digits>" token found in the image path.
image_ids = melted['image_path'].str.extract(r'(ID\d+)', expand=False)
melted['sample_id'] = image_ids + '__' + melted['target_name']
melted

In [None]:
# Show the first five rows of the wide prediction table.
result_df.head(n=5)

In [None]:
# The submission file holds only the sample id and its predicted value.
submission = melted.loc[:, ["sample_id", "target"]].copy()
submission.to_csv("submission.csv", index=False)
submission.head()

In [None]:
# Sanity check via the shell: file size and the first lines of the written submission.
!ls -lh submission.csv
!head submission.csv