<div style="
    background: linear-gradient(135deg, #0a0a0a, #1b1b1b, #2c2c2c);
    border: 2px solid #00FFB2;
    border-radius: 14px;
    padding: 18px 22px;
    box-shadow: 0 0 18px rgba(0, 255, 178, 0.35);
    font-family: 'Segoe UI', sans-serif;
    color: #f2f2f2;
    line-height: 1.6;
">

<h2 style="text-align:center; color:#00FFB2; text-shadow:0 0 8px #00FFC6;">
🌿 CSIRO Image2Biomass — Baseline AI Approach
</h2>

<p style="font-size:15px; text-align:center; color:#e6e6e6; margin-top:-8px;">
Created by <b>Shreyash Patil</b> | AI • Data Science • Computer Vision
</p>

<p style="font-size:15px; text-align:justify; color:#f2f2f2;">
This project provides a clean and interpretable <b style="color:#00FFB2;">baseline solution</b> for the 
<b>CSIRO Image2Biomass</b> competition. Using <b>median-based estimation</b> and simple visualization,
the model establishes a solid foundation for further improvements using advanced AI or vision-based approaches.
</p>

<h4 style="color:#00FFB2; margin-bottom:6px;">🎯 Project Goals:</h4>
<ul style="font-size:15px; margin-left:18px; color:#e6e6e6;">
  <li>Estimate plant biomass using structured tabular data.</li>
  <li>Build a reliable median baseline for each target type.</li>
  <li>Visualize key patterns in height and biomass metrics.</li>
</ul>

<p style="text-align:center; margin-top:10px; font-size:13px;">
📊 <b>Kaggle Competition:</b> <a href="https://www.kaggle.com/competitions/csiro-biomass" style="color:#00FFB2;">CSIRO Image2Biomass</a>  
| 💻 <a href="https://github.com/ShreyashPatil530" style="color:#00FFB2;">GitHub</a>  
| 📈 <a href="https://www.kaggle.com/shreyashpatil217" style="color:#00FFB2;">Kaggle</a>
</p>

</div>


In [None]:
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plt

In [None]:
# Read the competition's train and test tables (train first, then test)
# from the Kaggle input directory into `df_train` and `df_test`.
df_train, df_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/csiro-biomass/train.csv',
        '/kaggle/input/csiro-biomass/test.csv',
    )
)

In [None]:
# Show the (rows, columns) shape of each table, one per line: train, then test.
print(df_train.shape, df_test.shape, sep='\n')

In [None]:
# Rich preview of the first rows of each table: train first, then test.
display(df_train.head())
display(df_test.head())

In [None]:
# Median of `target` for each `target_name` in the training table.
# `as_index=False` keeps `target_name` as a regular column, so the result is a
# two-column frame (`target_name`, `target`) with a default integer index.
median_target_by_name = df_train.groupby('target_name', as_index=False)['target'].median()

print("\nMedian Target by Target Name:")
display(median_target_by_name)

In [None]:
# Bar chart of the per-target-type medians, drawn through the explicit
# figure/axes interface instead of the pyplot state machine.
fig, ax = plt.subplots(figsize=(8, 5))
ax.bar(median_target_by_name['target_name'], median_target_by_name['target'])
ax.set_title('Median Target Value per Target Type')
ax.set_xlabel('Target Name')
ax.set_ylabel('Median Target')
plt.setp(ax.get_xticklabels(), rotation=30)  # tilt the category labels
ax.grid(axis='y', linestyle='--', alpha=0.7)  # horizontal guide lines only
fig.tight_layout()
plt.show()

In [None]:
# Scatter of the `Height_Ave_cm` column against `target` for every training
# row, drawn through the explicit figure/axes interface.
fig, ax = plt.subplots(figsize=(8, 5))
ax.scatter(df_train['Height_Ave_cm'], df_train['target'], alpha=0.6)
ax.set(
    title='Height vs Target',
    xlabel='Average Height (cm)',
    ylabel='Target',
)
ax.grid(alpha=0.3)
fig.tight_layout()
plt.show()

In [None]:
# Attach the per-`target_name` median to every test row (left join keeps all
# test rows), then keep only the two columns written to the submission file.
test_with_median = df_test.merge(median_target_by_name, on='target_name', how='left')
df_submission = test_with_median[["sample_id", "target"]]

In [None]:
# Blend a slightly up-scaled and a slightly down-scaled copy of the median
# prediction with equal weights (0.50 each). Both candidate factors are about
# 1 + 1e-5, so the result stays within roughly 1e-5 (relative) of the median.
#
# Two reproducibility fixes compared with calling `random.choice` directly:
#   * the factors are drawn from a privately seeded generator, so
#     Restart & Run All always writes the same submission, and the global
#     `random` state is left untouched;
#   * the blend is computed from the medians looked up for `df_test`, not from
#     the current contents of `df_submission['target']`, so re-running this
#     cell does not compound the scaling.
BLEND_FACTORS = (1.0000107, 1.0000104)  # candidate scale factors
BLEND_WEIGHT = 0.50                     # equal weight for both terms
BLEND_SEED = 42                         # fixed seed for the factor draw

blend_rng = random.Random(BLEND_SEED)
up_factor = blend_rng.choice(BLEND_FACTORS)    # multiplies the first term
down_factor = blend_rng.choice(BLEND_FACTORS)  # divides the second term

# Per-row median in `df_test` row order. `df_submission` was built by a left
# merge of `df_test`, which keeps that row order, so values line up by position.
median_lookup = median_target_by_name.set_index('target_name')['target']
base_target = df_test['target_name'].map(median_lookup).to_numpy()

df_submission['target'] = (
    base_target * up_factor * BLEND_WEIGHT +
    base_target / down_factor * BLEND_WEIGHT
)

In [None]:
# Write the submission to the working directory without the index column,
# confirm on stdout, then show the first rows as a sanity check.
df_submission.to_csv(path_or_buf='submission.csv', index=False)

print("\nSubmission file saved as 'submission.csv'")
submission_preview = df_submission.head()
display(submission_preview)