In [1]:
import pandas as pd
import numpy as np

# For reproducibility: seed NumPy's legacy global RNG, which every
# np.random.uniform call below draws from.
np.random.seed(42)

# Number of samples per crop
n_per_crop = 200

# -----------------------------
# Synthetic per-crop feature ranges
# -----------------------------

# Numeric feature columns, in the order their values are drawn.
FEATURE_COLUMNS = (
    "Soil Moisture",
    "Environment Temperature",
    "Environment Humidity",
    "Environment Light Intensity",
)

# (low, high) sampling bounds per crop, one pair per entry of FEATURE_COLUMNS.
# NOTE(review): these bounds are hand-picked for the synthetic dataset; confirm
# them against a domain source before treating them as agronomic facts.
CROP_RANGES = {
    # Rice: high moisture, moderate temperature, high humidity, moderate light.
    "Rice": ((65, 75), (24, 30), (80, 100), (40, 60)),
    # Wheat: moderate moisture, cooler temperature, moderate humidity, lower light.
    "Wheat": ((55, 65), (15, 25), (50, 70), (30, 50)),
    # Maize: moderate moisture, higher temperature, moderate humidity, high light.
    "Maize": ((50, 60), (25, 35), (40, 60), (60, 100)),
    # Cotton: lower moisture, high temperature, low humidity, very high light.
    "Cotton": ((40, 55), (30, 35), (30, 50), (70, 100)),
    # Barley: intermediate ranges.
    "Barley": ((45, 60), (20, 30), (55, 75), (30, 70)),
    # Sugarcane: very high moisture, warm temperature, very high humidity, moderate light.
    "Sugarcane": ((70, 80), (27, 33), (85, 100), (50, 70)),
}


def make_crop_frame(crop: str, ranges, n_samples: int) -> pd.DataFrame:
    """Build one crop's synthetic samples.

    Parameters
    ----------
    crop : str
        Label written to every row of the "Crop" column.
    ranges : sequence of (low, high) pairs
        Sampling bounds, one pair per entry of FEATURE_COLUMNS, in that order.
    n_samples : int
        Number of rows to generate.

    Returns
    -------
    pd.DataFrame
        One column per feature (uniform draws rounded to 2 decimals) followed
        by a constant "Crop" column.

    Raises
    ------
    ValueError
        If ``ranges`` does not supply exactly one pair per feature column.
    """
    if len(ranges) != len(FEATURE_COLUMNS):
        raise ValueError(
            f"{crop}: expected {len(FEATURE_COLUMNS)} (low, high) pairs, got {len(ranges)}"
        )
    # Features are drawn in FEATURE_COLUMNS order from the global RNG, so the
    # values depend on the order of these calls.
    columns = {
        feature: np.random.uniform(low, high, n_samples).round(2)
        for feature, (low, high) in zip(FEATURE_COLUMNS, ranges)
    }
    columns["Crop"] = crop  # scalar label, broadcast to every row
    return pd.DataFrame(columns)


# -----------------------------
# Generate one frame per crop
# -----------------------------
# Crops are generated in CROP_RANGES insertion order; together with the fixed
# feature order this makes the seeded output identical on every fresh run.
crop_frames = {
    crop: make_crop_frame(crop, ranges, n_per_crop)
    for crop, ranges in CROP_RANGES.items()
}

# Individual names for each crop's frame.
rice_df = crop_frames["Rice"]
wheat_df = crop_frames["Wheat"]
maize_df = crop_frames["Maize"]
cotton_df = crop_frames["Cotton"]
barley_df = crop_frames["Barley"]
sugarcane_df = crop_frames["Sugarcane"]

# -----------------------------
# Stack the per-crop frames and shuffle the rows
# -----------------------------
output_filename = "final_merged_crop_dataset.csv"

per_crop_frames = [rice_df, wheat_df, maize_df, cotton_df, barley_df, sugarcane_df]
merged_df = (
    pd.concat(per_crop_frames, ignore_index=True)
    # frac=1 samples every row once, i.e. a shuffle. No random_state is passed,
    # so pandas draws from NumPy's global RNG.
    .sample(frac=1)
    .reset_index(drop=True)  # discard the pre-shuffle index
)

# Prepend a DateTime column: one timestamp per row, spaced one minute apart.
n_total = len(merged_df)
date_times = pd.date_range(start="2024-09-27 12:00:00", periods=n_total, freq="1min")
merged_df.insert(0, "DateTime", date_times)

# -----------------------------
# Write the CSV, then preview the result
# -----------------------------
merged_df.to_csv(output_filename, index=False)

print("Final merged dataset (first 10 rows):")
print(merged_df.head(10))
print(f"\nDataset shape: {merged_df.shape}")
print(f"Dataset saved as '{output_filename}'.")


Final merged dataset (first 10 rows):
             DateTime  Soil Moisture  Environment Temperature  \
0 2024-09-27 12:00:00          59.46                    16.30   
1 2024-09-27 12:01:00          59.52                    26.59   
2 2024-09-27 12:02:00          48.62                    25.63   
3 2024-09-27 12:03:00          67.26                    28.10   
4 2024-09-27 12:04:00          52.55                    20.95   
5 2024-09-27 12:05:00          74.44                    27.31   
6 2024-09-27 12:06:00          49.88                    25.79   
7 2024-09-27 12:07:00          51.75                    25.99   
8 2024-09-27 12:08:00          63.35                    19.96   
9 2024-09-27 12:09:00          57.71                    17.34   

   Environment Humidity  Environment Light Intensity       Crop  
0                 64.14                        47.88      Wheat  
1                 61.90                        59.26     Barley  
2                 72.69                        3