In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random

In [2]:
# --- Configuration ---
num_cows = 100
num_days = 30 # Data for 30 days
readings_per_day = 4 # 4 readings per cow per day (e.g., every 6 hours)
random_seed = 42 # Fixed seed so a fresh kernel regenerates the exact same dataset

# Both generators are used below (`random` for uniform/discrete draws,
# `np.random` for the normal draws), so both must be seeded.
random.seed(random_seed)
np.random.seed(random_seed)

# --- Data Generation Parameters ---
# Healthy ranges (approximate)
healthy_temp = {'mean': 38.5, 'std': 0.3} # Celsius
healthy_hr = {'mean': 65, 'std': 5} # BPM
healthy_activity = {'mean': 7, 'std': 1.5} # Score 1-10
healthy_rumination = {'mean': 400, 'std': 50} # minutes/day
healthy_feed_intake = {'mean': 15.0, 'std': 2.0} # kg/day
healthy_water_intake = {'mean': 80.0, 'std': 10.0} # L/day
healthy_respiration = {'mean': 25, 'std': 3} # BPM

# Sick deviations (relative to healthy), applied in the loop below:
# Temperature: +1.0 to +2.5 C
# Heart Rate: +10 to +25 BPM
# Activity: -3 to -6 score
# Rumination: -100 to -200 minutes/day
# Feed Intake: -5 to -10 kg/day
# Water Intake: -10 to -25 L/day
# Respiration Rate: +10 to +20 BPM

# Environmental data (drawn uniformly and independently for every reading)
ambient_temp_range = (20, 30) # Celsius
humidity_range = (50, 80) # Percent

# --- Generate Data ---
data = []
start_date = datetime(2025, 6, 1) # Start date for the dataset
hours_between_readings = 24 / readings_per_day # Loop-invariant spacing of the readings

for i in range(1, num_cows + 1):
    cow_id = f'Cow_{i:03d}'
    age_months = random.randint(12, 120) # Cows between 1 and 10 years old

    # Roughly 10% of cows get 1-5 sick days scattered at random over the period;
    # the remaining cows get none. The count is capped at num_days so that
    # random.sample cannot be asked for more days than exist.
    sick_days_count = random.choices([0, 1], weights=[0.9, 0.1], k=1)[0] * random.randint(1, 5)
    sick_days_count = min(sick_days_count, num_days)
    sick_day_indices = random.sample(range(num_days), sick_days_count)

    for day in range(num_days):
        current_date = start_date + timedelta(days=day)

        # All readings taken on a sick day are labelled (and shifted) as sick
        is_sick_day = day in sick_day_indices

        for reading_idx in range(readings_per_day):
            current_timestamp = current_date + timedelta(hours=reading_idx * hours_between_readings)

            # Start every reading from the healthy distribution
            temp = np.random.normal(healthy_temp['mean'], healthy_temp['std'])
            hr = np.random.normal(healthy_hr['mean'], healthy_hr['std'])
            activity = np.random.normal(healthy_activity['mean'], healthy_activity['std'])
            rumination = np.random.normal(healthy_rumination['mean'], healthy_rumination['std'])
            feed_intake = np.random.normal(healthy_feed_intake['mean'], healthy_feed_intake['std'])
            water_intake = np.random.normal(healthy_water_intake['mean'], healthy_water_intake['std'])
            respiration_rate = np.random.normal(healthy_respiration['mean'], healthy_respiration['std'])

            health_status = 'Healthy'

            if is_sick_day:
                # Apply deviations for sick animals
                temp += random.uniform(1.0, 2.5)
                hr += random.uniform(10, 25)
                activity -= random.uniform(3, 6)
                rumination -= random.uniform(100, 200)
                feed_intake -= random.uniform(5, 10)
                water_intake -= random.uniform(10, 25)
                respiration_rate += random.uniform(10, 20)
                health_status = 'Sick'

            # Ensure values are within reasonable bounds and positive.
            # Integer metrics are rounded to nearest: plain int() truncates and
            # would pull every integer column about 0.5 below its configured mean.
            temp = round(max(37.0, temp), 1)
            hr = int(round(max(40, hr)))
            activity = int(round(max(1, min(10, activity))))
            rumination = int(round(max(0, rumination)))
            feed_intake = round(max(0.0, feed_intake), 1)
            water_intake = round(max(0.0, water_intake), 1)
            respiration_rate = int(round(max(10, respiration_rate)))

            ambient_temp = round(random.uniform(*ambient_temp_range), 1)
            humidity = round(random.uniform(*humidity_range), 1)

            # Field order must match `columns` below
            data.append([
                cow_id,
                current_timestamp,
                age_months,
                temp,
                hr,
                activity,
                rumination,
                feed_intake,
                water_intake,
                ambient_temp,
                humidity,
                respiration_rate,
                health_status
            ])

# Create DataFrame
columns = [
    'Cow_ID', 'Timestamp', 'Age_Months', 'Temperature_C', 'Heart_Rate_BPM',
    'Activity_Score', 'Rumination_Minutes_Day', 'Feed_Intake_KG_Day',
    'Water_Intake_L_Day', 'Ambient_Temp_C', 'Humidity_Percent',
    'Respiration_Rate_BPM', 'Health_Status'
]
df = pd.DataFrame(data, columns=columns)

# Display a sample of the generated data
print("Sample of Generated Data:")
print(df.head())

print("\nValue Counts for Health Status:")
print(df['Health_Status'].value_counts())

print("\nDescriptive Statistics for Numerical Features:")
print(df.describe())

# Optional: Save to CSV
# df.to_csv('synthetic_cow_health_data.csv', index=False)
# print("\nDataset saved to synthetic_cow_health_data.csv")

Sample of Generated Data:
    Cow_ID           Timestamp  Age_Months  Temperature_C  Heart_Rate_BPM  \
0  Cow_001 2025-06-01 00:00:00         109           38.4              65   
1  Cow_001 2025-06-01 06:00:00         109           38.7              67   
2  Cow_001 2025-06-01 12:00:00         109           38.2              57   
3  Cow_001 2025-06-01 18:00:00         109           39.0              63   
4  Cow_001 2025-06-02 00:00:00         109           38.5              67   

   Activity_Score  Rumination_Minutes_Day  Feed_Intake_KG_Day  \
0               9                     434                12.8   
1               6                     429                15.1   
2               3                     389                17.4   
3               6                     467                12.9   
4               8                     398                11.8   

   Water_Intake_L_Day  Ambient_Temp_C  Humidity_Percent  Respiration_Rate_BPM  \
0                93.3            23.0  

In [3]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random

# --- Configuration ---
num_cows = 100
num_days = 30 # Data for 30 days
readings_per_day = 4 # 4 readings per cow per day (e.g., every 6 hours)
sick_cow_percentage = 0.20 # 20% of cows will have at least one sick day
sick_day_duration_avg = 3 # Nominal sick-episode length; the actual length is drawn between ~0.5x and 1.5x of this
random_seed = 42 # Fixed seed so a fresh kernel regenerates the exact same dataset

# Both generators are used below (`random` for uniform/discrete draws,
# `np.random` for the normal draws), so both must be seeded.
random.seed(random_seed)
np.random.seed(random_seed)

# --- Data Generation Parameters ---
# Healthy ranges (approximate for typical dairy cows)
healthy_params = {
    'Temperature_C': {'mean': 38.5, 'std': 0.3, 'min': 37.5, 'max': 39.5},
    'Heart_Rate_BPM': {'mean': 65, 'std': 5, 'min': 50, 'max': 80},
    'Activity_Score': {'mean': 7, 'std': 1.5, 'min': 1, 'max': 10}, # 1=low, 10=high
    'Rumination_Minutes_Day': {'mean': 400, 'std': 50, 'min': 250, 'max': 500},
    'Feed_Intake_KG_Day': {'mean': 15.0, 'std': 2.0, 'min': 5.0, 'max': 25.0},
    'Water_Intake_L_Day': {'mean': 80.0, 'std': 10.0, 'min': 50.0, 'max': 120.0},
    'Respiration_Rate_BPM': {'mean': 25, 'std': 3, 'min': 15, 'max': 40}
}

# Sick deviations: a uniform draw from [min_change, max_change] is ADDED to the
# healthy draw, so negative ranges express a decrease.
sick_deviations = {
    'Temperature_C': {'min_change': 1.0, 'max_change': 2.5}, # Increase
    'Heart_Rate_BPM': {'min_change': 10, 'max_change': 25}, # Increase
    'Activity_Score': {'min_change': -6, 'max_change': -3}, # Decrease
    'Rumination_Minutes_Day': {'min_change': -200, 'max_change': -100}, # Decrease
    'Feed_Intake_KG_Day': {'min_change': -10, 'max_change': -5}, # Decrease
    'Water_Intake_L_Day': {'min_change': -25, 'max_change': -10}, # Decrease
    'Respiration_Rate_BPM': {'min_change': 10, 'max_change': 20} # Increase
}

# Limits that hold regardless of health status. Activity is a fixed 1-10 scale;
# every other metric only has to stay non-negative.
absolute_limits = {'Activity_Score': (1, 10)}

# Clamp bounds per metric, keyed by is_sick_day. Healthy readings keep the
# healthy min/max. Sick readings get that range widened by the deviation range:
# clamping them to the healthy range would flatten the sick signal (e.g. every
# sick temperature would be pinned to exactly 39.5).
clamp_bounds = {}
for metric, params in healthy_params.items():
    deviation = sick_deviations[metric]
    hard_low, hard_high = absolute_limits.get(metric, (0, float('inf')))
    sick_low = max(hard_low, params['min'] + min(0, deviation['min_change']))
    sick_high = min(hard_high, params['max'] + max(0, deviation['max_change']))
    clamp_bounds[metric] = {
        False: (params['min'], params['max']),
        True: (sick_low, sick_high),
    }

# Metrics stored with one decimal; everything else is rounded to an integer
one_decimal_metrics = ('Temperature_C', 'Feed_Intake_KG_Day', 'Water_Intake_L_Day')

# Environmental data ranges
ambient_temp_range = (20, 30) # Celsius
humidity_range = (50, 80) # Percent

# Sick-episode length range and latest allowed start day. The max() guards keep
# random.randint from receiving an empty range when num_days or
# sick_day_duration_avg is set very small.
min_sick_duration = max(1, int(sick_day_duration_avg * 0.5))
max_sick_duration = max(min_sick_duration, int(sick_day_duration_avg * 1.5))
latest_sick_start = max(0, num_days - max_sick_duration)

# --- Generate Data ---
data_records = []
start_date = datetime(2025, 1, 1) # Arbitrary start date
hours_between_readings = 24 / readings_per_day

# Decide which cows will have sick days
sick_cow_ids = random.sample(
    [f'Cow_{i:03d}' for i in range(1, num_cows + 1)],
    k=int(num_cows * sick_cow_percentage)
)

for i in range(1, num_cows + 1):
    cow_id = f'Cow_{i:03d}'
    age_months = random.randint(12, 120) # Cows between 1 and 10 years old

    cow_sick_days = []
    if cow_id in sick_cow_ids:
        # One contiguous sick episode per affected cow
        sickness_start_day = random.randint(0, latest_sick_start)
        sickness_duration = random.randint(min_sick_duration, max_sick_duration)
        # Ensure sick days don't exceed num_days
        cow_sick_days = [
            d for d in range(sickness_start_day, sickness_start_day + sickness_duration)
            if d < num_days
        ]

    for day_offset in range(num_days):
        current_date = start_date + timedelta(days=day_offset)

        is_sick_day = day_offset in cow_sick_days
        health_status = 'Sick' if is_sick_day else 'Healthy'

        for reading_idx in range(readings_per_day):
            current_timestamp = current_date + timedelta(hours=reading_idx * hours_between_readings)

            row = {
                'Cow_ID': cow_id,
                'Timestamp': current_timestamp,
                'Age_Months': age_months,
                'Ambient_Temp_C': round(random.uniform(*ambient_temp_range), 1),
                'Humidity_Percent': round(random.uniform(*humidity_range), 1),
                'Health_Status': health_status
            }

            for metric, params in healthy_params.items():
                value = np.random.normal(params['mean'], params['std'])
                if is_sick_day:
                    # The sign of the range already encodes increase vs. decrease
                    change_range = sick_deviations[metric]
                    value += random.uniform(change_range['min_change'], change_range['max_change'])

                # Apply the status-specific bounds, then round/cast to the stored type
                low, high = clamp_bounds[metric][is_sick_day]
                value = max(low, min(high, value))
                if metric in one_decimal_metrics:
                    row[metric] = round(value, 1)
                else: # For integer metrics like BPM, Score, Minutes
                    row[metric] = int(round(value))

            data_records.append(row)

# Create DataFrame
df = pd.DataFrame(data_records)

# --- Display Information ---
print("🌟 Synthetic Livestock Health Data Generated! 🌟\n")
print("First 5 rows of the DataFrame:")
print(df.head())

print("\n--- Basic Statistics ---")
print("Value Counts for Health Status:")
print(df['Health_Status'].value_counts())
print("\nDescription of Numerical Features:")
print(df.describe())

# Optional: Save to CSV
# df.to_csv('synthetic_livestock_health_data.csv', index=False)
# print("\nDataset saved to synthetic_livestock_health_data.csv")

🌟 Synthetic Livestock Health Data Generated! 🌟

First 5 rows of the DataFrame:
    Cow_ID           Timestamp  Age_Months  Ambient_Temp_C  Humidity_Percent  \
0  Cow_001 2025-01-01 00:00:00          74            24.2              66.8   
1  Cow_001 2025-01-01 06:00:00          74            25.1              53.7   
2  Cow_001 2025-01-01 12:00:00          74            28.1              55.1   
3  Cow_001 2025-01-01 18:00:00          74            28.2              60.8   
4  Cow_001 2025-01-02 00:00:00          74            20.2              69.3   

  Health_Status  Temperature_C  Heart_Rate_BPM  Activity_Score  \
0       Healthy           38.1              64               7   
1       Healthy           38.1              69               8   
2       Healthy           38.5              71               6   
3       Healthy           38.1              61               6   
4       Healthy           38.6              72               6   

   Rumination_Minutes_Day  Feed_Intake_KG_D

In [None]:
import pandas as pd

# Summary statistics per feature, hard-coded as literal values.
# NOTE(review): these look like a snapshot of an earlier `df.describe()` run on
# the generated dataset -- confirm, and regenerate if the generator changes.
data = {
    "Feature": [
        "Age_Months", "Ambient_Temp_C", "Humidity_Percent", "Temperature_C",
        "Heart_Rate_BPM", "Activity_Score", "Rumination_Minutes_Day",
        "Feed_Intake_KG_Day", "Water_Intake_L_Day", "Respiration_Rate_BPM"
    ],
    "Mean": [62.23, 24.977325, 65.046325, 38.514783, 65.270833, 6.916167, 397.608333, 14.877383, 79.695542, 25.1915],
    "Std Dev": [33.277461, 2.877898, 8.675855, 0.320562, 5.181221, 1.585302, 50.943435, 2.18282, 10.204928, 3.415255],
    "Min": [12.0, 20.0, 50.0, 37.5, 50.0, 1.0, 250.0, 5.0, 50.0, 15.0],
    "25%": [30.75, 22.5, 57.5, 38.3, 62.0, 6.0, 365.0, 13.6, 72.9, 23.0],
    "50%": [62.5, 25.0, 65.0, 38.5, 65.0, 7.0, 399.0, 14.9, 79.6, 25.0],
    "75%": [90.25, 27.5, 72.525, 38.7, 69.0, 8.0, 433.0, 16.3, 86.6, 27.0],
    "Max": [119.0, 30.0, 80.0, 39.5, 80.0, 10.0, 500.0, 23.0, 120.0, 40.0]
}

# Create the DataFrame (one row per feature, one column per statistic)
stats_df = pd.DataFrame(data)

# Save as CSV in the working directory. The previous absolute "/mnt/data/..."
# location only exists in some hosted sandboxes and makes to_csv fail elsewhere.
csv_path = "cow_health_statistics_summary.csv"
stats_df.to_csv(csv_path, index=False)

# Echo the output location as the cell result
csv_path

In [6]:


import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random

# --- Configuration ---
num_cows = 100
num_days = 30 # Data for 30 days
readings_per_day = 4 # 4 readings per cow per day (e.g., every 6 hours)
sick_cow_percentage = 0.20 # 20% of cows will have at least one sick day
sick_day_duration_avg = 3 # Nominal sick-episode length; the actual length is drawn between ~0.5x and 1.5x of this
random_seed = 42 # Fixed seed so a fresh kernel regenerates the exact same dataset

# Both generators are used below (`random` for uniform/discrete draws,
# `np.random` for the normal draws), so both must be seeded.
random.seed(random_seed)
np.random.seed(random_seed)

# --- Data Generation Parameters ---
# Healthy ranges (approximate for typical dairy cows)
healthy_params = {
    'temperature_celsius': {'mean': 38.5, 'std': 0.3, 'min': 37.5, 'max': 39.5},
    'heart_rate_bpm': {'mean': 65, 'std': 5, 'min': 50, 'max': 80},
    'activity_score': {'mean': 7, 'std': 1.5, 'min': 1, 'max': 10}, # 1=low, 10=high
    'rumination_minutes_day': {'mean': 400, 'std': 50, 'min': 250, 'max': 500},
    'feed_intake_kg_day': {'mean': 15.0, 'std': 2.0, 'min': 5.0, 'max': 25.0},
    'water_intake_liters_day': {'mean': 80.0, 'std': 10.0, 'min': 50.0, 'max': 120.0},
    'respiration_rate_bpm': {'mean': 25, 'std': 3, 'min': 15, 'max': 40}
}

# Sick deviations: a uniform draw from [min_change, max_change] is ADDED to the
# healthy draw, so negative ranges express a decrease.
sick_deviations = {
    'temperature_celsius': {'min_change': 1.0, 'max_change': 2.5}, # Increase
    'heart_rate_bpm': {'min_change': 10, 'max_change': 25}, # Increase
    'activity_score': {'min_change': -6, 'max_change': -3}, # Decrease
    'rumination_minutes_day': {'min_change': -200, 'max_change': -100}, # Decrease
    'feed_intake_kg_day': {'min_change': -10, 'max_change': -5}, # Decrease
    'water_intake_liters_day': {'min_change': -25, 'max_change': -10}, # Decrease
    'respiration_rate_bpm': {'min_change': 10, 'max_change': 20} # Increase
}

# Limits that hold regardless of health status. Activity is a fixed 1-10 scale;
# every other metric only has to stay non-negative.
absolute_limits = {'activity_score': (1, 10)}

# Clamp bounds per metric, keyed by is_sick_day. Healthy readings keep the
# healthy min/max. Sick readings get that range widened by the deviation range:
# clamping them to the healthy range would flatten the sick signal (e.g. every
# sick temperature would be pinned to exactly 39.5).
clamp_bounds = {}
for metric, params in healthy_params.items():
    deviation = sick_deviations[metric]
    hard_low, hard_high = absolute_limits.get(metric, (0, float('inf')))
    sick_low = max(hard_low, params['min'] + min(0, deviation['min_change']))
    sick_high = min(hard_high, params['max'] + max(0, deviation['max_change']))
    clamp_bounds[metric] = {
        False: (params['min'], params['max']),
        True: (sick_low, sick_high),
    }

# Metrics stored with one decimal; everything else is rounded to an integer
one_decimal_metrics = ('temperature_celsius', 'feed_intake_kg_day', 'water_intake_liters_day')

# Environmental data ranges
ambient_temp_range = (20, 30) # Celsius
humidity_range = (50, 80) # Percent

# Sick-episode length range and latest allowed start day. The max() guards keep
# random.randint from receiving an empty range when num_days or
# sick_day_duration_avg is set very small.
min_sick_duration = max(1, int(sick_day_duration_avg * 0.5))
max_sick_duration = max(min_sick_duration, int(sick_day_duration_avg * 1.5))
latest_sick_start = max(0, num_days - max_sick_duration)

# --- Generate Data ---
data_records = []
start_date = datetime(2025, 1, 1) # Arbitrary start date
hours_between_readings = 24 / readings_per_day

# Decide which cows will have sick days
sick_cow_ids = random.sample(
    [f'cow_{i:03d}' for i in range(1, num_cows + 1)], # Using snake_case for cow IDs too
    k=int(num_cows * sick_cow_percentage)
)

for i in range(1, num_cows + 1):
    cow_id = f'cow_{i:03d}'
    age_months = random.randint(12, 120) # Cows between 1 and 10 years old

    cow_sick_days = []
    if cow_id in sick_cow_ids:
        # One contiguous sick episode per affected cow
        sickness_start_day = random.randint(0, latest_sick_start)
        sickness_duration = random.randint(min_sick_duration, max_sick_duration)
        # Ensure sick days within num_days
        cow_sick_days = [
            d for d in range(sickness_start_day, sickness_start_day + sickness_duration)
            if d < num_days
        ]

    for day_offset in range(num_days):
        current_date = start_date + timedelta(days=day_offset)

        is_sick_day = day_offset in cow_sick_days
        health_status = 'sick' if is_sick_day else 'healthy' # Lowercase for categorical consistency

        for reading_idx in range(readings_per_day):
            current_timestamp = current_date + timedelta(hours=reading_idx * hours_between_readings)

            row = {
                'cow_id': cow_id,
                'timestamp': current_timestamp,
                'age_months': age_months,
                'ambient_temp_celsius': round(random.uniform(*ambient_temp_range), 1),
                'humidity_percent': round(random.uniform(*humidity_range), 1),
                'health_status': health_status
            }

            for metric, params in healthy_params.items():
                value = np.random.normal(params['mean'], params['std'])
                if is_sick_day:
                    # The sign of the range already encodes increase vs. decrease
                    change_range = sick_deviations[metric]
                    value += random.uniform(change_range['min_change'], change_range['max_change'])

                # Apply the status-specific bounds, then round/cast to the stored type
                low, high = clamp_bounds[metric][is_sick_day]
                value = max(low, min(high, value))
                if metric in one_decimal_metrics:
                    row[metric] = round(value, 1)
                else:
                    row[metric] = int(round(value))

            data_records.append(row)

# Create DataFrame
df = pd.DataFrame(data_records)

# --- Post-processing for Data Types ---
df['timestamp'] = pd.to_datetime(df['timestamp'])
# Low-cardinality string columns are stored as pandas categoricals
df['cow_id'] = df['cow_id'].astype('category')
df['health_status'] = df['health_status'].astype('category')
# Numerical columns are generally inferred correctly but can be explicitly set
# df['age_months'] = df['age_months'].astype('int16')
# df['temperature_celsius'] = df['temperature_celsius'].astype('float32')

# --- Display Refined DataFrame Information ---
print("✨ Refined Synthetic Livestock Health Data! ✨\n")
print("First 5 rows of the DataFrame with refined columns:")
print(df.head())

print("\n--- DataFrame Info (Data Types and Non-Null Counts) ---")
df.info()

print("\n--- Basic Statistics ---")
print("Value Counts for Health Status:")
print(df['health_status'].value_counts())

# Optional: Save to CSV
# df.to_csv('synthetic_livestock_health_data_refined.csv', index=False)
# print("\nDataset saved to synthetic_livestock_health_data_refined.csv")

✨ Refined Synthetic Livestock Health Data! ✨

First 5 rows of the DataFrame with refined columns:
    cow_id           timestamp  age_months  ambient_temp_celsius  \
0  cow_001 2025-01-01 00:00:00         104                  20.8   
1  cow_001 2025-01-01 06:00:00         104                  23.1   
2  cow_001 2025-01-01 12:00:00         104                  26.2   
3  cow_001 2025-01-01 18:00:00         104                  23.7   
4  cow_001 2025-01-02 00:00:00         104                  20.1   

   humidity_percent health_status  temperature_celsius  heart_rate_bpm  \
0              73.7       healthy                 38.1              59   
1              70.1       healthy                 38.5              63   
2              75.1       healthy                 38.2              62   
3              51.9       healthy                 38.5              64   
4              77.1       healthy                 38.2              55   

   activity_score  rumination_minutes_day  feed_

In [7]:
# Write the current `df` to CSV in the working directory, omitting the index column.
df.to_csv(path_or_buf='synthetic_livestock_health_data.csv', index=False)

In [8]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random

# --- Configuration ---
num_cows = 100
num_days = 30 # Data for 30 days
readings_per_day = 4 # 4 readings per cow per day (e.g., every 6 hours)
sick_cow_percentage = 0.20 # 20% of cows will have at least one sick day
sick_day_duration_avg = 3 # Nominal sick-episode length; the actual length is drawn between ~0.5x and 1.5x of this
random_seed = 42 # Fixed seed so a fresh kernel regenerates the exact same dataset

# Both generators are used below (`random` for uniform/discrete draws,
# `np.random` for the normal draws), so both must be seeded.
random.seed(random_seed)
np.random.seed(random_seed)

# --- Data Generation Parameters ---
# Healthy ranges (approximate for typical dairy cows)
healthy_params = {
    'temperature_celsius': {'mean': 38.5, 'std': 0.3, 'min': 37.5, 'max': 39.5},
    'heart_rate_bpm': {'mean': 65, 'std': 5, 'min': 50, 'max': 80},
    'activity_score': {'mean': 7, 'std': 1.5, 'min': 1, 'max': 10}, # 1=low, 10=high
    'rumination_minutes_day': {'mean': 400, 'std': 50, 'min': 250, 'max': 500},
    'feed_intake_kg_day': {'mean': 15.0, 'std': 2.0, 'min': 5.0, 'max': 25.0},
    'water_intake_liters_day': {'mean': 80.0, 'std': 10.0, 'min': 50.0, 'max': 120.0},
    'respiration_rate_bpm': {'mean': 25, 'std': 3, 'min': 15, 'max': 40}
}

# Sick deviations: a uniform draw from [min_change, max_change] is ADDED to the
# healthy draw, so negative ranges express a decrease.
sick_deviations = {
    'temperature_celsius': {'min_change': 1.0, 'max_change': 2.5}, # Increase
    'heart_rate_bpm': {'min_change': 10, 'max_change': 25}, # Increase
    'activity_score': {'min_change': -6, 'max_change': -3}, # Decrease
    'rumination_minutes_day': {'min_change': -200, 'max_change': -100}, # Decrease
    'feed_intake_kg_day': {'min_change': -10, 'max_change': -5}, # Decrease
    'water_intake_liters_day': {'min_change': -25, 'max_change': -10}, # Decrease
    'respiration_rate_bpm': {'min_change': 10, 'max_change': 20} # Increase
}

# Limits that hold regardless of health status. Activity is a fixed 1-10 scale;
# every other metric only has to stay non-negative.
absolute_limits = {'activity_score': (1, 10)}

# Clamp bounds per metric, keyed by is_sick_day. Healthy readings keep the
# healthy min/max. Sick readings get that range widened by the deviation range:
# clamping them to the healthy range would flatten the sick signal (e.g. every
# sick temperature would be pinned to exactly 39.5).
clamp_bounds = {}
for metric, params in healthy_params.items():
    deviation = sick_deviations[metric]
    hard_low, hard_high = absolute_limits.get(metric, (0, float('inf')))
    sick_low = max(hard_low, params['min'] + min(0, deviation['min_change']))
    sick_high = min(hard_high, params['max'] + max(0, deviation['max_change']))
    clamp_bounds[metric] = {
        False: (params['min'], params['max']),
        True: (sick_low, sick_high),
    }

# Metrics stored with one decimal; everything else is rounded to an integer
one_decimal_metrics = ('temperature_celsius', 'feed_intake_kg_day', 'water_intake_liters_day')

# Environmental data ranges
ambient_temp_range = (20, 30) # Celsius
humidity_range = (50, 80) # Percent

# Sick-episode length range and latest allowed start day. The max() guards keep
# random.randint from receiving an empty range when num_days or
# sick_day_duration_avg is set very small.
min_sick_duration = max(1, int(sick_day_duration_avg * 0.5))
max_sick_duration = max(min_sick_duration, int(sick_day_duration_avg * 1.5))
latest_sick_start = max(0, num_days - max_sick_duration)

# --- Generate Data ---
data_records = []
start_date = datetime(2025, 1, 1) # Arbitrary start date
hours_between_readings = 24 / readings_per_day

# Decide which cows will have sick days
sick_cow_ids = random.sample(
    [f'cow_{i:03d}' for i in range(1, num_cows + 1)], # Using snake_case for cow IDs too
    k=int(num_cows * sick_cow_percentage)
)

for i in range(1, num_cows + 1):
    cow_id = f'cow_{i:03d}'
    age_months = random.randint(12, 120) # Cows between 1 and 10 years old

    cow_sick_days = []
    if cow_id in sick_cow_ids:
        # One contiguous sick episode per affected cow
        sickness_start_day = random.randint(0, latest_sick_start)
        sickness_duration = random.randint(min_sick_duration, max_sick_duration)
        # Ensure sick days within num_days
        cow_sick_days = [
            d for d in range(sickness_start_day, sickness_start_day + sickness_duration)
            if d < num_days
        ]

    for day_offset in range(num_days):
        current_date = start_date + timedelta(days=day_offset)

        is_sick_day = day_offset in cow_sick_days
        health_status = 'sick' if is_sick_day else 'healthy' # Lowercase for categorical consistency

        for reading_idx in range(readings_per_day):
            current_timestamp = current_date + timedelta(hours=reading_idx * hours_between_readings)

            row = {
                'cow_id': cow_id,
                'timestamp': current_timestamp,
                'age_months': age_months,
                'ambient_temp_celsius': round(random.uniform(*ambient_temp_range), 1),
                'humidity_percent': round(random.uniform(*humidity_range), 1),
                'health_status': health_status
            }

            for metric, params in healthy_params.items():
                value = np.random.normal(params['mean'], params['std'])
                if is_sick_day:
                    # The sign of the range already encodes increase vs. decrease
                    change_range = sick_deviations[metric]
                    value += random.uniform(change_range['min_change'], change_range['max_change'])

                # Apply the status-specific bounds, then round/cast to the stored type
                low, high = clamp_bounds[metric][is_sick_day]
                value = max(low, min(high, value))
                if metric in one_decimal_metrics:
                    row[metric] = round(value, 1)
                else:
                    row[metric] = int(round(value))

            data_records.append(row)

# Create DataFrame
df = pd.DataFrame(data_records)

# --- Post-processing for Data Types ---
df['timestamp'] = pd.to_datetime(df['timestamp'])
# Low-cardinality string columns are stored as pandas categoricals
df['cow_id'] = df['cow_id'].astype('category')
df['health_status'] = df['health_status'].astype('category')

# --- Display Refined DataFrame Information ---
print("✨ Refined Synthetic Livestock Health Data! ✨\n")
print("First 5 rows of the DataFrame with refined columns:")
print(df.head())

print("\n--- DataFrame Info (Data Types and Non-Null Counts) ---")
df.info()

print("\n--- Basic Statistics ---")
print("Value Counts for Health Status:")
print(df['health_status'].value_counts())

# --- SAVE TO CSV ---
# Written relative to the working directory; the index is omitted.
output_filename = 'synthetic_livestock_health_data.csv'
df.to_csv(output_filename, index=False)
print(f"\n🎉 Dataset successfully saved to {output_filename} 🎉")

✨ Refined Synthetic Livestock Health Data! ✨

First 5 rows of the DataFrame with refined columns:
    cow_id           timestamp  age_months  ambient_temp_celsius  \
0  cow_001 2025-01-01 00:00:00          86                  26.4   
1  cow_001 2025-01-01 06:00:00          86                  26.3   
2  cow_001 2025-01-01 12:00:00          86                  25.2   
3  cow_001 2025-01-01 18:00:00          86                  22.2   
4  cow_001 2025-01-02 00:00:00          86                  25.6   

   humidity_percent health_status  temperature_celsius  heart_rate_bpm  \
0              78.5       healthy                 38.4              69   
1              64.4       healthy                 38.3              60   
2              50.1       healthy                 38.3              69   
3              69.9       healthy                 38.6              67   
4              77.2       healthy                 38.2              59   

   activity_score  rumination_minutes_day  feed_