In [1]:
import numpy as np
import pandas as pd

# Set sample size
sample_size = 1000

# Introduce breed-specific weight and egg production characteristics
breeds = ["Leghorn", "Rhode Island Red", "Sussex", "Plymouth Rock"]
feed_types = ["Type A", "Type B", "Type C"]

# Create the dataset with patterns
np.random.seed(42)  # For reproducibility

# Generate random data for each feature
razza = np.random.choice(breeds, size=sample_size)
mangime = np.random.choice(feed_types, size=sample_size)
temperatura = np.random.uniform(20, 35, size=sample_size)  # temperature in Celsius
eta = np.random.randint(5, 75, size=sample_size)  # Age of hen in months

# Initialize arrays for weight and eggs based on patterns
peso = np.zeros(sample_size)
q_uova_mensili = np.zeros(sample_size)

# Apply patterns based on breed, feed type, and age
for i in range(sample_size):
    breed = razza[i]
    feed = mangime[i]
    age = eta[i]
    
    # Weight pattern (based on breed and feed type)
    if breed == "Leghorn":
        peso[i] = np.random.uniform(1500, 2500)  # Lighter weight
    elif breed == "Rhode Island Red":
        peso[i] = np.random.uniform(2500, 3500)  # Heavier weight
    elif breed == "Sussex":
        peso[i] = np.random.uniform(2000, 3000)  # Medium weight
    else:  # Plymouth Rock
        peso[i] = np.random.uniform(2200, 3200)
    
    # Adjust weight based on feed type
    if feed == "Type A":
        peso[i] += np.random.uniform(100, 300)  # Type A might lead to slightly heavier hens
    elif feed == "Type B":
        peso[i] += np.random.uniform(200, 400)  # Type B might increase weight more
    
    # Egg production pattern (based on age, breed, and feed type)
    if age <= 12:
        q_uova_mensili[i] = np.random.poisson(lam=8)  # Younger hens lay more eggs
    elif age <= 24:
        q_uova_mensili[i] = np.random.poisson(lam=6)  # Slightly fewer eggs
    else:
        q_uova_mensili[i] = np.random.poisson(lam=4)  # Older hens lay fewer eggs
    
    # Adjust egg production based on breed and feed type
    if breed == "Leghorn":
        q_uova_mensili[i] += np.random.randint(2, 4)  # Leghorns lay more eggs
    elif breed == "Rhode Island Red":
        q_uova_mensili[i] -= np.random.randint(0, 2)  # Fewer eggs for Rhode Island Red
    
    if feed == "Type A":
        q_uova_mensili[i] += np.random.randint(0, 2)  # Type A feed leads to higher egg production
    elif feed == "Type B":
        q_uova_mensili[i] -= np.random.randint(0, 1)  # Type B feed results in fewer eggs
    
    # Temperature effect on egg production
    if temperatura[i] < 22 or temperatura[i] > 30:
        q_uova_mensili[i] -= np.random.randint(0, 2)  # Extreme temperatures reduce egg production

# Create the DataFrame
data = {
    "eta": eta,
    "razza": razza,
    "mangime": mangime,
    "temperatura": temperatura,
    "peso": peso,
    "q_uova_mensili": q_uova_mensili
}

df = pd.DataFrame(data)

# Show the first few rows of the dataset
print(df.head())

   eta          razza mangime  temperatura         peso  q_uova_mensili
0   24         Sussex  Type B    33.417199  2713.508396             3.0
1   38  Plymouth Rock  Type C    34.872130  2313.303505             5.0
2   30        Leghorn  Type A    20.922755  2308.840657             7.0
3    9         Sussex  Type A    33.243594  3110.881379             4.0
4   65         Sussex  Type A    27.741185  2486.098303             7.0


In [2]:
df.to_csv("dataset.csv")