<a href="https://colab.research.google.com/github/Francis-Mwaniki/E-diary-prediction-system/blob/main/User_modified_e_diary_feeds.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestRegressor

# Step 1: Generate and Save the Dataset
#
# Every draw below comes from NumPy's legacy global generator, so the ORDER of
# the draws is part of the result: reordering these statements changes the data.

n_samples = 1000       # one row per simulated animal
np.random.seed(42)     # fixed seed so every run produces the same dataset

# Build the columns one at a time (insertion order == CSV column order).
# Note that randint's upper bound is exclusive.
data = {'AnimalID': np.arange(1, n_samples + 1)}
data['Breed'] = np.random.choice(['BreedA', 'BreedB', 'BreedC'], size=n_samples)
data['Age'] = np.random.randint(2, 10, size=n_samples)                # years, 2..9
data['Weight'] = np.random.randint(300, 800, size=n_samples)          # kg, 300..799
data['LactationPeriod'] = np.random.randint(1, 365, size=n_samples)   # days since last calving, 1..364
data['HealthStatus'] = np.random.choice(['Healthy', 'Sick'], size=n_samples, p=[0.9, 0.1])
data['FeedType'] = np.random.choice(['TypeA', 'TypeB', 'TypeC'], size=n_samples)
data['FeedAmount'] = np.random.uniform(10, 50, size=n_samples)        # kg of feed per day
data['WaterIntake'] = np.random.uniform(20, 100, size=n_samples)      # liters per day
data['Temperature'] = np.random.uniform(15, 35, size=n_samples)       # degrees Celsius
data['Humidity'] = np.random.uniform(30, 90, size=n_samples)          # percent
data['BarnCondition'] = np.random.choice(['Clean', 'Moderate', 'Dirty'], size=n_samples)
data['PastMilkProduction'] = np.random.uniform(10, 30, size=n_samples)  # liters per day
data['MilkingFrequency'] = np.random.randint(1, 3, size=n_samples)    # times per day, 1 or 2
data['VetVisits'] = np.random.randint(0, 5, size=n_samples)           # visits per month, 0..4

df = pd.DataFrame(data)

# Target = fixed linear combination of six numeric columns + unit-variance
# Gaussian noise. The terms are accumulated left to right, in this order.
target_weights = {
    'Age': 0.1,
    'Weight': 0.2,
    'LactationPeriod': 0.3,
    'FeedAmount': 0.2,
    'WaterIntake': 0.1,
    'PastMilkProduction': 0.05,
}
linear_part = sum(coef * df[column] for column, coef in target_weights.items())
df['MilkProduction'] = linear_part + np.random.normal(0, 1, n_samples)

# Persist the dataset; the training step reads it back from this file.
df.to_csv('data.csv', index=False)

# Show a preview of what was written.
print("Generated dataset:", df.head(), sep="\n")

# Step 2: Train the Model and Create a Prediction Function

def train_model(data_path='data.csv', test_size=0.2, random_state=42):
    """Fit a preprocessing + random-forest pipeline that predicts ``MilkProduction``.

    Parameters
    ----------
    data_path : str or path-like, default 'data.csv'
        CSV file to train on. It must contain a ``MilkProduction`` column and
        the feature columns listed in the body.
    test_size : float or int, default 0.2
        Forwarded to ``train_test_split``; these rows are held out and are not
        used for fitting.
    random_state : int, default 42
        Seed used for both the train/test split and the random forest.

    Returns
    -------
    sklearn.pipeline.Pipeline
        The fitted pipeline. Call ``.predict`` on a DataFrame that has the
        numeric and categorical feature columns.

    Notes
    -----
    Columns that appear in neither feature list (``AnimalID``, ``Temperature``
    and ``Humidity`` in the generated dataset) never reach the regressor:
    ``ColumnTransformer`` drops unlisted columns by default.
    With the default arguments the result is the same as the previous
    zero-argument version of this function.
    """
    # Load the dataset
    dataset = pd.read_csv(data_path)

    # Splitting the data into features (X) and target (y)
    X = dataset.drop(columns='MilkProduction')
    y = dataset['MilkProduction']

    # Only the training portion is needed here: this function fits the model
    # but does not score it, so the held-out rows are discarded explicitly.
    X_train, _, y_train, _ = train_test_split(
        X, y, test_size=test_size, random_state=random_state)

    # Columns the model consumes, grouped by how they are preprocessed
    numeric_features = ['Age', 'Weight', 'LactationPeriod', 'FeedAmount', 'WaterIntake', 'PastMilkProduction', 'MilkingFrequency', 'VetVisits']
    categorical_features = ['Breed', 'HealthStatus', 'FeedType', 'BarnCondition']

    # Numeric columns: fill gaps with the column mean, then standardize
    numeric_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler())])

    # Categorical columns: fill gaps with the literal 'missing', then one-hot
    # encode. Categories unseen during fit are encoded as all zeros instead
    # of raising (handle_unknown='ignore').
    categorical_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
        ('onehot', OneHotEncoder(handle_unknown='ignore'))])

    preprocessor = ColumnTransformer(
        transformers=[
            ('num', numeric_transformer, numeric_features),
            ('cat', categorical_transformer, categorical_features)])

    # Creating the final pipeline with a RandomForestRegressor
    model = Pipeline(steps=[
        ('preprocessor', preprocessor),
        ('regressor', RandomForestRegressor(n_estimators=100, random_state=random_state))])

    # Training the model
    model.fit(X_train, y_train)

    return model

def predict_milk_production(model, input_data):
    """Return the model's prediction for a single record.

    ``input_data`` is wrapped in a one-element list to build a one-row
    DataFrame, which is handed to ``model.predict``; the first element of
    whatever ``predict`` returns is the result.
    """
    single_row = pd.DataFrame([input_data])
    return model.predict(single_row)[0]

# Example usage: fit the pipeline once, then score one hand-written record.
model = train_model()

# A single animal described with the same columns as the generated dataset.
input_data = {
    # identity and body
    'AnimalID': 1, 'Breed': 'BreedA', 'Age': 5, 'Weight': 500,
    # lactation and health
    'LactationPeriod': 200, 'HealthStatus': 'Healthy',
    # feed and water
    'FeedType': 'TypeB', 'FeedAmount': 30, 'WaterIntake': 50,
    # environment
    'Temperature': 25, 'Humidity': 60, 'BarnCondition': 'Clean',
    # history and management
    'PastMilkProduction': 20, 'MilkingFrequency': 2, 'VetVisits': 1,
}

# Run the record through the fitted pipeline and report the estimate.
predicted_milk_production = predict_milk_production(model, input_data)
print(f"Predicted Milk Production: {predicted_milk_production:.2f} liters")


Generated dataset:
   AnimalID   Breed  Age  Weight  LactationPeriod HealthStatus FeedType  \
0         1  BreedC    8     316              281      Healthy    TypeA   
1         2  BreedA    4     659              253         Sick    TypeC   
2         3  BreedC    4     522              310      Healthy    TypeC   
3         4  BreedC    8     653              289      Healthy    TypeC   
4         5  BreedA    9     487              164      Healthy    TypeC   

   FeedAmount  WaterIntake  Temperature   Humidity BarnCondition  \
0   36.017010    86.238117    20.518977  65.407644         Dirty   
1   18.493359    50.239523    29.620207  61.449280         Clean   
2   14.880297    98.494525    16.532448  46.980038         Dirty   
3   38.055331    57.300263    28.280661  32.278558         Dirty   
4   14.537633    83.354850    19.751200  89.675651      Moderate   

   PastMilkProduction  MilkingFrequency  VetVisits  MilkProduction  
0           27.972779                 1          0  

In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestRegressor

def train_model(data_path='data.csv', test_size=0.2, random_state=42):
    """Fit a preprocessing + random-forest pipeline that predicts ``MilkProduction``.

    Parameters
    ----------
    data_path : str or path-like, default 'data.csv'
        CSV file to train on. It must contain a ``MilkProduction`` column and
        the feature columns listed in the body.
    test_size : float or int, default 0.2
        Forwarded to ``train_test_split``; these rows are held out and are not
        used for fitting.
    random_state : int, default 42
        Seed used for both the train/test split and the random forest.

    Returns
    -------
    sklearn.pipeline.Pipeline
        The fitted pipeline. Call ``.predict`` on a DataFrame that has the
        numeric and categorical feature columns.

    Notes
    -----
    Columns that appear in neither feature list (``AnimalID``, ``Temperature``
    and ``Humidity`` in the generated dataset) never reach the regressor:
    ``ColumnTransformer`` drops unlisted columns by default.
    With the default arguments the result is the same as the previous
    zero-argument version of this function.
    """
    # Load the dataset
    dataset = pd.read_csv(data_path)

    # Splitting the data into features (X) and target (y)
    X = dataset.drop(columns='MilkProduction')
    y = dataset['MilkProduction']

    # Only the training portion is needed here: this function fits the model
    # but does not score it, so the held-out rows are discarded explicitly.
    X_train, _, y_train, _ = train_test_split(
        X, y, test_size=test_size, random_state=random_state)

    # Columns the model consumes, grouped by how they are preprocessed
    numeric_features = ['Age', 'Weight', 'LactationPeriod', 'FeedAmount', 'WaterIntake', 'PastMilkProduction', 'MilkingFrequency', 'VetVisits']
    categorical_features = ['Breed', 'HealthStatus', 'FeedType', 'BarnCondition']

    # Numeric columns: fill gaps with the column mean, then standardize
    numeric_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler())])

    # Categorical columns: fill gaps with the literal 'missing', then one-hot
    # encode. Categories unseen during fit are encoded as all zeros instead
    # of raising (handle_unknown='ignore').
    categorical_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
        ('onehot', OneHotEncoder(handle_unknown='ignore'))])

    preprocessor = ColumnTransformer(
        transformers=[
            ('num', numeric_transformer, numeric_features),
            ('cat', categorical_transformer, categorical_features)])

    # Creating the final pipeline with a RandomForestRegressor
    model = Pipeline(steps=[
        ('preprocessor', preprocessor),
        ('regressor', RandomForestRegressor(n_estimators=100, random_state=random_state))])

    # Training the model
    model.fit(X_train, y_train)

    return model

def predict_milk_production(model, input_data):
    """Score one record with a fitted model and return the single prediction.

    The record is placed in a one-row DataFrame (one column per key of
    ``input_data``) before being passed to ``model.predict``; element 0 of
    the returned sequence is handed back to the caller.
    """
    record_frame = pd.DataFrame([input_data])
    predictions = model.predict(record_frame)
    return predictions[0]

# Example usage
def _ask_number(prompt, cast):
    """Prompt until the reply can be converted with ``cast`` (``int`` or ``float``).

    A reply that ``cast`` rejects with ``ValueError`` triggers a short message
    and a new prompt instead of aborting the cell.
    """
    while True:
        reply = input(prompt)
        try:
            return cast(reply)
        except ValueError:
            print(f"Invalid number: {reply!r}. Please try again.")


def _ask_choice(prompt, choices):
    """Prompt until the reply names one of ``choices``; return its canonical spelling.

    Matching ignores case and surrounding whitespace. Validation matters here
    because the one-hot encoder built in ``train_model`` uses
    ``handle_unknown='ignore'``: a misspelled category would not raise, it
    would be silently encoded as all zeros.
    """
    canonical = {choice.lower(): choice for choice in choices}
    while True:
        reply = input(prompt).strip().lower()
        if reply in canonical:
            return canonical[reply]
        print(f"Please enter one of: {', '.join(choices)}")


if __name__ == "__main__":
    # Train the model
    model = train_model()

    # Prompt the user for input data (same columns as the generated dataset)
    input_data = {
        'AnimalID': _ask_number("Enter Animal ID: ", int),
        'Breed': _ask_choice("Enter Breed (BreedA, BreedB, BreedC): ", ['BreedA', 'BreedB', 'BreedC']),
        'Age': _ask_number("Enter Age in years: ", int),
        'Weight': _ask_number("Enter Weight in kg: ", float),
        'LactationPeriod': _ask_number("Enter Lactation Period in days: ", int),
        'HealthStatus': _ask_choice("Enter Health Status (Healthy, Sick): ", ['Healthy', 'Sick']),
        'FeedType': _ask_choice("Enter Feed Type (TypeA, TypeB, TypeC): ", ['TypeA', 'TypeB', 'TypeC']),
        'FeedAmount': _ask_number("Enter Feed Amount in kg per day: ", float),
        'WaterIntake': _ask_number("Enter Water Intake in liters per day: ", float),
        'Temperature': _ask_number("Enter Temperature in degree Celsius: ", float),
        'Humidity': _ask_number("Enter Humidity in percentage: ", float),
        'BarnCondition': _ask_choice("Enter Barn Condition (Clean, Moderate, Dirty): ", ['Clean', 'Moderate', 'Dirty']),
        'PastMilkProduction': _ask_number("Enter Past Milk Production in liters per day: ", float),
        'MilkingFrequency': _ask_number("Enter Milking Frequency per day: ", int),
        'VetVisits': _ask_number("Enter Veterinary Visits per month: ", int)
    }

    # Predict milk production
    predicted_milk_production = predict_milk_production(model, input_data)
    print(f"Predicted Milk Production: {predicted_milk_production:.2f} liters")


Enter Animal ID: 1
Enter Breed (BreedA, BreedB, BreedC): BreedA
Enter Age in years: 6
Enter Weight in kg: 200
Enter Lactation Period in days: 300
Enter Health Status (Healthy, Sick): Healthy
Enter Feed Type (TypeA, TypeB, TypeC): TypeB
Enter Feed Amount in kg per day: 30
Enter Water Intake in liters per day: 20
Enter Temperature in degree Celsius: 20
Enter Humidity in percentage: 65
Enter Barn Condition (Clean, Moderate, Dirty): Clean
Enter Past Milk Production in liters per day: 20
Enter Milking Frequency per day: 2
Enter Veterinary Visits per month: 1
Predicted Milk Production: 165.90 liters


In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestRegressor

def train_model(data_path='data.csv', test_size=0.2, random_state=42):
    """Fit a preprocessing + random-forest pipeline that predicts ``MilkProduction``.

    Parameters
    ----------
    data_path : str or path-like, default 'data.csv'
        CSV file to train on. It must contain a ``MilkProduction`` column and
        the feature columns listed in the body.
    test_size : float or int, default 0.2
        Forwarded to ``train_test_split``; these rows are held out and are not
        used for fitting.
    random_state : int, default 42
        Seed used for both the train/test split and the random forest.

    Returns
    -------
    sklearn.pipeline.Pipeline
        The fitted pipeline. Call ``.predict`` on a DataFrame that has the
        numeric and categorical feature columns.

    Notes
    -----
    Columns that appear in neither feature list (``AnimalID``, ``Temperature``
    and ``Humidity`` in the generated dataset) never reach the regressor:
    ``ColumnTransformer`` drops unlisted columns by default.
    With the default arguments the result is the same as the previous
    zero-argument version of this function.
    """
    # Load the dataset
    dataset = pd.read_csv(data_path)

    # Splitting the data into features (X) and target (y)
    X = dataset.drop(columns='MilkProduction')
    y = dataset['MilkProduction']

    # Only the training portion is needed here: this function fits the model
    # but does not score it, so the held-out rows are discarded explicitly.
    X_train, _, y_train, _ = train_test_split(
        X, y, test_size=test_size, random_state=random_state)

    # Columns the model consumes, grouped by how they are preprocessed
    numeric_features = ['Age', 'Weight', 'LactationPeriod', 'FeedAmount', 'WaterIntake', 'PastMilkProduction', 'MilkingFrequency', 'VetVisits']
    categorical_features = ['Breed', 'HealthStatus', 'FeedType', 'BarnCondition']

    # Numeric columns: fill gaps with the column mean, then standardize
    numeric_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler())])

    # Categorical columns: fill gaps with the literal 'missing', then one-hot
    # encode. Categories unseen during fit are encoded as all zeros instead
    # of raising (handle_unknown='ignore').
    categorical_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
        ('onehot', OneHotEncoder(handle_unknown='ignore'))])

    preprocessor = ColumnTransformer(
        transformers=[
            ('num', numeric_transformer, numeric_features),
            ('cat', categorical_transformer, categorical_features)])

    # Creating the final pipeline with a RandomForestRegressor
    model = Pipeline(steps=[
        ('preprocessor', preprocessor),
        ('regressor', RandomForestRegressor(n_estimators=100, random_state=random_state))])

    # Training the model
    model.fit(X_train, y_train)

    return model

def predict_milk_production(model, input_data):
    """Predict for one record and return the first element of the model's output.

    ``model`` only needs a ``predict`` method that accepts a DataFrame; the
    record is wrapped in a list so that pandas builds exactly one row from it.
    """
    one_row = pd.DataFrame([input_data])
    return model.predict(one_row)[0]

def _ask_number(prompt, cast):
    """Prompt until the reply can be converted with ``cast`` (``int`` or ``float``).

    A reply that ``cast`` rejects with ``ValueError`` triggers a short message
    and a new prompt instead of aborting the cell.
    """
    while True:
        reply = input(prompt)
        try:
            return cast(reply)
        except ValueError:
            print(f"Invalid number: {reply!r}. Please try again.")


def _ask_choice(prompt, choices):
    """Prompt until the reply names one of ``choices``; return its canonical spelling.

    Matching ignores case and surrounding whitespace. Returning the canonical
    spelling matters twice: the one-hot encoder built in ``train_model`` uses
    ``handle_unknown='ignore'`` (a misspelled category is silently encoded as
    all zeros), and the advice section below compares against the exact
    string 'Sick'.
    """
    canonical = {choice.lower(): choice for choice in choices}
    while True:
        reply = input(prompt).strip().lower()
        if reply in canonical:
            return canonical[reply]
        print(f"Please enter one of: {', '.join(choices)}")


if __name__ == "__main__":
    # Train the model
    model = train_model()

    # Prompt the user for input data (same columns as the generated dataset)
    input_data = {
        'AnimalID': _ask_number("Enter Animal ID: ", int),
        'Breed': _ask_choice("Enter Breed (BreedA, BreedB, BreedC): ", ['BreedA', 'BreedB', 'BreedC']),
        'Age': _ask_number("Enter Age in years: ", int),
        'Weight': _ask_number("Enter Weight in kg: ", float),
        'LactationPeriod': _ask_number("Enter Lactation Period in days: ", int),
        'HealthStatus': _ask_choice("Enter Health Status (Healthy, Sick): ", ['Healthy', 'Sick']),
        'FeedType': _ask_choice("Enter Feed Type (TypeA, TypeB, TypeC): ", ['TypeA', 'TypeB', 'TypeC']),
        'FeedAmount': _ask_number("Enter Feed Amount in kg per day: ", float),
        'WaterIntake': _ask_number("Enter Water Intake in liters per day: ", float),
        'Temperature': _ask_number("Enter Temperature in degree Celsius: ", float),
        'Humidity': _ask_number("Enter Humidity in percentage: ", float),
        'BarnCondition': _ask_choice("Enter Barn Condition (Clean, Moderate, Dirty): ", ['Clean', 'Moderate', 'Dirty']),
        'PastMilkProduction': _ask_number("Enter Past Milk Production in liters per day: ", float),
        'MilkingFrequency': _ask_number("Enter Milking Frequency per day: ", int),
        'VetVisits': _ask_number("Enter Veterinary Visits per month: ", int)
    }

    # Predict milk production
    predicted_milk_production = predict_milk_production(model, input_data)
    print(f"Predicted Milk Production: {predicted_milk_production:.2f} liters per day")

    # Providing Nutrition Advice: rule-based checks on the raw inputs,
    # independent of the model's prediction. Each rule fires on its own, so
    # several pieces of advice may be printed.
    if input_data['HealthStatus'] == 'Sick':
        print("Advice: Your cow is sick. Consult a veterinarian for appropriate medical treatment.")
    if input_data['FeedAmount'] < 20:
        print("Advice: Increase the feed amount to at least 20 kg per day to ensure adequate nutrition.")
    if input_data['WaterIntake'] < 40:
        print("Advice: Increase the water intake to at least 40 liters per day to ensure proper hydration.")


Enter Animal ID: 1
Enter Breed (BreedA, BreedB, BreedC): BreedA
Enter Age in years: 4
Enter Weight in kg: 300
Enter Lactation Period in days: 200
Enter Health Status (Healthy, Sick): Sick
Enter Feed Type (TypeA, TypeB, TypeC): TypeA
Enter Feed Amount in kg per day: 15
Enter Water Intake in liters per day: 200
Enter Temperature in degree Celsius: 10
Enter Humidity in percentage: 30
Enter Barn Condition (Clean, Moderate, Dirty): Moderate
Enter Past Milk Production in liters per day: 10
Enter Milking Frequency per day: 2
Enter Veterinary Visits per month: 1
Predicted Milk Production: 137.70 liters per day
Advice: Your cow is sick. Consult a veterinarian for appropriate medical treatment.
Advice: Increase the feed amount to at least 20 kg per day to ensure adequate nutrition.


# Sample input data for prediction


```python
input_data = {
    'AnimalID': 1,
    'Breed': 'BreedA',
    'Age': 5,
    'Weight': 500,
    'LactationPeriod': 200,
    'HealthStatus': 'Healthy',
    'FeedType': 'TypeB',
    'FeedAmount': 30,
    'WaterIntake': 50,
    'Temperature': 25,
    'Humidity': 60,
    'BarnCondition': 'Clean',
    'PastMilkProduction': 20,
    'MilkingFrequency': 2,
    'VetVisits': 1
}
```
