In [1]:
import pandas as pd
import numpy as np

# How many synthetic cow records the dataset-generation loop produces.
num_samples = 1_000

# Generate features based on typical clinical signs for each stage
def generate_fever(stage):
    """Draw a random fever reading for the given disease stage.

    Each recognised stage has its own sampling range. A value is drawn
    uniformly from that range with ``np.random.uniform`` (global NumPy
    random state) and rounded to one decimal place.

    Parameters
    ----------
    stage : str
        One of 'Infection', 'Acute', 'Piroplasm' or 'Chronic'.

    Returns
    -------
    float or None
        The rounded reading (presumably degrees Celsius -- TODO confirm),
        or ``None`` when ``stage`` is not one of the four recognised names.
    """
    # (stage name, lower bound, upper bound) of each sampling range.
    fever_ranges = (
        ('Infection', 37, 38),
        ('Acute', 39.5, 41),
        ('Piroplasm', 38, 39),
        ('Chronic', 38, 39.5),
    )
    for name, low, high in fever_ranges:
        if stage == name:
            return np.round(np.random.uniform(low, high), 1)
    # Unrecognised stage: no reading is produced.
    return None

def generate_lymph_node_swelling(stage):
    """Return the lymph-node-swelling flag for a disease stage.

    Parameters
    ----------
    stage : str
        Name of the disease stage.

    Returns
    -------
    str
        'No' for the 'Infection' and 'Piroplasm' stages, 'Yes' for any
        other value (including unrecognised stage names).
    """
    stages_without_swelling = ('Infection', 'Piroplasm')
    return 'No' if stage in stages_without_swelling else 'Yes'

def generate_appetite_loss(stage):
    """Return the appetite-loss flag for a disease stage.

    Parameters
    ----------
    stage : str
        Name of the disease stage.

    Returns
    -------
    str
        'No' only for the 'Infection' stage; 'Yes' for every other value.
    """
    # Guard clause: 'Infection' is the only stage without appetite loss.
    if stage == 'Infection':
        return 'No'
    return 'Yes'

def generate_lethargy(stage):
    """Return the lethargy flag for a disease stage.

    Parameters
    ----------
    stage : str
        Name of the disease stage.

    Returns
    -------
    str
        'No' when ``stage`` is 'Infection', otherwise 'Yes'.
    """
    is_infection_stage = stage == 'Infection'
    return 'No' if is_infection_stage else 'Yes'

def generate_respiratory_signs(stage):
    """Return a respiratory-signs severity label for a disease stage.

    Parameters
    ----------
    stage : str
        Name of the disease stage.

    Returns
    -------
    str or None
        * 'None' (the literal string) for 'Infection' and 'Piroplasm'.
        * A random pick from 'Moderate'/'Severe' for 'Acute'.
        * A random pick from 'Mild'/'Moderate' for 'Chronic'.
        * ``None`` (the Python object) for any unrecognised stage.

        Random picks use ``np.random.choice`` on NumPy's global state.
    """
    # Stages with a fixed outcome never consume a random draw.
    if stage in ('Infection', 'Piroplasm'):
        return 'None'

    if stage == 'Acute':
        severities = ['Moderate', 'Severe']
    elif stage == 'Chronic':
        severities = ['Mild', 'Moderate']
    else:
        # Unrecognised stage: nothing to report.
        return None

    return np.random.choice(severities)

def generate_anemia(stage):
    """Return the anemia flag for a disease stage.

    Parameters
    ----------
    stage : str
        Name of the disease stage.

    Returns
    -------
    str
        'No' for the 'Infection' and 'Acute' stages, 'Yes' for any other
        value (including unrecognised stage names).
    """
    stages_without_anemia = ('Infection', 'Acute')
    if stage in stages_without_anemia:
        return 'No'
    return 'Yes'

def generate_jaundice(stage):
    """Return the jaundice flag for a disease stage.

    Parameters
    ----------
    stage : str
        Name of the disease stage.

    Returns
    -------
    str
        'No' for the 'Infection' and 'Piroplasm' stages, 'Yes' for any
        other value (including unrecognised stage names).
    """
    jaundice_free = stage in ('Infection', 'Piroplasm')
    return 'No' if jaundice_free else 'Yes'

def generate_weight_loss(stage):
    """Return the weight-loss flag for a disease stage.

    Parameters
    ----------
    stage : str
        Name of the disease stage.

    Returns
    -------
    str
        'No' only for the 'Infection' stage; 'Yes' for every other value.
    """
    answer = 'Yes'
    if stage == 'Infection':
        answer = 'No'
    return answer

def generate_reproductive_issues(stage):
    """Return the reproductive-issues flag for a disease stage.

    Parameters
    ----------
    stage : str
        Name of the disease stage.

    Returns
    -------
    str or None
        * 'No' for 'Infection' and 'Piroplasm'.
        * A random pick between 'Yes' and 'No' for 'Acute', drawn with
          ``np.random.choice`` on NumPy's global state.
        * 'Yes' for 'Chronic'.
        * ``None`` for any unrecognised stage.
    """
    # Fixed outcomes first; only the 'Acute' stage consumes a random draw.
    if stage in ('Infection', 'Piroplasm'):
        return 'No'
    if stage == 'Chronic':
        return 'Yes'
    if stage == 'Acute':
        return np.random.choice(['Yes', 'No'])
    return None

# Define the stages
stages = ['Infection', 'Acute', 'Piroplasm', 'Chronic']

# Seed NumPy's global generator before any draw. The stage pick below and the
# generate_* helpers all sample from np.random's global state, so without a
# seed every run would write a different CSV and downstream results could not
# be reproduced.
np.random.seed(42)

# Column order must match the order of the values appended for each record.
columns = ['Cow_ID', 'Fever', 'Lymph_Node_Swelling', 'Appetite_Loss', 'Lethargy', 'Respiratory_Signs', 'Anemia', 'Jaundice', 'Weight_Loss', 'Reproductive_Issues', 'Stage']

# Generate the dataset: one record per cow, with IDs running 1..num_samples.
# A stage is picked uniformly at random, then every feature is derived from it.
data = []
for i in range(1, num_samples + 1):
    stage = np.random.choice(stages)
    data.append([
        i,
        generate_fever(stage),
        generate_lymph_node_swelling(stage),
        generate_appetite_loss(stage),
        generate_lethargy(stage),
        generate_respiratory_signs(stage),
        generate_anemia(stage),
        generate_jaundice(stage),
        generate_weight_loss(stage),
        generate_reproductive_issues(stage),
        stage
    ])

# Create the DataFrame
df = pd.DataFrame(data, columns=columns)

# Save the generated dataset to a CSV file (without the DataFrame index)
file_path = 'theileriosis_stages_dataset.csv'
df.to_csv(file_path, index=False)

# Last expression of the cell: display where the file was written.
file_path


'theileriosis_stages_dataset.csv'

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Load the dataset.
# keep_default_na=False matters here: the Respiratory_Signs column contains the
# literal category 'None', which read_csv's default NA handling (recent pandas
# versions list 'None' among the default NA strings) would turn into a missing
# value. The fitted encoder would then learn NaN instead of the 'None' label.
file_path = 'theileriosis_stages_dataset.csv'
df = pd.read_csv(file_path, keep_default_na=False)

# Encode categorical features: every non-numeric column except the target gets
# its own LabelEncoder, kept in `label_encoders` under the column name.
# A "not numeric" check is used rather than `dtype == object` so that text
# columns are still detected when pandas reads them with a dedicated string
# dtype instead of object.
label_encoders = {}
for column in df.columns:
    if column != 'Stage' and not pd.api.types.is_numeric_dtype(df[column]):
        le = LabelEncoder()
        df[column] = le.fit_transform(df[column])
        label_encoders[column] = le

# Encode the target variable with a dedicated encoder
le_stage = LabelEncoder()
df['Stage'] = le_stage.fit_transform(df['Stage'])

# Split the dataset into features and target variable.
# Cow_ID is only a row identifier, so it is dropped from the features.
X = df.drop(columns=['Stage', 'Cow_ID'])
y = df['Stage']

# Split the dataset into training and testing sets (80% / 20%, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [4]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Build a 100-tree random forest with a fixed seed and train it on the
# training split; `fit` returns the estimator itself, so the two steps chain.
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predict on the held-out split and report the fraction of correct labels.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Model accuracy: {accuracy}')


Model accuracy: 1.0


In [6]:
import joblib

# Destination files for the three artifacts needed at prediction time.
model_path = 'theileriosis_stage_model.joblib'    # trained classifier
encoders_path = 'label_encoders.joblib'           # per-feature LabelEncoders
stage_encoder_path = 'stage_encoder.joblib'       # target (Stage) LabelEncoder

# Persist each object to its file, in the same order as listed above.
for artifact, destination in (
    (model, model_path),
    (label_encoders, encoders_path),
    (le_stage, stage_encoder_path),
):
    joblib.dump(artifact, destination)

# Last expression of the cell: display the paths that were written.
model_path, encoders_path, stage_encoder_path


('theileriosis_stage_model.joblib',
 'label_encoders.joblib',
 'stage_encoder.joblib')