In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
import pickle
import joblib

In [2]:
data = pd.read_csv('synthetic_cow_health_data.csv')

In [3]:
data.head(10)

Unnamed: 0,temperature,blood_pressure,blood_oxygen,disease
0,39.435426,97.474845,87.534877,Respiratory Disease
1,37.061308,69.631068,89.559155,Circulatory Shock
2,36.139867,63.21911,90.87519,Circulatory Shock
3,37.525368,72.426094,93.132571,Milk Fever
4,36.609079,78.226925,94.713761,Milk Fever
5,36.611398,66.762315,85.759108,Circulatory Shock
6,36.976525,80.746696,91.732977,Milk Fever
7,39.713483,88.43388,92.889448,Respiratory Disease
8,41.305456,97.500174,94.453206,Heat Stress
9,39.634303,97.71187,88.314365,Respiratory Disease


In [4]:
data.sum()

temperature                                           233434.073233
blood_pressure                                        544898.668258
blood_oxygen                                          554108.897171
disease           Respiratory DiseaseCirculatory ShockCirculator...
dtype: object

In [5]:
data.count()

temperature       6000
blood_pressure    6000
blood_oxygen      6000
disease           6000
dtype: int64

In [6]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6000 entries, 0 to 5999
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   temperature     6000 non-null   float64
 1   blood_pressure  6000 non-null   float64
 2   blood_oxygen    6000 non-null   float64
 3   disease         6000 non-null   object 
dtypes: float64(3), object(1)
memory usage: 187.6+ KB


In [7]:
X = data.drop('disease', axis=1)
y = data['disease']

In [8]:
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y) 

In [9]:
y

array([5, 0, 0, ..., 2, 3, 5])

# Identifying Encoded Values

In [10]:
original_labels = label_encoder.inverse_transform(y)

result_df = pd.DataFrame({
    'Encoded': y,
    'Original': data.disease
})

print("\nEncoded and Original Labels:")
print(result_df)


Encoded and Original Labels:
      Encoded             Original
0           5  Respiratory Disease
1           0    Circulatory Shock
2           0    Circulatory Shock
3           4           Milk Fever
4           4           Milk Fever
...       ...                  ...
5995        3             Mastitis
5996        2          Heat Stress
5997        2          Heat Stress
5998        3             Mastitis
5999        5  Respiratory Disease

[6000 rows x 2 columns]


In [11]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model Training

In [12]:
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(),
    "SVM": SVC(),
    "K-Nearest Neighbors": KNeighborsClassifier(),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss')
}

In [13]:
for name, model in models.items():
    print(f"\n{'='*30}")
    print(f"Model: {name}")
    print(f"{'='*30}")
    
    model.fit(X_train, y_train)
    
    y_pred = model.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy:.2f}")

    print("\nClassification Report:")
    print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))

    print("\nConfusion Matrix:")
    print(confusion_matrix(y_test, y_pred))
    print("\n")


Model: Logistic Regression


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Accuracy: 0.80

Classification Report:
                     precision    recall  f1-score   support

  Circulatory Shock       0.87      0.84      0.85       198
            Healthy       0.98      0.98      0.98       207
        Heat Stress       0.72      0.74      0.73       220
           Mastitis       0.70      0.79      0.75       185
         Milk Fever       0.80      0.84      0.82       189
Respiratory Disease       0.69      0.59      0.63       201

           accuracy                           0.80      1200
          macro avg       0.79      0.80      0.79      1200
       weighted avg       0.79      0.80      0.79      1200


Confusion Matrix:
[[166   0   0   0  30   2]
 [  0 203   0   4   0   0]
 [  4   0 162  15   3  36]
 [  0   2  16 147   5  15]
 [ 21   2   0   8 158   0]
 [  0   0  47  35   1 118]]



Model: Decision Tree
Accuracy: 0.94

Classification Report:
                     precision    recall  f1-score   support

  Circulatory Shock       0.99      0.96 

In [14]:
xgb_model = models["XGBoost"]
model_filename = 'cows_diseases_detection_model_XGB.pkl'
with open(model_filename, 'wb') as file:
    pickle.dump(xgb_model, file)  # Save the trained XGBoost model
print(f"Model saved as {model_filename}")

Model saved as cows_diseases_detection_model_XGB.pkl


In [15]:
data.head(100)

Unnamed: 0,temperature,blood_pressure,blood_oxygen,disease
0,39.435426,97.474845,87.534877,Respiratory Disease
1,37.061308,69.631068,89.559155,Circulatory Shock
2,36.139867,63.219110,90.875190,Circulatory Shock
3,37.525368,72.426094,93.132571,Milk Fever
4,36.609079,78.226925,94.713761,Milk Fever
...,...,...,...,...
95,36.922077,84.037249,91.288556,Milk Fever
96,38.348069,73.357854,93.000888,Milk Fever
97,39.997212,92.366208,92.962141,Respiratory Disease
98,36.178867,61.110945,81.345364,Circulatory Shock


# Predictions with User inputs

In [18]:
label_mapping = {
    0: 'Circulatory Shock',
    1: 'Healthy',
    2: 'Heat Stress',
    3: 'Mastitis',
    4: 'Milk Fever',
    5: 'Respiratory Disease'
}


loaded_model = joblib.load('cows_diseases_detection_model_XGB.pkl')
def predict_disease(temperature, blood_pressure, blood_oxygen):
    user_input = pd.DataFrame([[temperature, blood_pressure, blood_oxygen]], columns=['temperature', 'blood_pressure', 'blood_oxygen'])
    prediction = loaded_model.predict(user_input)
    predicted_disease = label_mapping[prediction[0]]
    return predicted_disease

# Example user input
temperature = float(input("Enter the cow's temperature: "))
blood_pressure = float(input("Enter the cow's blood pressure: "))
blood_oxygen = float(input("Enter the cow's blood oxygen level: "))

predicted_disease = predict_disease(temperature, blood_pressure, blood_oxygen)
print(f"The predicted disease is: {predicted_disease}")

Enter the cow's temperature: 37.061308
Enter the cow's blood pressure: 69.631068	
Enter the cow's blood oxygen level: 89.559155
The predicted disease is: Circulatory Shock


In [17]:
data.head(10)

Unnamed: 0,temperature,blood_pressure,blood_oxygen,disease
0,39.435426,97.474845,87.534877,Respiratory Disease
1,37.061308,69.631068,89.559155,Circulatory Shock
2,36.139867,63.21911,90.87519,Circulatory Shock
3,37.525368,72.426094,93.132571,Milk Fever
4,36.609079,78.226925,94.713761,Milk Fever
5,36.611398,66.762315,85.759108,Circulatory Shock
6,36.976525,80.746696,91.732977,Milk Fever
7,39.713483,88.43388,92.889448,Respiratory Disease
8,41.305456,97.500174,94.453206,Heat Stress
9,39.634303,97.71187,88.314365,Respiratory Disease
