In [1]:
import pandas as pd
import random

# Define the number of rows (data points) in the DataFrame
num_rows = 1000

# Define ranges and categories for each parameter
temperature_range = (39.0, 42.0)
behavioral_score_categories = ['Normal', 'Slightly Abnormal', 'Abnormal']
respiratory_rate_range = (10, 40)
weight_range = (100, 2000)
feed_consumption_range = (0, 150)
age_weeks_range = (4, 52)

# Create a dictionary with synthetic survey data
data = {
    'Body Temperature (°C)': [random.uniform(*temperature_range) for _ in range(num_rows)],
    'Behavioral Score': [random.choice(behavioral_score_categories) for _ in range(num_rows)],
    'Respiratory Rate (breaths/minute)': [random.randint(*respiratory_rate_range) for _ in range(num_rows)],
    'Weight (grams)': [random.uniform(*weight_range) for _ in range(num_rows)],
    'Feed Consumption (grams/day)': [random.uniform(*feed_consumption_range) for _ in range(num_rows)],
    'Age (weeks)': [random.randint(*age_weeks_range) for _ in range(num_rows)],
    'Diagnosed Diseases': [random.choice(['Respiratory Infection', 'Coccidiosis', "Marek's Disease"]) for _ in range(num_rows)]
}

# Create the DataFrame
df = pd.DataFrame(data)

# Display the DataFrame
df.head()


Unnamed: 0,Body Temperature (°C),Behavioral Score,Respiratory Rate (breaths/minute),Weight (grams),Feed Consumption (grams/day),Age (weeks),Diagnosed Diseases
0,39.762445,Slightly Abnormal,40,1138.957093,69.617705,41,Marek's Disease
1,41.826932,Slightly Abnormal,15,997.157859,32.2247,48,Coccidiosis
2,39.695208,Normal,30,1372.005015,94.471927,27,Coccidiosis
3,41.399159,Abnormal,38,1660.388217,108.495961,42,Coccidiosis
4,39.156094,Abnormal,18,1070.502088,20.484453,5,Respiratory Infection


In [2]:
from sklearn.preprocessing import OrdinalEncoder
# Ordinal encoding for 'Behavioral Score'
ordinal_encoder = OrdinalEncoder(categories=[behavioral_score_categories])
df['Behavioral Score'] = ordinal_encoder.fit_transform(df[['Behavioral Score']])

# Display the DataFrame with ordinal encoding applied
df.head()

Unnamed: 0,Body Temperature (°C),Behavioral Score,Respiratory Rate (breaths/minute),Weight (grams),Feed Consumption (grams/day),Age (weeks),Diagnosed Diseases
0,39.762445,1.0,40,1138.957093,69.617705,41,Marek's Disease
1,41.826932,1.0,15,997.157859,32.2247,48,Coccidiosis
2,39.695208,0.0,30,1372.005015,94.471927,27,Coccidiosis
3,41.399159,2.0,38,1660.388217,108.495961,42,Coccidiosis
4,39.156094,2.0,18,1070.502088,20.484453,5,Respiratory Infection


In [3]:
from sklearn.preprocessing import LabelEncoder
# Label encoding for 'Diagnosed Diseases'
label_encoder = LabelEncoder()
df['Diagnosed Diseases'] = label_encoder.fit_transform(df['Diagnosed Diseases'])

# Display the DataFrame with label encoding applied
df.head()

Unnamed: 0,Body Temperature (°C),Behavioral Score,Respiratory Rate (breaths/minute),Weight (grams),Feed Consumption (grams/day),Age (weeks),Diagnosed Diseases
0,39.762445,1.0,40,1138.957093,69.617705,41,1
1,41.826932,1.0,15,997.157859,32.2247,48,0
2,39.695208,0.0,30,1372.005015,94.471927,27,0
3,41.399159,2.0,38,1660.388217,108.495961,42,0
4,39.156094,2.0,18,1070.502088,20.484453,5,2


In [6]:
df.duplicated().sum()

0

In [7]:
df.describe()

Unnamed: 0,Body Temperature (°C),Behavioral Score,Respiratory Rate (breaths/minute),Weight (grams),Feed Consumption (grams/day),Age (weeks),Diagnosed Diseases
count,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
mean,40.479604,1.014,24.879,1051.774127,74.624899,28.064,1.005
std,0.878849,0.790208,8.829968,554.067206,42.777775,14.370052,0.841239
min,39.005929,0.0,10.0,101.358484,0.098384,4.0,0.0
25%,39.706156,0.0,17.0,577.181603,36.965655,16.0,0.0
50%,40.440514,1.0,25.0,1039.174116,74.400501,28.0,1.0
75%,41.270231,2.0,32.0,1537.551716,111.554521,41.0,2.0
max,41.999052,2.0,40.0,1992.949311,149.512678,52.0,2.0


In [8]:
corr=df.corr()
corr

Unnamed: 0,Body Temperature (°C),Behavioral Score,Respiratory Rate (breaths/minute),Weight (grams),Feed Consumption (grams/day),Age (weeks),Diagnosed Diseases
Body Temperature (°C),1.0,0.042308,0.033217,-0.026456,0.068155,-0.063516,0.022599
Behavioral Score,0.042308,1.0,-0.028593,0.004878,-0.032002,0.00045,0.028505
Respiratory Rate (breaths/minute),0.033217,-0.028593,1.0,0.031539,0.010321,0.00765,-0.011642
Weight (grams),-0.026456,0.004878,0.031539,1.0,-0.003494,-0.004122,0.003894
Feed Consumption (grams/day),0.068155,-0.032002,0.010321,-0.003494,1.0,-0.035859,-0.010553
Age (weeks),-0.063516,0.00045,0.00765,-0.004122,-0.035859,1.0,-0.002511
Diagnosed Diseases,0.022599,0.028505,-0.011642,0.003894,-0.010553,-0.002511,1.0


In [15]:
X = df.drop(['Diagnosed Diseases'],axis=1)
y = df['Diagnosed Diseases']

In [16]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [17]:
from sklearn.preprocessing import MinMaxScaler
ms = MinMaxScaler()

X_train = ms.fit_transform(X_train)
X_test = ms.transform(X_test)

In [18]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()


sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [19]:
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import ExtraTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score

In [20]:
# create instances of all models
models = {
    'Logistic Regression': LogisticRegression(),
    'Naive Bayes': GaussianNB(),
    'Support Vector Machine': SVC(),
    'K-Nearest Neighbors': KNeighborsClassifier(),
    'Decision Tree': DecisionTreeClassifier(),
    'Random Forest': RandomForestClassifier(),
    'Bagging': BaggingClassifier(),
    'AdaBoost': AdaBoostClassifier(),
    'Gradient Boosting': GradientBoostingClassifier(),
    'Extra Trees': ExtraTreeClassifier(),
}


for name, md in models.items():
    md.fit(X_train,y_train)
    ypred = md.predict(X_test)
    
    print(f"{name}  with accuracy : {accuracy_score(y_test,ypred)}")

Logistic Regression  with accuracy : 0.385
Naive Bayes  with accuracy : 0.36
Support Vector Machine  with accuracy : 0.325
K-Nearest Neighbors  with accuracy : 0.315
Decision Tree  with accuracy : 0.345
Random Forest  with accuracy : 0.35
Bagging  with accuracy : 0.25
AdaBoost  with accuracy : 0.305
Gradient Boosting  with accuracy : 0.33
Extra Trees  with accuracy : 0.32


In [21]:
lr = LogisticRegression()
lr.fit(X_train,y_train)
ypred = lr.predict(X_test)
accuracy_score(y_test,ypred)

0.385

In [30]:
import numpy as np
def recommendation(temperature, behavioral_score, respiratory_rate, weight, feed_consumption, age_weeks):
    features = np.array([[temperature, behavioral_score, respiratory_rate, weight, feed_consumption, age_weeks]])
    transformed_features = ms.fit_transform(features)
    transformed_features = sc.fit_transform(transformed_features)
    prediction = lr.predict(transformed_features).reshape(1,-1)
    
    return prediction[0]

In [34]:
temperature=39.76
behavioral_score=1
respiratory_rate=40
weight=1138
feed_consumption=69.6
age_weeks=41
predict = recommendation(temperature, behavioral_score, respiratory_rate, weight, feed_consumption, age_weeks)
disease_dict = {0: "Coccidiosis", 1: "Marek's Disease", 2: "Respiratory Infection"}

if predict[0] in disease_dict:
    disease = disease_dict[predict[0]]
    print("{}".format(disease))
else:
    print("we are not able to recommend a proper disease")


Respiratory Infection 


In [35]:
import pickle
pickle.dump(lr,open('model.pkl','wb'))
pickle.dump(ms,open('minmaxscaler.pkl','wb'))
pickle.dump(sc,open('standscaler.pkl','wb'))