# **Import Libraries**

In [2]:
import pandas as pd
import numpy as np

import tensorflow as tf
from tensorflow.keras.layers import LSTM,Bidirectional,Dense,Input,Add,Activation,Concatenate
from tensorflow.keras import Model

from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from tensorflow.keras.utils import plot_model
import scipy

# Colab-only: mount Google Drive at /content/drive so the CSV files read below are reachable.
from google.colab.drive import mount
mount("/content/drive")

Mounted at /content/drive


# **Read Data**

In [3]:
# Locations of the training and test CSV files on the mounted Drive.
train_csv_path = "/content/drive/MyDrive/Data Sets/Horse/train.csv"
test_csv_path = "/content/drive/MyDrive/Data Sets/Horse/test.csv"

# Load both files into DataFrames.
train_df = pd.read_csv(train_csv_path)
test_df = pd.read_csv(test_csv_path)

# Display the column names of the training frame.
train_df.columns

Index(['id', 'surgery', 'age', 'hospital_number', 'rectal_temp', 'pulse',
       'respiratory_rate', 'temp_of_extremities', 'peripheral_pulse',
       'mucous_membrane', 'capillary_refill_time', 'pain', 'peristalsis',
       'abdominal_distention', 'nasogastric_tube', 'nasogastric_reflux',
       'nasogastric_reflux_ph', 'rectal_exam_feces', 'abdomen',
       'packed_cell_volume', 'total_protein', 'abdomo_appearance',
       'abdomo_protein', 'surgical_lesion', 'lesion_1', 'lesion_2', 'lesion_3',
       'cp_data', 'outcome'],
      dtype='object')

In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
train_df.describe()

Unnamed: 0,id,hospital_number,rectal_temp,pulse,respiratory_rate,nasogastric_reflux_ph,packed_cell_volume,total_protein,abdomo_protein,lesion_1,lesion_2,lesion_3
count,1235.0,1235.0,1235.0,1235.0,1235.0,1235.0,1235.0,1235.0,1235.0,1235.0,1235.0,1235.0
mean,617.0,954500.4,38.202186,79.574089,30.054251,4.382591,49.602429,21.388016,3.290931,3832.496356,14.612146,3.577328
std,356.6581,1356403.0,0.788668,29.108638,16.452066,1.937357,10.5358,26.676453,1.589195,5436.733774,193.705735,88.858953
min,0.0,521399.0,35.4,30.0,8.0,1.0,23.0,3.5,0.1,0.0,0.0,0.0
25%,308.5,528800.0,37.8,53.0,18.0,2.0,43.0,6.6,2.0,2205.0,0.0,0.0
50%,617.0,529777.0,38.2,76.0,28.0,4.5,48.0,7.5,3.0,2209.0,0.0,0.0
75%,925.5,534145.0,38.6,100.0,36.0,6.0,57.0,9.1,4.3,3205.0,0.0,0.0
max,1234.0,5305129.0,40.8,184.0,96.0,7.5,75.0,89.0,10.1,41110.0,3112.0,2209.0


In [5]:
# Per-column non-null counts and dtypes of the training frame.
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1235 entries, 0 to 1234
Data columns (total 29 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   id                     1235 non-null   int64  
 1   surgery                1235 non-null   object 
 2   age                    1235 non-null   object 
 3   hospital_number        1235 non-null   int64  
 4   rectal_temp            1235 non-null   float64
 5   pulse                  1235 non-null   float64
 6   respiratory_rate       1235 non-null   float64
 7   temp_of_extremities    1235 non-null   object 
 8   peripheral_pulse       1235 non-null   object 
 9   mucous_membrane        1235 non-null   object 
 10  capillary_refill_time  1235 non-null   object 
 11  pain                   1235 non-null   object 
 12  peristalsis            1235 non-null   object 
 13  abdominal_distention   1235 non-null   object 
 14  nasogastric_tube       1235 non-null   object 
 15  naso

# **Visualize Data**

In [None]:
# NOTE(review): ProfileReport is not imported anywhere in the visible notebook, so this
# cell raises NameError on a fresh kernel. It presumably comes from ydata-profiling
# (formerly pandas-profiling) — confirm and add the import to the imports cell.
ProfileReport(train_df)

# **Preprocessing Data**

In [6]:
# Column names of the training frame (same listing as shown in the Read Data section).
train_df.columns

Index(['id', 'surgery', 'age', 'hospital_number', 'rectal_temp', 'pulse',
       'respiratory_rate', 'temp_of_extremities', 'peripheral_pulse',
       'mucous_membrane', 'capillary_refill_time', 'pain', 'peristalsis',
       'abdominal_distention', 'nasogastric_tube', 'nasogastric_reflux',
       'nasogastric_reflux_ph', 'rectal_exam_feces', 'abdomen',
       'packed_cell_volume', 'total_protein', 'abdomo_appearance',
       'abdomo_protein', 'surgical_lesion', 'lesion_1', 'lesion_2', 'lesion_3',
       'cp_data', 'outcome'],
      dtype='object')

In [7]:
# Drop the identifier columns; neither is listed among the model features.
# Reassigning (instead of inplace=True) together with errors="ignore" keeps this cell
# idempotent: re-running it after the columns are gone no longer raises KeyError.
train_df = train_df.drop(columns=["id", "hospital_number"], errors="ignore")

In [12]:
# Columns routed to the numerical preprocessing pipeline.
# NOTE(review): 'pulse' and 'respiratory_rate' are each listed twice, so every consumer of
# this list processes them twice. Removing the duplicates changes the transformed feature
# count, so check any downstream layer sizes when cleaning this up.
numericalVariables=[ 'rectal_temp', 'pulse','respiratory_rate',"pulse",'respiratory_rate', "nasogastric_reflux_ph",'packed_cell_volume', 'total_protein','abdomo_protein',"lesion_1"]

# Columns routed to the categorical preprocessing pipeline.
# NOTE(review): "pain" is listed twice; the same feature-count caveat applies.
categoricalVariables=["abdomo_appearance","pain","capillary_refill_time","mucous_membrane","abdomen","rectal_exam_feces","nasogastric_reflux","nasogastric_tube","abdominal_distention","peristalsis","pain",'temp_of_extremities', 'peripheral_pulse',]

# One-hot encode the target: one indicator column per distinct `outcome` value.
y_vector=pd.get_dummies(train_df["outcome"])

In [14]:
# Print the distinct values of every categorical column, framed by "--x--" marker lines.
for column_name in categoricalVariables:
    distinct_values = train_df[column_name].unique()
    print("--x--", "\n")
    print(column_name, distinct_values)
    print("--x--", "\n")


--x-- 

abdomo_appearance ['serosanguious' 'cloudy' 'clear' 'None']
--x-- 

--x-- 

pain ['depressed' 'mild_pain' 'extreme_pain' 'alert' 'severe_pain' 'None'
 'slight']
--x-- 

--x-- 

capillary_refill_time ['more_3_sec' 'less_3_sec' 'None' '3']
--x-- 

--x-- 

mucous_membrane ['dark_cyanotic' 'pale_cyanotic' 'pale_pink' 'normal_pink' 'bright_pink'
 'bright_red' 'None']
--x-- 

--x-- 

abdomen ['distend_small' 'distend_large' 'normal' 'firm' 'None' 'other']
--x-- 

--x-- 

rectal_exam_feces ['decreased' 'absent' 'None' 'normal' 'increased' 'serosanguious']
--x-- 

--x-- 

nasogastric_reflux ['less_1_liter' 'more_1_liter' 'none' 'None' 'slight']
--x-- 

--x-- 

nasogastric_tube ['slight' 'none' 'significant' 'None']
--x-- 

--x-- 

abdominal_distention ['slight' 'moderate' 'none' 'severe' 'None']
--x-- 

--x-- 

peristalsis ['absent' 'hypomotile' 'normal' 'hypermotile' 'None' 'distend_small']
--x-- 

--x-- 

pain ['depressed' 'mild_pain' 'extreme_pain' 'alert' 'severe_pain' 'None'
 'sli

In [None]:
# Encode the two-valued text columns as 0/1.
# The result is assigned back to the frame rather than calling
# train_df[col].replace(..., inplace=True): that chained in-place form operates on a
# temporary Series and does not update train_df under pandas Copy-on-Write.
# Re-running the cell is a no-op because the string keys no longer match anything.
# NOTE(review): none of these columns appear in numericalVariables or
# categoricalVariables, so they presumably never reach the model — confirm intent.
binary_encodings = {
    "cp_data": {"yes": 1, "no": 0},
    "surgical_lesion": {"yes": 1, "no": 0},
    "surgery": {"yes": 1, "no": 0},
    "age": {"adult": 1, "young": 0},
}
for column_name, mapping in binary_encodings.items():
    train_df[column_name] = train_df[column_name].replace(mapping)

# **Visualize Data - 2**

In [None]:
# One interactive Plotly box plot per numerical column.
# The title is passed to Plotly directly: plt.suptitle() only affects matplotlib figures,
# so it never reached these plots and merely created an empty matplotlib figure.
# dict.fromkeys() drops repeated column names (order preserved) so no plot is drawn twice.
for column_name in dict.fromkeys(numericalVariables):
    fig = px.box(train_df, y=column_name, title=f"Box Plot for {column_name}")
    fig.show()

# **Data Preprocessing - 2**

In [None]:
# Numerical columns: fill missing values with the column median, then standardize.
numericalPipeline = Pipeline([
    ("Imputer", SimpleImputer(strategy="median")),
    ("StandardScaler", StandardScaler()),
])

# Categorical columns: fill missing values with the most frequent value, then one-hot encode.
# handle_unknown="ignore" encodes a category that was not seen during fit as all zeros
# instead of raising, which matters when the fitted transformer is applied to other data
# such as test_df. It does not change the encoding of the data used for fitting.
categoricalPipeline = Pipeline([
    ("Imputer", SimpleImputer(strategy="most_frequent")),
    ("OneHotEncoder", OneHotEncoder(handle_unknown="ignore")),
])

In [None]:
# Route each column group through its own preprocessing pipeline.
column_routes = [
    ("NumericalVariables", numericalPipeline, numericalVariables),
    ("CategoricalVariables", categoricalPipeline, categoricalVariables),
]
Transformer = ColumnTransformer(transformers=column_routes)

In [None]:
# Fit the preprocessing on the training frame and transform it in one step.
x_train_vector=Transformer.fit_transform(train_df)

In [None]:
# The model below is fed a dense array. ColumnTransformer can return either a SciPy sparse
# matrix or a dense ndarray depending on the density of its output, so handle both.
# Calling .toarray() directly also avoids building an intermediate LIL matrix.
if scipy.sparse.issparse(x_train_vector):
    x_train_vector = x_train_vector.toarray()
else:
    x_train_vector = np.asarray(x_train_vector)


# **Model Building**

In [None]:
# Size the input from the preprocessed matrix instead of hard-coding 81, so the network
# keeps matching the data if the feature lists or the set of categories change.
n_features = x_train_vector.shape[1]
inputlayer = Input(shape=(n_features,), name="inputlayer")

# Parallel branch one: ReLU throughout.
# NOTE(review): 2056 looks like a typo for 2048 — left unchanged, confirm.
dense1 = Dense(2056, activation="relu")(inputlayer)
dense2 = Dense(1024, activation="relu")(dense1)
dense3 = Dense(512, activation="relu")(dense2)
dense4 = Dense(256, activation="relu")(dense3)
dense5 = Dense(256, activation="relu")(dense4)

# Parallel branch two: tanh on the first layer, ReLU afterwards.
dense6 = Dense(2056, activation="tanh")(inputlayer)
dense7 = Dense(1024, activation="relu")(dense6)
dense8 = Dense(512, activation="relu")(dense7)
dense9 = Dense(256, activation="relu")(dense8)
dense10 = Dense(256, activation="relu")(dense9)

# Merge the two branches by element-wise addition (not concatenation), then apply ReLU.
add = Activation("relu")(Add()([dense5, dense10]))

# Shared head ending in a 3-unit softmax
# (presumably one unit per `outcome` class — confirm against y_vector's columns).
dense11 = Dense(1024, activation="relu")(add)
dense12 = Dense(1024, activation="tanh")(dense11)
dense13 = Dense(512, activation="relu")(dense12)
dense14 = Dense(512, activation="relu")(dense13)
output = Dense(3, activation="softmax")(dense14)

In [None]:
# Assemble the functional model from the input and output tensors, then render its
# layer graph with shapes, layer names and activations shown.
ml = Model(inputs=inputlayer, outputs=output)
plot_model(
    ml,
    show_shapes=True,
    show_layer_names=True,
    show_layer_activations=True,
)

In [None]:
# y_vector is one-hot encoded (built with pd.get_dummies), so the matching loss is
# categorical_crossentropy; the sparse variant expects integer class indices instead.
# metrics is passed as a list, which is the documented form and is required by newer
# Keras versions (a bare string is rejected there).
ml.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Train on the full preprocessed training matrix for 100 epochs;
# verbose=2 logs one line per epoch.
ml.fit(
    x_train_vector,
    y_vector,
    epochs=100,
    verbose=2,
)