Exp1

Disease

In [36]:
from sklearn.metrics import f1_score
from sklearn.model_selection import cross_val_score, train_test_split
import pandas as pd
import xgboost as xgb

input_data = pd.read_csv('../PhysicalActivity_total_hours.csv')
output_data = pd.read_csv('../disease_output.csv')

X = input_data.drop('dog_id', axis=1)

total_f1_macro = 0
total_f1_micro = 0
total_crossval_f1_macro = 0
total_crossval_f1_micro = 0
num_columns = 0

for column in output_data.columns:
    if column != 'dog_id':
        y = output_data[column]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        model = xgb.XGBClassifier()
        model.fit(X_train, y_train)

        y_pred = model.predict(X_test)
        total_f1_macro += f1_score(y_test, y_pred, average='macro')
        total_f1_micro += f1_score(y_test, y_pred, average='micro')
        total_crossval_f1_macro += cross_val_score(model, X, y, cv=5, scoring='f1_macro').mean()
        total_crossval_f1_micro += cross_val_score(model, X, y, cv=5, scoring='f1_micro').mean()

        num_columns += 1

average_f1_macro = total_f1_macro / num_columns
average_f1_micro = total_f1_micro / num_columns
average_crossval_f1_macro = total_crossval_f1_macro / num_columns
average_crossval_f1_micro = total_crossval_f1_micro / num_columns

print(f"Average F1 Score (Macro): {average_f1_macro:.2%}")
print(f"Average F1 Score (Micro): {average_f1_micro:.2%}")
print(f"Average Crossval F1 Score (Macro): {average_crossval_f1_macro:.2%}")
print(f"Average Crossval F1 Score (Micro): {average_crossval_f1_micro:.2%}")




Average F1 Score (Macro): 30.32%
Average F1 Score (Micro): 88.52%
Average Crossval F1 Score (Macro): 29.87%
Average Crossval F1 Score (Micro): 88.41%


Exp 2

Individual Breed

In [18]:
from sklearn.ensemble import GradientBoostingRegressor
import xgboost as xgb
from xgboost import XGBClassifier
import pandas as pd
from sklearn.metrics import f1_score
from sklearn.model_selection import cross_val_score, train_test_split

input_data = pd.read_csv('../one_hot_encoded_breed_with_id.csv')
output_data = pd.read_csv('../disease_output.csv')

X = input_data.drop('dog_id', axis=1)

total_f1 = 0
total_crossval_f1 = 0
num_columns = 0

# for each column in the output data, train a model and calculate the f1 score
for column in output_data.columns:
    if column != 'dog_id':
        y = output_data[column]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        model = xgb.XGBClassifier()
        model.fit(X_train, y_train)

        y_pred = model.predict(X_test)
        total_f1_macro += f1_score(y_test, y_pred, average='macro')
        total_f1_micro += f1_score(y_test, y_pred, average='micro')
        total_crossval_f1_macro += cross_val_score(model, X, y, cv=5, scoring='f1_macro').mean()
        total_crossval_f1_micro += cross_val_score(model, X, y, cv=5, scoring='f1_micro').mean()

        num_columns += 1

average_f1_macro = total_f1_macro / num_columns
average_f1_micro = total_f1_micro / num_columns
average_crossval_f1_macro = total_crossval_f1_macro / num_columns
average_crossval_f1_micro = total_crossval_f1_micro / num_columns

print(f"Average F1 Score (Macro): {average_f1_macro:.2%}")
print(f"Average F1 Score (Micro): {average_f1_micro:.2%}")
print(f"Average Crossval F1 Score (Macro): {average_crossval_f1_macro:.2%}")
print(f"Average Crossval F1 Score (Micro): {average_crossval_f1_micro:.2%}")



Average F1 Score: 83.64%
Average Crossval F1 Score: 83.47%


Exp 3
Individual Breed and Age

In [26]:
from sklearn.ensemble import GradientBoostingRegressor
import xgboost as xgb
from xgboost import XGBClassifier
import pandas as pd
from sklearn.metrics import f1_score
from sklearn.model_selection import cross_val_score, train_test_split

input_data = pd.read_csv('../one_hot_encoded_breed_with_id.csv')
input_data2 = pd.read_csv('../one_hot_encoded_age_with_id.csv')
output_data = pd.read_csv('../disease_output.csv')

#convert .csv txt to boolean
for col in input_data2.columns:
    if 'age' in col:
        input_data2[col] = input_data2[col].astype(bool)

merged_data = pd.merge(input_data, input_data2, on='dog_id')
X = merged_data.drop('dog_id', axis=1)

total_f1 = 0
total_crossval_f1 = 0
num_columns = 0

# for each column in the output data, train a model and calculate the f1 score
for column in output_data.columns:
    if column != 'dog_id':
        y = output_data[column]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        model = xgb.XGBClassifier()
        model.fit(X_train, y_train)

        y_pred = model.predict(X_test)
        total_f1_macro += f1_score(y_test, y_pred, average='macro')
        total_f1_micro += f1_score(y_test, y_pred, average='micro')
        total_crossval_f1_macro += cross_val_score(model, X, y, cv=5, scoring='f1_macro').mean()
        total_crossval_f1_micro += cross_val_score(model, X, y, cv=5, scoring='f1_micro').mean()

        num_columns += 1

average_f1_macro = total_f1_macro / num_columns
average_f1_micro = total_f1_micro / num_columns
average_crossval_f1_macro = total_crossval_f1_macro / num_columns
average_crossval_f1_micro = total_crossval_f1_micro / num_columns

print(f"Average F1 Score (Macro): {average_f1_macro:.2%}")
print(f"Average F1 Score (Micro): {average_f1_micro:.2%}")
print(f"Average Crossval F1 Score (Macro): {average_crossval_f1_macro:.2%}")
print(f"Average Crossval F1 Score (Micro): {average_crossval_f1_micro:.2%}")

KeyboardInterrupt: 

Exp4

Disease and Breed Group

In [27]:
from sklearn.ensemble import GradientBoostingRegressor
import xgboost as xgb
from xgboost import XGBClassifier
import pandas as pd
from sklearn.metrics import f1_score
from sklearn.model_selection import cross_val_score, train_test_split

input_data = pd.read_csv('../one_hot_encoded_breed_group_with_id.csv')
input_data2 = pd.read_csv('../one_hot_encoded_disease_input.csv')
output_data = pd.read_csv('../disease_output.csv')

#convert .csv txt to boolean
for col in input_data2.columns:
    if 'sex' in col:
        input_data2[col] = input_data2[col].astype(bool)

merged_data = pd.merge(input_data, input_data2, on='dog_id')
X = merged_data.drop('dog_id', axis=1)

total_f1 = 0
total_crossval_f1 = 0
num_columns = 0

# for each column in the output data, train a model and calculate the f1 score
for column in output_data.columns:
    if column != 'dog_id':
        y = output_data[column]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        model = xgb.XGBClassifier()
        model.fit(X_train, y_train)

        y_pred = model.predict(X_test)
        total_f1_macro += f1_score(y_test, y_pred, average='macro')
        total_f1_micro += f1_score(y_test, y_pred, average='micro')
        total_crossval_f1_macro += cross_val_score(model, X, y, cv=5, scoring='f1_macro').mean()
        total_crossval_f1_micro += cross_val_score(model, X, y, cv=5, scoring='f1_micro').mean()

        num_columns += 1

average_f1_macro = total_f1_macro / num_columns
average_f1_micro = total_f1_micro / num_columns
average_crossval_f1_macro = total_crossval_f1_macro / num_columns
average_crossval_f1_micro = total_crossval_f1_micro / num_columns

print(f"Average F1 Score (Macro): {average_f1_macro:.2%}")
print(f"Average F1 Score (Micro): {average_f1_micro:.2%}")
print(f"Average Crossval F1 Score (Macro): {average_crossval_f1_macro:.2%}")
print(f"Average Crossval F1 Score (Micro): {average_crossval_f1_micro:.2%}")

KeyboardInterrupt: 

Exp 5

Disease / Breed Group / Individual Breed

In [28]:
from sklearn.ensemble import GradientBoostingRegressor
import xgboost as xgb
from xgboost import XGBClassifier
import pandas as pd
from sklearn.metrics import f1_score
from sklearn.model_selection import cross_val_score, train_test_split

input_data = pd.read_csv('../one_hot_encoded_breed_group_with_id.csv')
input_data2 = pd.read_csv('../one_hot_encoded_breed_with_id.csv')
input_data3 = pd.read_csv('../one_hot_encoded_disease_input.csv')
output_data = pd.read_csv('../disease_output.csv')

#convert .csv txt to boolean
# for col in input_data2.columns:
#     if 'age' in col:
#         input_data2[col] = input_data2[col].astype(bool)

merged_data = pd.merge(input_data, input_data2, on='dog_id')
X = merged_data.drop('dog_id', axis=1)

total_f1 = 0
total_crossval_f1 = 0
num_columns = 0

# for each column in the output data, train a model and calculate the f1 score
for column in output_data.columns:
    if column != 'dog_id':
        y = output_data[column]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        model = xgb.XGBClassifier()
        model.fit(X_train, y_train)

        y_pred = model.predict(X_test)
        total_f1_macro += f1_score(y_test, y_pred, average='macro')
        total_f1_micro += f1_score(y_test, y_pred, average='micro')
        total_crossval_f1_macro += cross_val_score(model, X, y, cv=5, scoring='f1_macro').mean()
        total_crossval_f1_micro += cross_val_score(model, X, y, cv=5, scoring='f1_micro').mean()

        num_columns += 1

average_f1_macro = total_f1_macro / num_columns
average_f1_micro = total_f1_micro / num_columns
average_crossval_f1_macro = total_crossval_f1_macro / num_columns
average_crossval_f1_micro = total_crossval_f1_micro / num_columns

print(f"Average F1 Score (Macro): {average_f1_macro:.2%}")
print(f"Average F1 Score (Micro): {average_f1_micro:.2%}")
print(f"Average Crossval F1 Score (Macro): {average_crossval_f1_macro:.2%}")
print(f"Average Crossval F1 Score (Micro): {average_crossval_f1_micro:.2%}")

KeyboardInterrupt: 

Ex6

Disease / Breed Group/ Individual Breed /Age /Sex

In [34]:
from sklearn.ensemble import GradientBoostingRegressor
import xgboost as xgb
from xgboost import XGBClassifier
import pandas as pd
from sklearn.metrics import f1_score
from sklearn.model_selection import cross_val_score, train_test_split

input_data = pd.read_csv('../one_hot_encoded_sex_with_id.csv')
input_data2 = pd.read_csv('../one_hot_encoded_breed_with_id.csv')
input_data3 = pd.read_csv('../one_hot_encoded_disease_input.csv')
input_data4 = pd.read_csv('../one_hot_encoded_age_with_id.csv')
input_data5 = pd.read_csv('../one_hot_encoded_breed_group_with_id.csv')
output_data = pd.read_csv('../disease_output.csv')

#convert .csv txt to boolean
for col in input_data.columns:
    if 'sex' in col:
        input_data[col] = input_data[col].astype(bool)

merged_data = pd.merge(input_data, input_data2, input_data3,input_data4,input_data5,on='dog_id')
X = input_data.drop('dog_id', axis=1)

total_f1_macro = 0
total_f1_micro = 0
total_crossval_f1_macro = 0
total_crossval_f1_micro = 0
num_columns = 0

for column in output_data.columns:
    if column != 'dog_id':
        y = output_data[column]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        model = xgb.XGBClassifier()
        model.fit(X_train, y_train)

        y_pred = model.predict(X_test)
        total_f1_macro += f1_score(y_test, y_pred, average='macro')
        total_f1_micro += f1_score(y_test, y_pred, average='micro')
        total_crossval_f1_macro += cross_val_score(model, X, y, cv=5, scoring='f1_macro').mean()
        total_crossval_f1_micro += cross_val_score(model, X, y, cv=5, scoring='f1_micro').mean()

        num_columns += 1

average_f1_macro = total_f1_macro / num_columns
average_f1_micro = total_f1_micro / num_columns
average_crossval_f1_macro = total_crossval_f1_macro / num_columns
average_crossval_f1_micro = total_crossval_f1_micro / num_columns

print(f"Average F1 Score (Macro): {average_f1_macro:.2%}")
print(f"Average F1 Score (Micro): {average_f1_micro:.2%}")
print(f"Average Crossval F1 Score (Macro): {average_crossval_f1_macro:.2%}")
print(f"Average Crossval F1 Score (Micro): {average_crossval_f1_micro:.2%}")



Average F1 Score (Macro): 30.32%
Average F1 Score (Micro): 88.52%
Average Crossval F1 Score (Macro): 29.87%
Average Crossval F1 Score (Micro): 88.41%


exp7

Disease / Breed Group/ Individual Breed /Age /Sex / PhysicalActivity

In [None]:
from sklearn.ensemble import GradientBoostingRegressor
import xgboost as xgb
from xgboost import XGBClassifier
import pandas as pd
from sklearn.metrics import f1_score
from sklearn.model_selection import cross_val_score, train_test_split

input_data = pd.read_csv('../one_hot_encoded_sex_with_id.csv')
input_data2 = pd.read_csv('../one_hot_encoded_breed_with_id.csv')
input_data3 = pd.read_csv('../one_hot_encoded_disease_input.csv')
input_data4 = pd.read_csv('../one_hot_encoded_age_with_id.csv')
input_data5 = pd.read_csv('../one_hot_encoded_breed_group_with_id.csv')
output_data = pd.read_csv('../disease_output.csv')

#convert .csv txt to boolean
for col in input_data.columns:
    if 'sex' in col:
        input_data[col] = input_data[col].astype(bool)

merged_data = pd.merge(input_data, input_data2, input_data3,input_data4,input_data5,on='dog_id')
X = input_data.drop('dog_id', axis=1)

total_f1_macro = 0
total_f1_micro = 0
total_crossval_f1_macro = 0
total_crossval_f1_micro = 0
num_columns = 0

for column in output_data.columns:
    if column != 'dog_id':
        y = output_data[column]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        model = xgb.XGBClassifier()
        model.fit(X_train, y_train)

        y_pred = model.predict(X_test)
        total_f1_macro += f1_score(y_test, y_pred, average='macro')
        total_f1_micro += f1_score(y_test, y_pred, average='micro')
        total_crossval_f1_macro += cross_val_score(model, X, y, cv=5, scoring='f1_macro').mean()
        total_crossval_f1_micro += cross_val_score(model, X, y, cv=5, scoring='f1_micro').mean()

        num_columns += 1

average_f1_macro = total_f1_macro / num_columns
average_f1_micro = total_f1_micro / num_columns
average_crossval_f1_macro = total_crossval_f1_macro / num_columns
average_crossval_f1_micro = total_crossval_f1_micro / num_columns

print(f"Average F1 Score (Macro): {average_f1_macro:.2%}")
print(f"Average F1 Score (Micro): {average_f1_micro:.2%}")
print(f"Average Crossval F1 Score (Macro): {average_crossval_f1_macro:.2%}")
print(f"Average Crossval F1 Score (Micro): {average_crossval_f1_micro:.2%}")