In [1]:
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

1. Drop the `Name` and `Outcome Time` columns.
2. Convert `Age upon Intake` to a single number (age in weeks).
3. Split `Sex upon Intake` into reproductive status (intact/spayed/neutered) and the actual sex (male/female).

- Idea: build separate models for cats and for dogs.

In [2]:
def convert_to_weeks(value):
    """
    Convert an age description such as '2 years' or '5 months' to a number of weeks.

    The unit is found by a case-insensitive substring search, tried in the order
    week, month, year, day. The amount is the first whitespace-separated token.

    Parameters:
        value: Age description, expected to look like '<integer> <unit>'.

    Returns:
        int or float: The amount itself for weeks, amount * 4.345 for months,
        amount * 52.1775 for years, and 0 for days. 0 is also returned for any
        input that cannot be interpreted: a non-string (e.g. NaN or None), an
        empty string, an unrecognised unit, or an amount that is not an integer.
    """
    if not isinstance(value, str):
        # Missing values arrive as float NaN / None, which have no .lower().
        return 0

    value = value.lower()  # Make it case-insensitive

    try:
        amount = int(value.split()[0])
    except (IndexError, ValueError):
        # Empty string or a non-integer leading token: treat as an unexpected value.
        return 0

    # Weeks per unit, checked in order. Ages given in days are deliberately
    # counted as 0 weeks (1 month ≈ 4.345 weeks, 1 year ≈ 52.1775 weeks).
    weeks_per_unit = (
        ('week', 1),
        ('month', 4.345),
        ('year', 52.1775),
        ('day', 0),
    )
    for unit, factor in weeks_per_unit:
        if unit in value:
            return amount * factor
    return 0  # In case of unexpected values

def extract_month_year(df, column='Intake Time', fmt='%m/%d/%Y %I:%M:%S %p'):
    """
    Parse a datetime column in place and add 'Intake Month' and 'Intake Year' columns.

    Parameters:
        df (pd.DataFrame): Input DataFrame containing the datetime column.
            It is modified in place: `column` is overwritten with parsed
            datetimes and the two new columns are added.
        column (str): Name of the column to convert (default is 'Intake Time').
        fmt (str or None): strptime-style format handed to `pd.to_datetime`.
            The default matches strings like '01/03/2019 04:19:00 PM'. Pass a
            different format for other layouts (e.g. '%m/%d/%y %H:%M' for
            '1/3/19 16:19'), or None to let pandas infer the format.

    Returns:
        pd.DataFrame: The same DataFrame object that was passed in, with the
        additional 'Intake Month' and 'Intake Year' columns. Values that do not
        match `fmt` are coerced to NaT, so their month/year come out as NaN.
    """
    # errors='coerce' turns unparseable entries into NaT instead of raising.
    df[column] = pd.to_datetime(df[column], format=fmt, errors='coerce')

    # Extract the month and year from the parsed datetime column.
    df['Intake Month'] = df[column].dt.month
    df['Intake Year'] = df[column].dt.year

    return df

In [3]:
# Load the training split, drop the columns that are not used as features,
# and keep only rows without missing values.
train_data = (
    pd.read_csv('train.csv', header=0)
    .drop(columns=['Id', 'Name', 'Outcome Time', 'Found Location', 'Date of Birth'])
    .dropna()
)
print(train_data.columns)

# One-hot encode the categorical intake attributes.
train_data = pd.get_dummies(
    train_data,
    columns=['Intake Condition', 'Intake Type', 'Animal Type', 'Sex upon Intake'],
)

# Replace the textual age with a numeric age in weeks.
train_data['Age upon Intake'] = train_data['Age upon Intake'].apply(convert_to_weeks)

# Derive 'Intake Month' / 'Intake Year', then discard the raw timestamp column.
train_data = extract_month_year(train_data, column='Intake Time').drop(columns=['Intake Time'])
print(train_data.columns)

Index(['Intake Time', 'Intake Type', 'Intake Condition', 'Animal Type',
       'Sex upon Intake', 'Age upon Intake', 'Breed', 'Color', 'Outcome Type'],
      dtype='object')
Index(['Age upon Intake', 'Breed', 'Color', 'Outcome Type',
       'Intake Condition_Aged', 'Intake Condition_Agonal',
       'Intake Condition_Behavior', 'Intake Condition_Congenital',
       'Intake Condition_Feral', 'Intake Condition_Injured',
       'Intake Condition_Med Attn', 'Intake Condition_Med Urgent',
       'Intake Condition_Medical', 'Intake Condition_Neonatal',
       'Intake Condition_Neurologic', 'Intake Condition_Normal',
       'Intake Condition_Nursing', 'Intake Condition_Other',
       'Intake Condition_Parvo', 'Intake Condition_Pregnant',
       'Intake Condition_Sick', 'Intake Condition_Space',
       'Intake Condition_Unknown', 'Intake Type_Abandoned',
       'Intake Type_Euthanasia Request', 'Intake Type_Owner Surrender',
       'Intake Type_Public Assist', 'Intake Type_Stray',
       'Intak

In [4]:
# Disabled exploration snippet: for every column of train_data, print up to the
# first 20 distinct non-null values and the total number of distinct values.
# for col in train_data.columns:
#     uniques = train_data[col].dropna().unique()
#     print(f"Column: {col}")
#     print(uniques[:20])  # limit to first 20 unique values
#     print(f"Total unique (non-null): {len(uniques)}")
#     print("-" * 40)

In [5]:
# Read the test split (first row is the header) and preview its first five rows.
test_data = pd.read_csv(filepath_or_buffer='test.csv', header=0)
test_data.head(n=5)

Unnamed: 0,Id,Intake Time,Found Location,Intake Type,Intake Condition,Animal Type,Sex upon Intake,Age upon Intake,Breed,Color,Date of Birth
0,1,1/3/19 16:19,2501 Magin Meadow Dr in Austin (TX),Stray,Normal,Dog,Neutered Male,2 years,Beagle Mix,Tricolor,1/3/17
1,2,10/21/13 7:59,Austin (TX),Stray,Sick,Cat,Intact Female,4 weeks,Domestic Shorthair Mix,Calico,9/21/13
2,3,6/29/14 10:38,800 Grove Blvd in Austin (TX),Stray,Normal,Dog,Neutered Male,4 years,Doberman Pinsch/Australian Cattle Dog,Tan/Gray,6/29/10
3,4,7/11/15 18:19,Galilee Court And Damita Jo Dr in Manor (TX),Stray,Normal,Dog,Intact Female,5 months,Pit Bull,Brown/White,1/11/15
4,5,2/4/17 10:10,208 Beaver St in Austin (TX),Stray,Injured,Cat,Intact Female,2 years,Domestic Shorthair Mix,Black/White,2/4/15


# Neural Net (MLP)

In [6]:
# 'Breed' and 'Color' are left out of the MLP inputs.
# errors='ignore' keeps this cell re-runnable: train_data is overwritten here,
# so on a second execution the two columns are already gone and a plain drop
# would raise KeyError.
train_data = train_data.drop(columns=['Breed', 'Color'], errors='ignore')

# Split into the feature matrix and the target column.
train_x = train_data.drop(columns=['Outcome Type'])
train_y = train_data['Outcome Type']

In [7]:
# Integer class index assigned to each outcome label.
outcome_mapping = {
    'Return to Owner': 0,
    'Transfer': 1,
    'Adoption': 2,
    'Died': 3,
    'Euthanasia': 4
}

train_y_encoded = train_y.map(outcome_mapping)

# Series.map yields NaN for any label that is not a key of outcome_mapping.
# Casting NaN to int64 would silently produce meaningless class indices, so
# stop here with the offending labels instead.
unmapped_mask = train_y_encoded.isna()
if unmapped_mask.any():
    unmapped_labels = sorted(train_y[unmapped_mask].astype(str).unique())
    raise ValueError(
        f"Outcome Type values missing from outcome_mapping: {unmapped_labels}"
    )

# Features as float32 and labels as int64 NumPy arrays, then as tensors.
X_np = train_x.values.astype(np.float32)
y_np = train_y_encoded.values.astype(np.int64)

X_tensor = torch.tensor(X_np)
y_tensor = torch.tensor(y_np)

# Mini-batches of 64 (features, label) pairs, reshuffled on every pass.
dataset = TensorDataset(X_tensor, y_tensor)
batch_size = 64
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)