In [1]:
# Import Python Libraries
import pandas as pd
import numpy as np

In [10]:
# Specify what and where is the data file
filename = 'C:/Users/DKell/Downloads/sheltered_animals.csv'

col_names = [
    'AnimalID',
    'DateTime',
    'MonthYear',
    'DateOfBirth',
    'AnimalType',
    'SexUponOutcome',
    'AgeUponOutcome',
    'Breed',
    'Color',
    'OutcomeType'
]

# Load the data into a Pandas DataFrame
df = pd.read_csv(filename)

In [11]:
# First, check if the column exists with the correct spelling
# If you're not sure about column names, you can print them:
print(df.columns.tolist())

['A771830', '12/27/2018 13:25', '12/27/2018 13:25.1', '5/10/2017', 'Dog', 'Neutered Male', '1 year', 'Pit Bull Mix', 'White/Blue', 'Adoption']


In [13]:
# First, let's check the actual column names in the DataFrame
print("Available columns:", df.columns.tolist())

# Then fix the code by using the correct column name
# Assuming the column is actually named "Age upon Outcome" (with capital O)
# or has some other variation in spacing or capitalization

df.columns = df.columns.str.strip()  # removes hidden spaces

def convert_age_to_months(age):
    if isinstance(age, str) and len(age.split()) == 2:
        num, unit = age.split()
        num = float(num)
        if 'year' in unit.lower():
            return num * 12
        elif 'month' in unit.lower():
            return num
        elif 'week' in unit.lower():
            return num / 4
        elif 'day' in unit.lower():
            return num / 30
    return None

# Find the correct column name that contains age information
# This is a more robust approach that checks for similar column names
age_columns = [col for col in df.columns if 'age' in col.lower()]
print("Possible age columns:", age_columns)

# Use the correct column name - replace 'Age upon Outcome' with the actual column name from your DataFrame
# For example, if the correct column is the first one found:
if age_columns:
    correct_age_column = age_columns[0]
    df['AgeInMonths'] = df[correct_age_column].apply(convert_age_to_months)
    df['AgeInMonths'].fillna(df['AgeInMonths'].median(), inplace=True)
else:
    print("No age-related columns found. Please check your DataFrame.")

Available columns: ['A771830', '12/27/2018 13:25', '12/27/2018 13:25.1', '5/10/2017', 'Dog', 'Neutered Male', '1 year', 'Pit Bull Mix', 'White/Blue', 'Adoption']
Possible age columns: []
No age-related columns found. Please check your DataFrame.


In [18]:
df = pd.read_csv(filename, names=col_names, index_col= False)

In [6]:
# Import libraries
import pandas as pd

# Load your data into a DataFrame
# Replace 'your_file.csv' with the actual path to your data file
df = pd.read_csv('C:/Users/DKell/Downloads/sheltered_animals.csv')  # or pd.read_excel(), pd.read_json(), etc.

# Now proceed with your original code
df.columns = df.columns.str.strip()  # remove hidden spaces

# Print column names to check what's available
print("Available columns:", df.columns.tolist())

# Find the correct age column name (it might be slightly different)
# Look for columns that might contain 'age' in their name
age_columns = [col for col in df.columns if 'age' in col.lower()]
print("Possible age columns:", age_columns)

# Assuming you found the correct column name, replace 'Age upon Outcome' with it
# For example, if the correct column is 'age_upon_outcome' or 'Age_upon_Outcome'
# Use the correct column name below (this is just an example)
age_column = age_columns[0] if age_columns else None  # Use the first match if found

def convert_age_to_months(age):
    if isinstance(age, str) and len(age.split()) >= 2:
        num, unit = age.split()[:2]
        num = float(num)
        unit = unit.lower()

        if 'year' in unit:
            return num * 12
        elif 'month' in unit:
            return num
        elif 'week' in unit:
            return num / 4
        elif 'day' in unit:
            return num / 30
    return None

# Only proceed if we found an age column
if age_column:
    df['AgeInMonths'] = df[age_column].apply(convert_age_to_months)
    df['AgeInMonths'].fillna(df['AgeInMonths'].median(), inplace=True)
else:
    print("Could not find an appropriate age column. Please check the column names manually.")

Available columns: ['A771830', '12/27/2018 13:25', '12/27/2018 13:25.1', '5/10/2017', 'Dog', 'Neutered Male', '1 year', 'Pit Bull Mix', 'White/Blue', 'Adoption']
Possible age columns: []
Could not find an appropriate age column. Please check the column names manually.


In [10]:
# First check the actual column names in the DataFrame
print(df.columns.tolist())

# After seeing the actual column names, use the correct names in drop
# For example, if the actual column names are different than what you expected:
# df.drop(['ActualColumnName1', 'ActualColumnName2'], axis=1, inplace=True)

# A safer approach that won't raise errors if columns don't exist:
columns_to_drop = ['AnimalID', 'MonthYear']  # Your intended columns to drop
existing_columns = [col for col in columns_to_drop if col in df.columns]
if existing_columns:
    df.drop(existing_columns, axis=1, inplace=True)
    print(f"Dropped columns: {existing_columns}")
else:
    print("None of the specified columns were found in the DataFrame")

['A771830', '12/27/2018 13:25', '12/27/2018 13:25.1', '5/10/2017', 'Dog', 'Neutered Male', '1 year', 'Pit Bull Mix', 'White/Blue', 'Adoption']
None of the specified columns were found in the DataFrame


In [11]:
df.to_csv("animal_sheltered_cleaned.csv", index=False)