<a href="https://colab.research.google.com/github/NKColorado/colorado-animal-shelter-data/blob/main/Reclassify_Other_Outcomes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Cell 1: Connect to Google Drive
from google.colab import drive
drive.mount('/content/drive')

print("‚úÖ Google Drive connected!")
print("üìÅ Your files are in: /content/drive/MyDrive/")

Mounted at /content/drive
‚úÖ Google Drive connected!
üìÅ Your files are in: /content/drive/MyDrive/


In [10]:
# Cell 2: Upload Your CSV Files
from google.colab import files
import os

print("üì§ UPLOAD YOUR 10 CSV FILES")
print("=" * 60)
print("Click 'Choose Files' below and select all 10 CSV files")
print("that you just downloaded from Google Drive")
print("=" * 60)

uploaded = files.upload()

print("")
print(f"‚úÖ Uploaded {len(uploaded)} files!")
for filename in uploaded.keys():
    print(f"   ‚úì {filename}")

üì§ UPLOAD YOUR 10 CSV FILES
Click 'Choose Files' below and select all 10 CSV files
that you just downloaded from Google Drive


Saving 2015_Colorado_Shelter_Data_Extracted (1).xlsx to 2015_Colorado_Shelter_Data_Extracted (1).xlsx
Saving 2016_Colorado_Shelter_Data_Extracted.csv to 2016_Colorado_Shelter_Data_Extracted.csv
Saving 2017_Colorado_Shelter_Data_Extracted.csv to 2017_Colorado_Shelter_Data_Extracted.csv
Saving 2018_Colorado_Shelter_Data_Extracted.xlsx to 2018_Colorado_Shelter_Data_Extracted.xlsx
Saving 2019_Colorado_Shelter_Data_Extracted.xlsx to 2019_Colorado_Shelter_Data_Extracted.xlsx
Saving 2020_Colorado_Shelter_Data_Extracted.xlsx to 2020_Colorado_Shelter_Data_Extracted.xlsx
Saving 2021_Colorado_Shelter_Data_Extracted.xlsx to 2021_Colorado_Shelter_Data_Extracted.xlsx
Saving 2022_Colorado_Shelter_Data_Extracted.xlsx to 2022_Colorado_Shelter_Data_Extracted.xlsx
Saving 2023_Colorado_Shelter_Data_Extracted.xlsx to 2023_Colorado_Shelter_Data_Extracted.xlsx
Saving 2024_Colorado_Shelter_Data_Extracted.csv to 2024_Colorado_Shelter_Data_Extracted.csv
Saving desktop.ini to desktop.ini

‚úÖ Uploaded 11 files!


In [11]:
# Cell 3: Reclassify "Other" Outcomes (Era 2: 2020-2024)
import pandas as pd
import os

print("üîß RECLASSIFYING 'OTHER' OUTCOMES")
print("=" * 80)
print("BUSINESS LOGIC:")
print("  Era 1 (2015-2019): 'Other Live Outcomes' ‚Üí POSITIVE (keep as is)")
print("  Era 2 (2020-2024): 'Other' (unlabeled) ‚Üí NEGATIVE (reclassify)")
print("=" * 80)
print("")

# Years to reclassify
ERA2_YEARS = [2020, 2021, 2022, 2023, 2024]

# Process each file
results = []
for year in range(2015, 2025):
    # Try both .csv and .xlsx extensions
    filename_csv = f"{year}_Colorado_Shelter_Data_Extracted.csv"
    filename_xlsx = f"{year}_Colorado_Shelter_Data_Extracted.xlsx"

    # Check which file exists
    if os.path.exists(filename_csv):
        filename = filename_csv
        df = pd.read_csv(filename)
    elif os.path.exists(filename_xlsx):
        filename = filename_xlsx
        df = pd.read_excel(filename)
    else:
        print(f"‚ö†Ô∏è  {year}: File not found")
        continue

    # Count before
    before_other_live = len(df[df['metric_type'] == 'outcome_other_live'])
    before_other = len(df[df['metric_type'] == 'outcome_other'])

    # Apply reclassification for Era 2 only
    if year in ERA2_YEARS:
        mask = df['metric_type'] == 'outcome_other_live'
        df.loc[mask, 'metric_type'] = 'outcome_other'
        records_changed = mask.sum()

        # Count after
        after_other_live = len(df[df['metric_type'] == 'outcome_other_live'])
        after_other = len(df[df['metric_type'] == 'outcome_other'])

        print(f"‚úÖ {year} (Era 2 - RECLASSIFIED):")
        print(f"   Before: outcome_other_live={before_other_live}, outcome_other={before_other}")
        print(f"   After:  outcome_other_live={after_other_live}, outcome_other={after_other}")
        print(f"   Records changed: {records_changed}")

        results.append({
            'year': year,
            'era': 'Era 2',
            'changed': records_changed
        })
    else:
        print(f"‚úì  {year} (Era 1 - NO CHANGE):")
        print(f"   outcome_other_live={before_other_live} (POSITIVE - kept)")

        results.append({
            'year': year,
            'era': 'Era 1',
            'changed': 0
        })

    # Save corrected file
    output_filename = f"{year}_Colorado_Shelter_Data_CORRECTED.csv"
    df.to_csv(output_filename, index=False)
    print(f"   üíæ Saved: {output_filename}")
    print("")

# Summary
print("=" * 80)
print("SUMMARY")
print("=" * 80)
total_changed = sum(r['changed'] for r in results)
print(f"‚úÖ Total records reclassified: {total_changed}")
print("")
print("üì• DOWNLOAD YOUR CORRECTED FILES BELOW!")
print("=" * 80)

üîß RECLASSIFYING 'OTHER' OUTCOMES
BUSINESS LOGIC:
  Era 1 (2015-2019): 'Other Live Outcomes' ‚Üí POSITIVE (keep as is)
  Era 2 (2020-2024): 'Other' (unlabeled) ‚Üí NEGATIVE (reclassify)

‚ö†Ô∏è  2015: File not found
‚úì  2016 (Era 1 - NO CHANGE):
   outcome_other_live=9 (POSITIVE - kept)
   üíæ Saved: 2016_Colorado_Shelter_Data_CORRECTED.csv

‚úì  2017 (Era 1 - NO CHANGE):
   outcome_other_live=9 (POSITIVE - kept)
   üíæ Saved: 2017_Colorado_Shelter_Data_CORRECTED.csv

‚úì  2018 (Era 1 - NO CHANGE):
   outcome_other_live=0 (POSITIVE - kept)
   üíæ Saved: 2018_Colorado_Shelter_Data_CORRECTED.csv

‚úì  2019 (Era 1 - NO CHANGE):
   outcome_other_live=0 (POSITIVE - kept)
   üíæ Saved: 2019_Colorado_Shelter_Data_CORRECTED.csv

‚úÖ 2020 (Era 2 - RECLASSIFIED):
   Before: outcome_other_live=9, outcome_other=0
   After:  outcome_other_live=0, outcome_other=9
   Records changed: 9
   üíæ Saved: 2020_Colorado_Shelter_Data_CORRECTED.csv

‚úÖ 2021 (Era 2 - RECLASSIFIED):
   Before: outcome_

In [12]:
# Cell 4: Download All Corrected Files
from google.colab import files
import os

print("üì• DOWNLOADING CORRECTED FILES")
print("=" * 60)

# Get all corrected files
corrected_files = [f for f in os.listdir() if 'CORRECTED.csv' in f]

print(f"Found {len(corrected_files)} corrected files:")
for f in sorted(corrected_files):
    print(f"   üìÑ {f}")

print("\nStarting download...")
print("(Files will download to your browser's download folder)")
print("")

# Download each file
for filename in sorted(corrected_files):
    files.download(filename)
    print(f"‚úÖ Downloaded: {filename}")

print("\n" + "=" * 60)
print("‚úÖ ALL DONE!")
print("")
print("NEXT STEPS:")
print("  1. Check your Downloads folder for the 9 CORRECTED.csv files")
print("  2. Upload them back to Google Drive to replace the originals")
print("  3. (Optional) Fix 2015 file and re-run if needed")

üì• DOWNLOADING CORRECTED FILES
Found 9 corrected files:
   üìÑ 2016_Colorado_Shelter_Data_CORRECTED.csv
   üìÑ 2017_Colorado_Shelter_Data_CORRECTED.csv
   üìÑ 2018_Colorado_Shelter_Data_CORRECTED.csv
   üìÑ 2019_Colorado_Shelter_Data_CORRECTED.csv
   üìÑ 2020_Colorado_Shelter_Data_CORRECTED.csv
   üìÑ 2021_Colorado_Shelter_Data_CORRECTED.csv
   üìÑ 2022_Colorado_Shelter_Data_CORRECTED.csv
   üìÑ 2023_Colorado_Shelter_Data_CORRECTED.csv
   üìÑ 2024_Colorado_Shelter_Data_CORRECTED.csv

Starting download...
(Files will download to your browser's download folder)



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

‚úÖ Downloaded: 2016_Colorado_Shelter_Data_CORRECTED.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

‚úÖ Downloaded: 2017_Colorado_Shelter_Data_CORRECTED.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

‚úÖ Downloaded: 2018_Colorado_Shelter_Data_CORRECTED.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

‚úÖ Downloaded: 2019_Colorado_Shelter_Data_CORRECTED.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

‚úÖ Downloaded: 2020_Colorado_Shelter_Data_CORRECTED.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

‚úÖ Downloaded: 2021_Colorado_Shelter_Data_CORRECTED.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

‚úÖ Downloaded: 2022_Colorado_Shelter_Data_CORRECTED.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

‚úÖ Downloaded: 2023_Colorado_Shelter_Data_CORRECTED.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

‚úÖ Downloaded: 2024_Colorado_Shelter_Data_CORRECTED.csv

‚úÖ ALL DONE!

NEXT STEPS:
  1. Check your Downloads folder for the 9 CORRECTED.csv files
  2. Upload them back to Google Drive to replace the originals
  3. (Optional) Fix 2015 file and re-run if needed


In [13]:
# Cell 5: Fix 2015 File
import pandas as pd

print("üîß PROCESSING 2015 FILE")
print("=" * 60)

# The actual filename with (1)
filename_2015 = "2015_Colorado_Shelter_Data_Extracted (1).xlsx"

# Read the file
df = pd.read_excel(filename_2015)

# Count before
before_other_live = len(df[df['metric_type'] == 'outcome_other_live'])
before_other = len(df[df['metric_type'] == 'outcome_other'])

print(f"‚úì  2015 (Era 1 - NO CHANGE):")
print(f"   outcome_other_live={before_other_live} (POSITIVE - kept)")
print(f"   outcome_other={before_other}")

# Save corrected file (no changes for Era 1, but standardize the name)
output_filename = "2015_Colorado_Shelter_Data_CORRECTED.csv"
df.to_csv(output_filename, index=False)
print(f"   üíæ Saved: {output_filename}")

print("")
print("‚úÖ 2015 processed!")
print("")
print("Now download it:")
from google.colab import files
files.download(output_filename)

üîß PROCESSING 2015 FILE
‚úì  2015 (Era 1 - NO CHANGE):
   outcome_other_live=9 (POSITIVE - kept)
   outcome_other=0
   üíæ Saved: 2015_Colorado_Shelter_Data_CORRECTED.csv

‚úÖ 2015 processed!

Now download it:


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>