In [1]:
import pandas as pd
import numpy as np
import sklearn
import geopandas as gpd
from shapely.geometry import Point, Polygon, box
import matplotlib
import matplotlib.pyplot as plt
from sklearn.impute import SimpleImputer

In [3]:
# Read the county-level animal inventory table; the bare name on the last
# line makes the notebook render the frame as the cell's output.
animals = pd.read_csv(filepath_or_buffer='Animals.csv')
animals

Unnamed: 0,State,County of County_Animals,ANSI of County_Animals,Ag District,Ag District Code,Year of County_Animals,"CATTLE, COWS, BEEF - INVENTORY","CATTLE, COWS, MILK - INVENTORY","CATTLE, INCL CALVES - INVENTORY","CATTLE, ON FEED - INVENTORY",HOGS - INVENTORY,TURKEYS - INVENTORY
0,MINNESOTA,AITKIN,27001,EAST CENTRAL,60,2010,5100.0,400.0,12300.0,,,
1,MINNESOTA,AITKIN,27001,EAST CENTRAL,60,2011,4800.0,400.0,12100.0,,,
2,MINNESOTA,AITKIN,27001,EAST CENTRAL,60,2012,4900.0,400.0,11900.0,52.0,48.0,39.0
3,MINNESOTA,AITKIN,27001,EAST CENTRAL,60,2013,4800.0,300.0,10400.0,,,
4,MINNESOTA,AITKIN,27001,EAST CENTRAL,60,2014,4400.0,300.0,9900.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
1212,MINNESOTA,YELLOW MEDICINE,27173,WEST CENTRAL,40,2019,3500.0,1400.0,27500.0,,,
1213,MINNESOTA,YELLOW MEDICINE,27173,WEST CENTRAL,40,2020,3500.0,1400.0,27000.0,,,
1214,MINNESOTA,YELLOW MEDICINE,27173,WEST CENTRAL,40,2021,3200.0,1400.0,25500.0,,,
1215,MINNESOTA,YELLOW MEDICINE,27173,WEST CENTRAL,40,2022,3400.0,1400.0,26500.0,3136.0,214088.0,


In [None]:
# Load AgDistricts.csv from the working directory and echo the frame.
AgDistricts = pd.read_csv(filepath_or_buffer='AgDistricts.csv')
AgDistricts

In [None]:
# Load CropProductivityIndex.csv from the working directory and echo the frame.
cpi = pd.read_csv(filepath_or_buffer='CropProductivityIndex.csv')
cpi

In [None]:
# Load Crops.csv from the working directory and echo the frame.
crops = pd.read_csv(filepath_or_buffer='Crops.csv')
crops

In [None]:
# Load the fertilizer consumption table and echo the frame.
# NOTE(review): this file is read from MUDAC2024/, while Animals.csv,
# AgDistricts.csv, CropProductivityIndex.csv and Crops.csv are read from the
# working directory - confirm the data layout is really split like this.
fertilizer = pd.read_csv(filepath_or_buffer='MUDAC2024/FertilizerConsumption.csv')
fertilizer

In [None]:
# Load the tillable land table from the MUDAC2024/ folder and echo the frame.
tillable_land = pd.read_csv(filepath_or_buffer='MUDAC2024/TillableLand.csv')
tillable_land

In [None]:
# Read the 'naturalearth_lowres' layer that ships with GeoPandas.
# NOTE(review): the gpd.datasets module is deprecated in GeoPandas 0.14 and
# removed in 1.0 - confirm the environment pins a version that still has it.
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
# Keep only the rows whose 'name' is 'United States of America'.
# NOTE(review): the result is stored as `minnesota`, but this filter keeps the
# whole country, not the state - confirm what this map is meant to show.
minnesota = world.loc[world['name'] == 'United States of America']
minnesota.plot()

In [None]:
# Read the roads shapefile and draw it with the default GeoPandas plot.
# NOTE(review): the file name carries the code 78030, whereas the county codes
# in the animals table are all 27xxx - confirm this is the intended shapefile.
roads = gpd.read_file(filename="MUDAC2024/tl_2020_78030_roads.shp")
roads.plot()

In [None]:
def custom_interpolation(group):
    """Fill the gaps in one county's animal-inventory rows.

    Every fill works along row order, so the rows must already be in
    chronological order (the calling cell sorts by county and year first).

    * The three cattle head-count columns ('CATTLE, COWS, BEEF - INVENTORY',
      'CATTLE, COWS, MILK - INVENTORY', 'CATTLE, INCL CALVES - INVENTORY')
      are forward-filled; a gap before the first observation stays NaN.
    * 'HOGS - INVENTORY', 'TURKEYS - INVENTORY' and
      'CATTLE, ON FEED - INVENTORY' are linearly interpolated between
      observations, held at the last observed value after it, and filled
      with the first observed value before it.

    A column with no observation at all is left entirely NaN.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows of a single county containing the six inventory columns above.

    Returns
    -------
    pandas.DataFrame
        A filled copy with the same index and columns; ``group`` itself is
        not modified.
    """
    cattle_columns = ['CATTLE, COWS, BEEF - INVENTORY', 'CATTLE, COWS, MILK - INVENTORY',
                      'CATTLE, INCL CALVES - INVENTORY']
    sparse_columns = ['HOGS - INVENTORY', 'TURKEYS - INVENTORY', 'CATTLE, ON FEED - INVENTORY']

    # Work on a copy: mutating the object handed over by groupby is
    # documented by pandas as producing unexpected behaviour.
    group = group.copy()

    group[cattle_columns] = group[cattle_columns].ffill()

    for column in sparse_columns:
        # Forward linear interpolation fills interior gaps and carries the
        # last observation to the end; only leading gaps remain afterwards.
        filled = group[column].interpolate(method='linear', limit_direction='forward')
        # Leading gaps take the first observed value. Doing this with bfill
        # rather than positional index[i - 1] look-ups avoids the wrap-around
        # to the last row that a negative position causes on the first row.
        group[column] = filled.bfill()

    return group

In [None]:
# Put every county's rows in chronological order; custom_interpolation fills
# along row order, so the sort has to happen before the groups are formed.
animals = animals.sort_values(by=['County of County_Animals', 'Year of County_Animals'])
grouped = animals.groupby('County of County_Animals')

# Iterate over the groups explicitly instead of calling grouped.apply(...).
# What apply returns for a frame-returning function depends on the pandas
# version (newer releases prepend the county as an extra index level, which
# duplicates the county column in the exported CSV, and later ones withhold
# the grouping column from the function). Iteration always yields the full
# sub-frame, and concat keeps the original row labels.
county_frames = [custom_interpolation(county_rows) for _, county_rows in grouped]
# pd.concat rejects an empty list, so fall back to an (empty) copy of the input.
interpolated_df = pd.concat(county_frames) if county_frames else animals.copy()

# Whatever is still NaN after the per-county fill becomes 0, and the numeric
# columns are rounded to whole numbers before export.
interpolated = interpolated_df.fillna(0).round()
interpolated.to_csv('Datasets/clean_animal.csv')

In [None]:
# Cap rendered DataFrame output at 25 rows.
pd.options.display.max_rows = 25