In [15]:
#standard imports
import pandas as pd
import numpy as np


def prep_bees():
    '''Load bee_colony_loss.csv and return the cleaned, sorted annual records.

    Steps, in order: drop the 'Unnamed: 0' column, sort by year (descending)
    then state (ascending), drop rows containing nulls, normalise the state
    and season text, keep rows with more than 10 beekeepers, drop duplicate
    rows, cast the loss / colony columns, and finally keep only "annual" rows
    whose state is neither "multistates" nor "non_continental_usa".

    Returns:
        pandas.DataFrame: the filtered rows; the index labels of the
        surviving rows are kept as read from the file (no index reset).
    '''
    bees = (
        pd.read_csv('bee_colony_loss.csv')
        # the unnamed column is dropped rather than kept as data
        .drop(columns='Unnamed: 0')
        # newest year first, states alphabetical within a year
        .sort_values(['year', 'state'], ascending=[False, True])
        .dropna()
        .assign(
            # lowercase state names and replace spaces with underscores
            state=lambda frame: frame.state.str.lower().str.replace(' ', '_'),
            # lowercase season labels
            season=lambda frame: frame.season.str.lower(),
        )
        # remove observations that have 10 or fewer beekeepers
        .loc[lambda frame: frame.beekeepers > 10]
        # duplicates are detected after the text normalisation above
        .drop_duplicates()
        .astype({
            'total_loss': float,
            'average_loss': float,
            'ending_colonies': int,
            'colonies_lost': int,
        })
    )

    # annual season only, excluding the two aggregate pseudo-states
    annual_only = bees.season == "annual"
    aggregate_state = bees.state.isin(["multistates", "non_continental_usa"])

    return bees[annual_only & ~aggregate_state]


def state_ansi():
    '''Load the pipe-delimited state_ansi.txt file and return a state -> ANSI code lookup.

    Column names are lowercased, the state names are lowercased with spaces
    replaced by underscores, the "stusab" and "statens" columns are dropped,
    and the remaining columns are renamed: "state" -> "ansi" and
    "state_name" -> "state".

    Returns:
        pandas.DataFrame: the lookup table with "ansi" and "state" columns.
    '''
    # read the pipe-separated file and lowercase every column name
    lookup = pd.read_csv("state_ansi.txt", sep="|").rename(columns=str.lower)

    # normalise state names: lowercase, spaces replaced with underscores
    lookup["state_name"] = lookup["state_name"].str.lower().str.replace(' ', '_')

    # drop the columns that are not needed, then give the rest their final names
    lookup = lookup.drop(columns=["stusab", "statens"])
    return lookup.rename(columns={"state": "ansi", "state_name": "state"})

def geo_data():
    '''Load state_geocords.csv and return each state's name with its coordinates.

    The first CSV column is used as the index. The "name" column is renamed to
    "state" and normalised (lowercase, spaces replaced with underscores), and
    only the "state", "latitude" and "longitude" columns are returned.

    Returns:
        pandas.DataFrame: columns "state", "latitude", "longitude".
    '''
    coords = (
        # first column of the file becomes the index
        pd.read_csv("state_geocords.csv", index_col=[0])
        .rename(columns={"name": "state"})
        # lowercase the state names and replace spaces with underscores
        .assign(state=lambda frame: frame.state.str.lower().str.replace(' ', '_'))
    )

    # keep only the useful columns
    return coords[["state", "latitude", "longitude"]]

def bee_merged():
    """Build the combined bee dataset by joining the lookup tables onto prep_bees().

    Calls prep_bees(), state_ansi() and geo_data(), then left-joins the ANSI
    lookup and the coordinates lookup (in that order) onto the bee records
    using the shared "state" column.

    Returns:
        pandas.DataFrame: the bee records with the columns of both lookup
        tables appended.
    """
    merged = prep_bees()

    # left joins keep every bee record even when a lookup has no matching state
    for lookup in (state_ansi(), geo_data()):
        merged = merged.merge(lookup, on="state", how="left")

    return merged

In [18]:
# build the merged dataframe; as the cell's last expression it is displayed below
bee_merged()

Unnamed: 0,state,year,season,beekeepers,total_loss,average_loss,starting_colonies,colonies_lost,ending_colonies,beekeepers_exclusive_to_state,colonies_exclusive_to_state,ansi,latitude,longitude
0,alabama,2022,annual,33,36.488812,34.260096,316,212,369,100.000000,100.000000,1,32.806671,-86.791130
1,arkansas,2022,annual,18,51.254480,53.867865,152,143,136,94.444444,97.368421,5,34.969704,-92.373123
2,arkansas,2022,annual,17,49.411765,52.869897,148,126,129,100.000000,100.000000,5,34.969704,-92.373123
3,california,2022,annual,89,33.269667,42.818791,166009,85526,171543,67.415730,25.320314,6,36.116203,-119.681564
4,california,2022,annual,29,36.752854,35.811393,123975,73971,127295,0.000000,0.000000,6,36.116203,-119.681564
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1043,washington,2011,annual,61,36.026936,38.047901,340,214,380,100.000000,100.000000,53,47.400902,-121.490494
1044,west_virginia,2011,annual,25,78.585462,40.723147,193,400,109,92.000000,48.704663,54,38.491226,-80.954453
1045,west_virginia,2011,annual,23,41.250000,36.881733,94,66,94,100.000000,100.000000,54,38.491226,-80.954453
1046,wisconsin,2011,annual,50,34.585065,51.882699,3773,2413,4564,92.000000,14.020673,55,44.268543,-89.616508


In [20]:
# rebuild the merged dataframe and report, per column, whether any value is null
bee_merged().isna().any()

state                            False
year                             False
season                           False
beekeepers                       False
total_loss                       False
average_loss                     False
starting_colonies                False
colonies_lost                    False
ending_colonies                  False
beekeepers_exclusive_to_state    False
colonies_exclusive_to_state      False
ansi                             False
latitude                         False
longitude                        False
dtype: bool