## Colorado Elk Tag Application Data Preparation

In [1]:
import pandas as pd
import os

### Cleaning up data types and Choice column for applicants

In [11]:
# Folder holding the raw applicant workbooks and the folder the cleaned
# copies are written to.  os.path.join keeps the paths valid on any OS
# (the result is unchanged on Windows).
directory = os.path.join('Input-Data', 'Applicant-Data')
output_directory = os.path.join('Output-Data', 'Applicant-Data')
os.makedirs(output_directory, exist_ok=True)

# Columns that must be integers once the "-" placeholder has been replaced.
# The keys have to match the workbook headers exactly (astype raises on an
# unknown column), so the 'Landownder' spelling is kept as-is.
convert_dict = {
    'Preference Points': int,
    'A-Adult-Res': int,
    'A-Adult-NonRes': int,
    'A-Youth-Res': int,
    'A-Youth-NonRes': int,
    'A-Landowner(LPP)-Unrestricted': int,
    'A-Landownder(LPP)-Restricted': int,
}


def _assign_choices(frame):
    """Return the Choice number for every row of *frame*, top to bottom.

    Column 9 is compared as the hunt code and column 0 as a running counter:

    * first row, or column 9 differs from the previous row -> Choice is 1
    * same column 9 and column 0 is the previous value + 1 -> Choice unchanged
    * same column 9 but column 0 does not continue the count -> Choice + 1

    The original loop started at the second row, which left the first row
    without a Choice; it was then filled with 0 and lost by the
    ``Choice == 1`` filter.  The first row is now given Choice 1.
    """
    hunt_codes = frame.iloc[:, 9].tolist()
    counters = frame.iloc[:, 0].tolist()

    choices = []
    choice = 1
    for position, hunt_code in enumerate(hunt_codes):
        if position == 0 or hunt_code != hunt_codes[position - 1]:
            choice = 1
        elif counters[position] != counters[position - 1] + 1:
            choice += 1
        choices.append(choice)
    return choices


# Iterate over all the files in the directory and perform the transformations
# on every .xlsx file except the consolidated "Tot..." workbooks.
for filename in os.listdir(directory):
    if not filename.endswith(".xlsx") or filename.startswith("Tot"):
        continue

    # Import the file as a dataframe
    df = pd.read_excel(os.path.join(directory, filename))

    # Replace the "-" placeholder used by CPW with 0
    df = df.replace("-", 0)

    # Remap the dataframe with the correct data types
    df = df.astype(convert_dict)

    # Write the computed Choice into column 8 in one assignment instead of
    # cell by cell.
    df[df.columns[8]] = _assign_choices(df)
    df['Choice'] = df['Choice'].fillna(0).astype(int)

    # Drop the first column; it is only needed to compute the Choice
    df = df.drop(columns=df.columns[0])

    # Select only Choice 1 applicants
    output_df = df.loc[df['Choice'] == 1]

    # Output the re-formatted dataframe under the same file name
    output_df.to_excel(os.path.join(output_directory, filename))

### Cleaning up data types and Choice column for Draws

In [12]:
# Folder holding the raw draw workbooks and the folder the cleaned copies
# are written to.  os.path.join keeps the paths valid on any OS (the result
# is unchanged on Windows).
directory = os.path.join('Input-Data', 'Draw-Data')
output_directory = os.path.join('Output-Data', 'Draw-Data')
os.makedirs(output_directory, exist_ok=True)

# Columns that must be integers once the "-" placeholder has been replaced.
# The keys have to match the workbook headers exactly (astype raises on an
# unknown column), so the 'Landownder' spelling is kept as-is.
convert_dict = {
    'Preference Points': int,
    'D-Adult-Res': int,
    'D-Adult-NonRes': int,
    'D-Youth-Res': int,
    'D-Youth-NonRes': int,
    'D-Landowner(LPP)-Unrestricted': int,
    'D-Landownder(LPP)-Restricted': int,
}


def _assign_choices(frame):
    """Return the Choice number for every row of *frame*, top to bottom.

    Column 9 is compared as the hunt code and column 0 as a running counter:

    * first row, or column 9 differs from the previous row -> Choice is 1
    * same column 9 and column 0 is the previous value + 1 -> Choice unchanged
    * same column 9 but column 0 does not continue the count -> Choice + 1

    The original loop started at the second row, which left the first row
    without a Choice; it was then filled with 0 and lost by the
    ``Choice == 1`` filter.  The first row is now given Choice 1.
    """
    hunt_codes = frame.iloc[:, 9].tolist()
    counters = frame.iloc[:, 0].tolist()

    choices = []
    choice = 1
    for position, hunt_code in enumerate(hunt_codes):
        if position == 0 or hunt_code != hunt_codes[position - 1]:
            choice = 1
        elif counters[position] != counters[position - 1] + 1:
            choice += 1
        choices.append(choice)
    return choices


# Iterate over all the files in the directory and perform the transformations
# on every .xlsx file except the consolidated "Tot..." workbooks.
for filename in os.listdir(directory):
    if not filename.endswith(".xlsx") or filename.startswith("Tot"):
        continue

    # Import the file as a dataframe
    df = pd.read_excel(os.path.join(directory, filename))

    # Replace the "-" placeholder used by CPW with 0
    df = df.replace("-", 0)

    # Remap the dataframe with the correct data types
    df = df.astype(convert_dict)

    # Write the computed Choice into column 8 in one assignment instead of
    # cell by cell.
    df[df.columns[8]] = _assign_choices(df)
    df['Choice'] = df['Choice'].fillna(0).astype(int)

    # Drop the first column; it is only needed to compute the Choice
    df = df.drop(columns=df.columns[0])

    # Select only Choice 1 rows
    output_df = df.loc[df['Choice'] == 1]

    # Output the re-formatted dataframe under the same file name
    output_df.to_excel(os.path.join(output_directory, filename))

In [13]:
# Folder with the cleaned per-year draw workbooks and the folder the
# per-year totals are written to.  os.path.join keeps the paths valid on
# any OS (the result is unchanged on Windows).
directory = os.path.join('Output-Data', 'Draw-Data')
output_directory = os.path.join('Output-Data', 'Total-Awarded')
os.makedirs(output_directory, exist_ok=True)

# Output column names, in the same order as workbook columns 2-7 they are
# summed from.  The 'Landownder' spelling is part of the existing output
# schema and is kept unchanged.
TAG_COLUMNS = [
    'Adult-Res',
    'Adult-Non Res',
    'Youth-Res',
    'Youth-Non Res',
    'Landowner-Unrestricted',
    'Landownder-Restricted',
]


def _sum_tags_by_hunt_code(draws):
    """Sum columns 2-7 of *draws* over each run of equal values in column 9.

    Returns a dict mapping the column-9 value (the hunt code) to a list of
    six totals ordered like ``TAG_COLUMNS``.  Rows are grouped only while
    consecutive rows share the same hunt code; if a code shows up again in a
    later run, the later run replaces the earlier entry.

    The original loop stored a run only when the *next* hunt code started,
    so the last hunt code of every workbook was never written.  The final
    run is now flushed after the loop.
    """
    totals = {}
    current_code = None
    running = None

    for row in draws.itertuples(index=False, name=None):
        hunt_code = row[9]
        counts = row[2:8]

        if running is not None and hunt_code == current_code:
            # Same hunt code as the previous row: keep accumulating.
            running = [total + count for total, count in zip(running, counts)]
            continue

        # A new hunt code starts: store the finished run, then restart.
        if running is not None:
            totals[current_code] = running
        current_code = hunt_code
        running = list(counts)

    # Store the last run, which no following row triggers.
    if running is not None:
        totals[current_code] = running
    return totals


# Iterate over all the files in the directory and build the totals for every
# .xlsx file except the consolidated "Tot..." workbook.
for filename in os.listdir(directory):
    if not filename.endswith(".xlsx") or filename.startswith("Tot"):
        continue

    df_total_tags_init = pd.read_excel(os.path.join(directory, filename))

    # Dictionary of how many tags were awarded for each sub category
    Total_Tags_Dict = _sum_tags_by_hunt_code(df_total_tags_init)

    # Convert the dictionary to a dataframe indexed by hunt code.  Passing
    # the column names explicitly keeps an empty workbook from raising a
    # KeyError further down.
    Total_Tags_Awarded = pd.DataFrame.from_dict(
        Total_Tags_Dict, orient='index', columns=TAG_COLUMNS
    )

    # Sum across the sub categories to get the total number of tags awarded
    Total_Tags_Awarded['Total Tags'] = Total_Tags_Awarded[TAG_COLUMNS].sum(axis=1)

    # Create a Year column based on the first four characters of the filename
    year = filename[0:4]
    Total_Tags_Awarded['Year'] = year

    # Create a column for the primary key: "<year>-<hunt code>"
    Total_Tags_Awarded['Primary Key'] = [
        f"{year}-{hunt_code}" for hunt_code in Total_Tags_Awarded.index
    ]

    # Output the total tags awarded dataframe
    Total_Tags_Awarded.to_excel(
        os.path.join(output_directory, year + '-Total-Tags-Awarded.xlsx')
    )

### Consolidate Data for each output into a master source for all date ranges

In [23]:
# Folder with the cleaned per-year applicant workbooks; the consolidated
# workbook is written to the same folder.  os.path.join keeps the paths
# valid on any OS (the result is unchanged on Windows).
directory = os.path.join('Output-Data', 'Applicant-Data')

# Read every per-year workbook, skipping the consolidated "Tot..." output of
# a previous run.  Sorting makes the row order independent of the order in
# which the OS lists the files.
applicant_frames = [
    pd.read_excel(os.path.join(directory, filename))
    for filename in sorted(os.listdir(directory))
    if filename.endswith(".xlsx") and not filename.startswith("Tot")
]
if not applicant_frames:
    raise FileNotFoundError(
        f"No per-year .xlsx workbooks found in {directory!r}"
    )

# Concatenate once instead of growing a dataframe inside the loop, which
# re-copied all previous rows for every file.
All_Applicants = pd.concat(applicant_frames)

# Drop the first column (the index written out when the per-year workbooks
# were saved).
All_Applicants = All_Applicants.drop(columns=All_Applicants.columns[0])

# Secondary key: "<Year>-<Hunt Code>"
All_Applicants['Secondary Key'] = [
    f"{year}-{hunt_code}"
    for year, hunt_code in zip(All_Applicants['Year'], All_Applicants['Hunt Code'])
]

All_Applicants.to_excel(
    os.path.join(directory, 'Total-All-Applicant-Preference_Points.xlsx')
)

In [22]:
# Folder with the cleaned per-year draw workbooks; the consolidated workbook
# is written to the same folder.  os.path.join keeps the paths valid on any
# OS (the result is unchanged on Windows).
directory = os.path.join('Output-Data', 'Draw-Data')

# Read every per-year workbook, skipping the consolidated "Tot..." output of
# a previous run.  Sorting makes the row order independent of the order in
# which the OS lists the files.
draw_frames = [
    pd.read_excel(os.path.join(directory, filename))
    for filename in sorted(os.listdir(directory))
    if filename.endswith(".xlsx") and not filename.startswith("Tot")
]
if not draw_frames:
    raise FileNotFoundError(
        f"No per-year .xlsx workbooks found in {directory!r}"
    )

# Concatenate once instead of growing a dataframe inside the loop, which
# re-copied all previous rows for every file.
All_Draw = pd.concat(draw_frames)

# Drop the first column (the index written out when the per-year workbooks
# were saved).
All_Draw = All_Draw.drop(columns=All_Draw.columns[0])

# Secondary key: "<Year>-<Hunt Code>"
All_Draw['Secondary Key'] = [
    f"{year}-{hunt_code}"
    for year, hunt_code in zip(All_Draw['Year'], All_Draw['Hunt Code'])
]

All_Draw.to_excel(
    os.path.join(directory, 'Total-All-Draw-Preference_Points.xlsx')
)

In [18]:
# Folder with the per-year total-tags workbooks; the consolidated workbook is
# written to the same folder.  os.path.join keeps the paths valid on any OS
# (the result is unchanged on Windows).
directory = os.path.join('Output-Data', 'Total-Awarded')

# Read every per-year workbook, skipping the consolidated "All..." output of
# a previous run.  Sorting makes the row order independent of the order in
# which the OS lists the files.
awarded_frames = [
    pd.read_excel(os.path.join(directory, filename))
    for filename in sorted(os.listdir(directory))
    if filename.endswith(".xlsx") and not filename.startswith("All")
]
if not awarded_frames:
    raise FileNotFoundError(
        f"No per-year .xlsx workbooks found in {directory!r}"
    )

# Concatenate once instead of growing a dataframe inside the loop, which
# re-copied all previous rows for every file.
All_Awarded = pd.concat(awarded_frames)

# Drop the first column of the concatenated data, as the original cell did.
All_Awarded = All_Awarded.drop(columns=All_Awarded.columns[0])

All_Awarded.to_excel(os.path.join(directory, 'All-Total-Tags-Awarded.xlsx'))