In [15]:
# load in the data from excel and format it to the state data correctly 
import pandas as pd
pd.set_option('display.max_columns', None)


dataframe_bantable = pd.read_csv('BOOK_BANS_TABLE.csv')


fl_table = dataframe_bantable[dataframe_bantable["State"] == "FL"]
unique_districts = fl_table["Overseeing_Agency"].unique()
unique_districts



array(['Indian River County Schools', 'Clay County District Schools',
       'Duval County Public Schools', 'Seminole County Public Schools',
       'Collier County Public Schools', 'Santa Rosa County Schools',
       'Orange County Public Schools', 'Escambia County Public Schools',
       'School District of Manatee County',
       'St. John’s County School District', 'Lake County Schools',
       'Volusia County Schools', 'Brevard County Public Schools',
       'Hernando County Schools', 'St. Johns County School District',
       'Flagler Schools', 'Highlands School Board',
       'Jackson County School Board', 'Martin County Schools',
       'Miami-Dade County School Disrict',
       'Nassau County School District', 'Walton County School District',
       'Broward County Public Schools', 'Alachua County Public Schools',
       'Pinellas School District', 'Palm Beach County School District',
       'Flagler School District', 'Hillsborough County Public Schools',
       'Leon County S

Included datasets

1. banned books dataset in BOOK_BANS_TABLE.csv aquired from Dr. Magnussen 
2. https://www.arcgis.com/home/item.html?id=3dd46e71dba24d32abca61d8fb32e06d points for Florida Schools
3. Florida school districts census refrence https://docs.google.com/spreadsheets/d/1nWxcVLLTVeU5Lc789iJeoUw9dCOvLdQ7fLASPnK2co8/edit#gid=0


### Here are all the seperate Overseeing boards and counties for these bans I will be using this data to fill out more information about Florida. 

['Indian River County Schools', 'Clay County District Schools',  'Duval County Public Schools', 'Seminole County Public Schools',  'Collier County Public Schools', 'Santa Rosa County Schools',  'Orange County Public Schools', 'Escambia County Public Schools',  'School District of Manatee County',  'St. John’s County School District', 'Lake County Schools',  'Volusia County Schools', 'Brevard County Public Schools',  'Hernando County Schools', 'St. Johns County School District',  'Flagler Schools', 'Highlands School Board',  'Jackson County School Board', 'Martin County Schools',  'Miami-Dade County School Disrict',  'Nassau County School District', 'Walton County School District',  'Broward County Public Schools', 'Alachua County Public Schools',  'Pinellas School District', 'Palm Beach County School District',  'Flagler School District', 'Hillsborough County Public Schools',  'Leon County Schools', 'Marion County Public Schools', 'Osceola',  'The School District of Osceola County, Florida',  'Lee County School', 'Polk County Schools',  'The School District of Palm Beach County',  'Pinellas County Schools', 'St. Lucie Public Schools',  'Okaloosa County School District', 'Hillsborough County Schools',  'Pinellas', 'Lake Country Schools', 'Pasco County Schools',  'Sarasota County School']

### Creating Summary tables of the Data
In order to join our data with State and the School District Shape files we need to make 
individual entries for each State that summarize over entries for that state, so we can do a 1:1 join with them in **ArcgisPro**. 

In order to do this we have the following script that summarizes over the **dataframe_bantable** which is our information from the BANNED_BOOKS.csv file. 


Since books have multiple Ban statuses our summaries will include statuses for books awaiting ban 
and books with final decisions: 

**The new table includes:**

        State | most banned book | top 5 banned books | sum_total_challenges_bans | Most_Occurring_Book       |
        int   |    int           |   int              |         int               | (occurences:int, Name:str)|

In [16]:
import pandas as pd

# uses the definitions about what states have done if the definition is unknown 
# for the response the book could still be challenged and just nothing has happened to it 


def create_summary(response, df, col:str):
    # Filter the DataFrame based on the response
    filtered_df = df[df[col] == response]
    
    # Group the filtered DataFrame by the 'State' column and count the number of entries in each group
    summary = filtered_df.groupby('State').size().reset_index(name='Count_of_{}_Books'.format(response))
    
    # If some states have no books for this response, fill those with 0
    all_states = df['State'].unique()
    missing_states = set(all_states) - set(summary['State'].unique())
    missing_summary = pd.DataFrame({'State': list(missing_states), 'Count_of_{}_Books'.format(response): [0] * len(missing_states)})
    
    # Concatenate summary and missing_summary to ensure all states are included
    summary = pd.concat([summary, missing_summary])
    
    # Reset index
    summary.reset_index(drop=True, inplace=True)
    
    return summary


def create_summary_table(column: str, response_types:list) -> pd.DataFrame:
        
    # Create a list of DataFrames containing summaries for each response type
    summary_list = [create_summary(response, dataframe_bantable, column) for response in response_types]
    
    # Merge DataFrames horizontally based on the 'State' column
    table = pd.concat(summary_list, axis=1)
    
    # Drop duplicate 'State' columns (if any)
    table = table.loc[:, ~table.columns.duplicated()]
    
    # add a sum of all the types of bans to the end of the row 
    table["sum_total_challenges_bans"] = table.iloc[:, 1:].sum(axis=1)
    
    # Find the most occurring book for each state
    most_occuring_books = dataframe_bantable.groupby('State')['New_Book_Title'].agg(
        lambda x: (x.value_counts().iloc[0], x.value_counts().index[0]) if not x.empty else (0, None)
    ).reset_index(name='Most_Occurring_Book')
    
    # Merge the most occurring book information with the summary
    table = pd.merge(table, most_occuring_books, on='State', how='left')
    
    return table

# for decision 
response_types = ['Unknown', 'Still in Process', 'Banned/Removed',
       'Retained/Restricted', 'Returned', 'Reshelved',
       'Retained Restricted', 'Retained', 'Weeded/Deselected', 'Removed',
       'No Action', 'Stickered', 'Still in ProcessStill in Process',
       '"Lost"', 'Removed - Curriculum',
       'Challenged - Removed While Under Review', 'Weeded',
       'Weeded/Deselectednned',
       'Library conducted review process and decided to keep book - multiple people spoke out for the book. Citizens wanted the library to change the review process.',
       'Green, Katie', '02/12/2024', 'Banned From Library']
# for initial response 
init_response_types = ["Off Shelves", "Unknown", "On shelves", "Challenged - Not in District/School or library institution","Not in District","Never On Shelves", "On Shelves", "Retained,'N/A - See Notes"]
decision_summary = create_summary_table('Decision', response_types)
initial_response = create_summary_table('Initial_Response', init_response_types)

fl_states_init = initial_response[initial_response['State'] == 'FL']
fl_states_final = decision_summary[decision_summary['State'] == 'FL']


In [74]:
fl_states_init


Unnamed: 0,State,Count_of_Off Shelves_Books,Count_of_Unknown_Books,Count_of_On shelves_Books,Count_of_Challenged - Not in District/School or library institution_Books,Count_of_Not in District_Books,Count_of_Never On Shelves_Books,Count_of_On Shelves_Books,"Count_of_Retained,'N/A - See Notes_Books",sum_total_challenges_bans,Most_Occurring_Book
5,FL,2961,52,44,7,0,0,0,0,3064,"(26, Tricks)"


In [75]:
fl_states_final

Unnamed: 0,State,Count_of_Unknown_Books,Count_of_Still in Process_Books,Count_of_Banned/Removed_Books,Count_of_Retained/Restricted_Books,Count_of_Returned_Books,Count_of_Reshelved_Books,Count_of_Retained Restricted_Books,Count_of_Retained_Books,Count_of_Weeded/Deselected_Books,...,Count_of_Removed - Curriculum_Books,Count_of_Challenged - Removed While Under Review_Books,Count_of_Weeded_Books,Count_of_Weeded/Deselectednned_Books,Count_of_Library conducted review process and decided to keep book - multiple people spoke out for the book. Citizens wanted the library to change the review process._Books,"Count_of_Green, Katie_Books",Count_of_02/12/2024_Books,Count_of_Banned From Library_Books,sum_total_challenges_bans,Most_Occurring_Book
7,FL,572,32,2,6,0,3,1,13,0,...,1,0,0,0,0,0,0,0,634,"(26, Tricks)"


# Florida school districts mapping 

Ok since florida has the most school districts I am going to map the school district table names to actual 
school districts by hand for a more in-depth analysis. 

To do this I take the overseeing district from the **Overseeing_Agency** column, and then creaate a new column titled 
**Inferred_District_County** using the mapping I made here https://docs.google.com/spreadsheets/d/1nWxcVLLTVeU5Lc789iJeoUw9dCOvLdQ7fLASPnK2co8/edit#gid=0. 

In [17]:
dataframe_bantable[dataframe_bantable["State"] == "FL"]


Unnamed: 0,ID,New_Book_Author,New_Book_Title,Series_Name,Book_Co_Author,Illustrators,Challenge_Type,Initial_Response,Challenge_Notes,Decision,Restrictions,Restriction_Details,Library_Type,Month,Year,State,Overseeing_Agency,City,County,Sch_Name,Lib_name,Police,Book_Status,Data_Entry_Date,Links1,Links2,Links3,Links4,Last_Updated,Challenge_Init
0,10,"Thomas, Rachael L.",#BlackLivesMatter: Protesting Racism,,,,Unclear,Off Shelves,Status Change: 2/28/22,Unknown,,,School,November,2021.0,FL,Indian River County Schools,,Indian River,,,Unknown,RETURNED - FL as 5/2022,,,,,,,Affiliation Unknown
3,8573,"Ukazu, Ngozi",#Hockey,"Check, Please!",,,Formal,Off Shelves,Clay County Schools keeps an online database o...,Banned/Removed,,,School,SY,2223.0,FL,Clay County District Schools,,Clay,,,,,10/25/2023,https://docs.google.com/spreadsheets/d/1iP1nVO...,,,,,Affiliation Unknown
7,12,"McNeil, Gretchen",#MurderTrending,MurderTrending,,,Formal,Unknown,,Unknown,,,School,July,2022.0,FL,Clay County District Schools,,Clay,,,,,,,,,,,Affiliation Unknown
10,14370,"Jensen, Kelly",(Don't) Call Me Crazy: 33 Voices Start the Con...,,,,Formal,Off Shelves,"""No violation of Florida Statute, Remain in Co...",Retained/Restricted,Yes,Restricted to MS and HS,School,12,2023.0,FL,Clay County District Schools,,Clay,,,,,02/16/2024,https://docs.google.com/spreadsheets/d/1iP1nVO...,,,,,Affiliation Unknown
16,21,"Ewert, Marcus","10,000 Dresses",,,"Ray, Rex",Formal,Off Shelves,NLTE - challenged again in 1/2023,Retained/Restricted,Yes,Restricted to JH and HS,School,01,2023.0,FL,Clay County District Schools,,Clay,,,,,,From district website - update late Jan,,,,10/23/2023,Group
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13553,10856,Amir,Zahra's Paradise,,,Khalil,Formal,Off Shelves,,Weeded/Deselected,,,School,05,2023.0,FL,Santa Rosa County Schools,,Santa Rosa,,,,,11/7/2023,https://www.santarosaschools.org/ (quick links...,,,,,Community Member
13556,8662,"Muth, Jon J.",Zen Shorts,,,,Formal,Off Shelves,Clay County Schools keeps an online database o...,Retained,,,School,SY,2223.0,FL,Clay County District Schools,,Clay,,,,,10/25/2023,https://docs.google.com/spreadsheets/d/1iP1nVO...,,,,,Affiliation Unknown
13557,7317,"Muth, Jon J.",Zen Shorts,,,,Internal/Administrative Review,Off Shelves,,Unknown,,,School,January,2022.0,FL,Duval County Public Schools,,Duval,,,Unknown,,,,,,,,Administration
13559,7318,"Bunker, Lisa",Zenobia July,,,,Formal,Off Shelves,"NLTE,HB 1557",Retained,,,School,July,2022.0,FL,Clay County District Schools,,Clay,,,,,,,,,,10/24/2023,Group
