In [79]:
# Dependencies
from flask import Flask, render_template, redirect, Markup, jsonify
from flask_pymongo import PyMongo
from bson.json_util import dumps
import pymongo
from flask_cors import CORS
import pandas as pd
import json

In [80]:
app = Flask(__name__)

CORS(app, support_credentials = True)

# Use flask_pymongo to set up mongo connection
app.config["MONGO_URI"] = "mongodb://localhost:27017/election2020"
mongo = PyMongo(app)

In [95]:
# create route that rendrs index.html template
# @app.route("/")

def getData():
    # Pull data from Mongo
    candidate = mongo.db.pCandidate.find()
    state = mongo.db.pState.find()
    county = mongo.db.pCounty.find()
    county_IDs = mongo.db.countyIds.find()

    all_data = [candidate, state, county, county_IDs ]

    return all_data

def index():

    # Create dictionary to for topojson state ids
    state_ids = {"Alabama": "01",
                 "Alaska": "02",
                 "Arizona": "04",
                 "Arkansas": "05",
                 "California": "06",
                 "Colorado": "08",
                 "Connecticut": "09",
                 "Delaware": "10",
                 "District of Columbia": "11",
                 "Florida": "12",
                 "Georgia": "13",
                 "Hawaii": "15",
                 "Idaho": "16",
                 "Illinois": "17",
                 "Indiana": "18",
                 "Iowa": "19",
                 "Kansas": "20",
                 "Kentucky": "21",
                 "Louisiana": "22",
                 "Maine": "23",
                 "Maryland": "24",
                 "Massachusetts": "25",
                 "Michigan": "26",
                 "Minnesota": "27",
                 "Mississippi": "28",
                 "Missouri": "29",
                 "Montana": "30",
                 "Nebraska": "31",
                 "Nevada": "32",
                 "New Hampshire": "33",
                 "New Jersey": "34",
                 "New Mexico": "35",
                 "New York": "36",
                 "North Carolina": "37",
                 "North Dakota": "38",
                 "Ohio": "39",
                 "Oklahoma": "40",
                 "Oregon": "41",
                 "Pennsylvania": "42",
                 "Rhode Island": "44",
                 "South Carolina": "45",
                 "South Dakota": "46",
                 "Tennessee": "47",
                 "Texas": "48",
                 "Utah": "49",
                 "Vermont": "50",
                 "Virginia": "51",
                 "Washington": "53",
                 "West Virginia": "54",
                 "Wisconsin": "55",
                 "Wyoming": "56"}

    all_data = getData()

    return_list = []

    for dataset in all_data:

        list_cur = list(dataset)
        json_data = dumps(list_cur, ensure_ascii=False).encode('utf8')
        return_list.append(json_data)

    # Pull only candidate data
    candidate_data = return_list[0]

    # Convert string of json to a json file
    candidate_json = json.loads(candidate_data)

    # Convert json file to data frame
    df = pd.DataFrame.from_records(candidate_json)

    # Filter data by our two candidates (Biden and Trump)
    candidate_df = df.loc[(df["candidate"] == "Joe Biden") | (
        df["candidate"] == "Donald Trump"), :]

    # Group data by states
    condensed_df = candidate_df.groupby(["state", "candidate"]).sum()

    # Calculate Democrat win percentage
    i = 0         # 0 means Trump, 1 means Biden
    count = 0
    finalJson = {}
    percentDemStates = {}
    x = 1 
       
    for index, row in condensed_df.iterrows():
        if i == 0:
            count = count + row["total_votes"]
            i = i + 1
        else:
            count = count + row["total_votes"]
            percent = row["total_votes"] / count
            thestate = row.name[0]
            state_id = state_ids.get(thestate)

            state_info = {state_id: percent}

            percentDemStates.update(state_info)

            i = 0
            count = 0
            x = x + 1

            
            
    # County votes calculation
    
    # Pull in topojson county ID data from mongo
    counties = pd.DataFrame(columns = ['Name', 'ID'])
    countyIds_data = return_list[3]
    countyIds_json = json.loads(countyIds_data)
    
    # Create dictionary with county name keys and id values
    for element in countyIds_json:
        ids = element.get("id")
        names = element.get("name")
        
        if names == "De Kalb":
            names = "DeKalb"
        
        counties = counties.append({'Name' : names, 'ID' : ids}, ignore_index = True)
    
    counties.to_csv("counties.csv")
    
    # Clean data
    # Delete "County" from any county names
    county_name_list = candidate_df["county"].tolist()
    
    new_county_list = []
    for county in county_name_list:
        
        county_end = county.find(' County')
        parish_end = county.find(' Parish')
        ctytwnship_end = county.find( ' Cty Townships')
        
        if county_end > 0:
            updated_county = county[0:county_end]
        elif parish_end > 0:
            updated_county = county[0:parish_end]
        elif ctytwnship_end > 0:
            updated_county = county[0:ctytwnship_end]
        else:
            updated_county = county
            
        new_county_list.append(updated_county)
           
    # Change original county names with new county list
    candidate_df["new_county_name"] = new_county_list
    
    
    # Append state IDs
    # Define blank state id list
    state_id_lst = []
    
    # Pull un-edited state column from original dataset
    state_lst = candidate_df["state"]
    
    # Loop through state_lst and append the state id (taken from the state_ids dict) to a new list (state_id_lst)
    for astate in state_lst:
        the_id = state_ids.get(astate)
        the_id = int(the_id)*1000
        state_id_lst.append(the_id)
    
    # Add a column to the candidate_df of the corresponding state ids
    candidate_df["state_id"] = state_id_lst  
    
    
    # Calculate Democrat win percentage
    i = 0         # 0 means Trump, 1 means Biden
    count = 0
    percentDemCounties = {}
    x = 1 

    
    countiesCandidate_df = candidate_df.groupby(["new_county_name","state_id", "candidate" ]).sum()
    
    
    
    countiesCandidate_df.to_csv("counties.csv")
    for index, row in countiesCandidate_df.iterrows():
        
        if i == 0:
            count = count + row["total_votes"]
            i = i + 1
        else:
            count = count + row["total_votes"]
            
            # Two "counties" in the dataset (Cary Plt. and Kingsbury Plt.) have Biden and Trump at 0 votes so go 50% for each
            try:
                percent = row["total_votes"] / count
            
            except:
                percent = .5
                
            
            county = row.name[0]
            state_id = row.name[1]        
            
            if county == "Oglala Lakota":
                print("state_id")
                
            try:
                county_id_df = counties.loc[(counties["Name"] == county) & (counties["ID"] > state_id) & (counties["ID"] < state_id + 1000), :] 
                county_id = county_id_df['ID'].values[0]
            
            except:
                county_id = None              
            
            if county_id != None: 
                county_info = {county_id: percent}
                percentDemCounties.update(county_info)
                

                

            i = 0
            count = 0
            x = x + 1
    
    finalJson.update({"percentDemStates": percentDemStates})
    finalJson.update({"percentDemCounties": percentDemCounties})
    
    return countiesCandidate_df



#if __name__ == "__main__":
    #app.run(debug=True)

In [96]:
countiesCandidate_df  = index()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [55]:
countiesCandidate_df

Unnamed: 0,Name,ID
0,Alabama,1000
1,Autauga,1001
2,Baldwin,1003
3,Barbour,1005
4,Bibb,1007
...,...,...
3349,Ngardmau,78222
3350,Ngatpang,78224
3351,Ngchesar,78226
3352,Peleliu,78350


In [56]:
county = "DeKalb"
state_id = 13000
#county_id_df = counties.loc[(counties["Name"] == county) & (counties["ID"] > state_id) & (counties["ID"] < state_id + 1000), :]

county_id_df = counties.loc[(counties["Name"] == county), :]

county_id_df

Unnamed: 0,Name,ID
25,DeKalb,1049
438,DeKalb,13089
624,DeKalb,17037
725,DeKalb,18033
1537,DeKalb,29063
2489,DeKalb,47041


In [209]:
county = "Paulding"
state_id = 13000

countiesCandidate_df.loc[countiesCandidate_df.loc[county]]


#county_id_df = counties.loc[(counties["Name"] == county) & (counties["ID"] > state_id) & (counties["ID"] < state_id + 1000), :] 
#county_id = county_id_df['ID'].values[0]
#county_id

NotImplementedError: Indexing a MultiIndex with a DataFrame key is not implemented

In [46]:
    candidate = mongo.db.pCandidate.find()
    state = mongo.db.pState.find()
    county = mongo.db.pCounty.find()

    all_data = [candidate, state, county]

    return_list = []

    for dataset in all_data:

        list_cur = list(dataset)
        json_data = dumps(list_cur)
        return_list.append(json_data)
    
    # Pull only candidate data
    candidate_data = return_list[0]
    
    # Convert string of json to a json file
    candidate_json = json.loads(candidate_data)
    
    # Convert json file to data frame
    df = pd.DataFrame.from_records(candidate_json)
    
    # Filter data by our two candidates (Biden and Trump)
    candidate_df = df.loc[(df["candidate"] == "Joe Biden") | (df["candidate"] == "Donald Trump"),:]
    
    # Group data by states
    condensed_df = candidate_df.groupby(["state","candidate"]).sum()
    
    condensed_df



Unnamed: 0_level_0,Unnamed: 1_level_0,total_votes,won
state,candidate,Unnamed: 2_level_1,Unnamed: 3_level_1
Alabama,Donald Trump,1441168,54.0
Alabama,Joe Biden,849648,13.0
Alaska,Donald Trump,189892,20.0
Alaska,Joe Biden,153405,17.0
Arizona,Donald Trump,1661686,10.0
...,...,...,...
West Virginia,Joe Biden,235984,0.0
Wisconsin,Donald Trump,1610065,58.0
Wisconsin,Joe Biden,1630673,14.0
Wyoming,Donald Trump,193559,21.0


In [45]:
    for index, row in condensed_df.iterrows():
        print(row.name[0])

Alabama
Alabama
Alaska
Alaska
Arizona
Arizona
Arkansas
Arkansas
California
California
Colorado
Colorado
Connecticut
Connecticut
Delaware
Delaware
District of Columbia
District of Columbia
Florida
Florida
Georgia
Georgia
Hawaii
Hawaii
Idaho
Idaho
Illinois
Illinois
Indiana
Indiana
Iowa
Iowa
Kansas
Kansas
Kentucky
Kentucky
Louisiana
Louisiana
Maine
Maine
Maryland
Maryland
Massachusetts
Massachusetts
Michigan
Michigan
Minnesota
Minnesota
Mississippi
Mississippi
Missouri
Missouri
Montana
Montana
Nebraska
Nebraska
Nevada
Nevada
New Hampshire
New Hampshire
New Jersey
New Jersey
New Mexico
New Mexico
New York
New York
North Carolina
North Carolina
North Dakota
North Dakota
Ohio
Ohio
Oklahoma
Oklahoma
Oregon
Oregon
Pennsylvania
Pennsylvania
Rhode Island
Rhode Island
South Carolina
South Carolina
South Dakota
South Dakota
Tennessee
Tennessee
Texas
Texas
Utah
Utah
Vermont
Vermont
Virginia
Virginia
Washington
Washington
West Virginia
West Virginia
Wisconsin
Wisconsin
Wyoming
Wyoming


In [53]:
string = "string"
string.find(" County")

-1