In [1]:
import pandas as pd
import numpy as np
import glob
import json
import csv
import requests
from math import pi

In [2]:
# Build `codes`: a lookup from each state's id in the GeoJSON file to the
# state's name (for example '06' -> 'California').
url = "https://raw.githubusercontent.com/PublicaMundi/MappingAPI/master/data/geojson/us-states.json"

# requests has no default timeout: without one, a stalled connection would
# leave this cell hanging forever. Give up after 30 seconds instead.
response = requests.get(url, timeout=30)
codes = {}
if response.status_code == 200:
    data = response.json()
    # Each feature carries its id at the top level and its name under 'properties'.
    for feature in data['features']:
        codes[feature['id']] = feature['properties']['name']
else:
    # Best effort: `codes` stays empty. Later cells that index the result of
    # get_keys_from_value(codes, ...) will fail if this branch was taken.
    print("Failed to fetch data:", response.status_code)
print(codes)

{'01': 'Alabama', '02': 'Alaska', '04': 'Arizona', '05': 'Arkansas', '06': 'California', '08': 'Colorado', '09': 'Connecticut', '10': 'Delaware', '11': 'District of Columbia', '12': 'Florida', '13': 'Georgia', '15': 'Hawaii', '16': 'Idaho', '17': 'Illinois', '18': 'Indiana', '19': 'Iowa', '20': 'Kansas', '21': 'Kentucky', '22': 'Louisiana', '23': 'Maine', '24': 'Maryland', '25': 'Massachusetts', '26': 'Michigan', '27': 'Minnesota', '28': 'Mississippi', '29': 'Missouri', '30': 'Montana', '31': 'Nebraska', '32': 'Nevada', '33': 'New Hampshire', '34': 'New Jersey', '35': 'New Mexico', '36': 'New York', '37': 'North Carolina', '38': 'North Dakota', '39': 'Ohio', '40': 'Oklahoma', '41': 'Oregon', '42': 'Pennsylvania', '44': 'Rhode Island', '45': 'South Carolina', '46': 'South Dakota', '47': 'Tennessee', '48': 'Texas', '49': 'Utah', '50': 'Vermont', '51': 'Virginia', '53': 'Washington', '54': 'West Virginia', '55': 'Wisconsin', '56': 'Wyoming', '72': 'Puerto Rico'}


In [3]:
#Reverse lookup: find the keys that map to a given value
def get_keys_from_value(dictionary, value):
    """Return a list of every key in ``dictionary`` whose value equals ``value``.

    Keys are listed in the dictionary's iteration order. The list is empty
    when no entry matches.
    """
    matching_keys = []
    for candidate_key, candidate_value in dictionary.items():
        if candidate_value == value:
            matching_keys.append(candidate_key)
    return matching_keys

In [5]:
# Aggregate, per state, the number of trees and their summed trunk
# cross-section area from every CSV in ../../Dataset.
def parse_bin_diameter_cm(bin_label):
    """Return the second-to-last space-separated token of ``bin_label`` as a float.

    Returns None when the label has fewer than two tokens or when that token
    is not a plain non-negative decimal (digits with at most one '.').
    NOTE(review): labels are presumably shaped like '15.24 to 30.48 cm', in
    which case this picks the bin's upper bound -- confirm against the data.
    """
    tokens = bin_label.split(' ')
    if len(tokens) >= 2 and tokens[-2].replace('.', '', 1).isdigit():
        return float(tokens[-2])
    return None


# Maps state name -> [tree count, summed area, state id looked up in `codes`].
State_Level_data = {}
# glob returns paths in arbitrary, filesystem-dependent order. Sorting makes
# the dict order and the floating-point sums reproducible between runs.
FullFilenames = sorted(glob.glob("../../Dataset/*.csv"))
for csv_path in FullFilenames:
    trees = pd.read_csv(
        csv_path,
        usecols=['scientific_name', 'city', 'state', 'diameter_breast_height_binned_CM'],
    ).dropna(how='any')  # drop rows missing any of the four columns
    if trees.empty:
        continue

    # Every row of a file is attributed to the state found in its first row.
    # NOTE(review): assumes each CSV covers a single state -- confirm.
    state_name = trees['state'].iloc[0]

    # Unparseable labels become NaN, which .sum() skips below.
    diameters_cm = trees['diameter_breast_height_binned_CM'].map(parse_bin_diameter_cm).astype(float)
    # Diameter / 100 (presumably cm -> m), halved to a radius, then pi * r^2.
    radii = (diameters_cm / 100) / 2
    totalArea = (pi * radii ** 2).sum()
    # The tree count includes rows whose diameter could not be parsed.
    totalTrees = len(trees)

    if state_name not in State_Level_data:
        matching_codes = get_keys_from_value(codes, state_name)
        if not matching_codes:
            # Name the offending file and state instead of failing with a
            # bare "list index out of range".
            raise LookupError(
                f"State {state_name!r} (from {csv_path}) has no entry in codes"
            )
        State_Level_data[state_name] = [totalTrees, totalArea, matching_codes[0]]
    else:
        State_Level_data[state_name][0] += totalTrees
        State_Level_data[state_name][1] += totalArea

print(State_Level_data)


{'New Mexico': [2325, 312.43988527134985, '35'], 'California': [1123633, 137673.3848150004, '06'], 'Colorado': [344031, 41706.01838465935, '08'], 'Texas': [44321, 8051.182557365708, '48'], 'Maryland': [120620, 15868.38178869361, '24'], 'Massachusetts': [14498, 3669.2350561981243, '25'], 'Florida': [118106, 14765.77618002787, '12'], 'Ohio': [135318, 14389.783015898198, '39'], 'Iowa': [15273, 3176.7518694538517, '19'], 'Michigan': [85426, 11467.517883916358, '26'], 'North Carolina': [19782, 2612.506742689875, '37'], 'Hawaii': [14655, 2331.0226307315493, '15'], 'Indiana': [97227, 12895.75196011939, '18'], 'Tennessee': [8322, 709.3377731586706, '47'], 'Nevada': [29198, 1820.6263211862222, '32'], 'Kentucky': [32711, 2095.6529530309294, '21'], 'Wisconsin': [115365, 15803.369192295482, '55'], 'Minnesota': [174674, 21969.861388477715, '27'], 'New York': [697631, 92447.31010964904, '36'], 'Oklahoma': [8866, 1139.3256862609744, '40'], 'Kansas': [30841, 1209.9748966565803, '20'], 'Arizona': [214,

In [10]:
# Collect the id (key in `codes`) of every state that already has an entry
# in State_Level_data, and show them in sorted order.
StatesInStateLevelData = State_Level_data.keys()
StatesExistsKey = [
    get_keys_from_value(codes, state_name)[0]
    for state_name in StatesInStateLevelData
]
print(sorted(StatesExistsKey))

# Every entry of `codes` without data gets a zero-filled placeholder so that
# State_Level_data ends up with one entry per id in `codes`.
for state_code, state_name in codes.items():
    if state_code in StatesExistsKey:
        continue
    State_Level_data[state_name] = [0, 0, str(state_code)]
print(len(State_Level_data))


['04', '06', '08', '11', '12', '15', '18', '19', '20', '21', '24', '25', '26', '27', '29', '32', '35', '36', '37', '39', '40', '41', '44', '46', '47', '48', '51', '53', '55']
52


In [11]:
# Flatten State_Level_data (name -> [tree count, area, id]) into one row per
# state and write it out ordered by id.
# Building the rows directly avoids the intermediate list-valued 'data'
# column, whose expansion raised ValueError when State_Level_data was empty.
state_rows = [
    (state_id, state_name, tree_count, total_area)
    for state_name, (tree_count, total_area, state_id) in State_Level_data.items()
]
df = pd.DataFrame(state_rows, columns=['id', 'name', 'totalTrees', 'totalArea'])
# Ids are strings, so this is a lexicographic sort.
df = df.sort_values(by='id')
# NOTE(review): ids such as '06' are written unquoted; a reader that infers
# numeric types will drop the leading zero -- confirm the consumer reads
# this column as text.
df.to_csv('State_Level_data.csv', index=False)