In [1]:
import io
import json

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
# GeoJSON creator
# https://geoman.io/geojson-editor
# Awesome D3 Graph Library
# https://www.d3-graph-gallery.com/

# US Minimum Wages By State
https://www.minimum-wage.org/wage-by-state

In [3]:
# Web scraping: US minimum wage by state
url = 'https://www.minimum-wage.org/wage-by-state'
response = requests.get(url, timeout=5)
# Fail loudly on a 4xx/5xx reply instead of parsing an error page as if it were data
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")
# One list of cell texts per table-body row
minimum_wage = [
    [cell.text for cell in table_row.find_all('td')]
    for table_row in soup.select('tbody tr')
]
# Create DF
minimum_wage_df = pd.DataFrame(minimum_wage)
# Rename columns (assigning four labels requires the scraped rows to have four cells)
col = ['States', 'Minimum_Wage', 'Source', 'Extra']
minimum_wage_df.columns = col
# Strip unwanted characters. Note that str.strip takes a *set of characters* to
# remove from both ends, not a substring: newline, '/', space, 'h', 'o', 'u',
# 'r' and '$' are trimmed, which leaves the numeric text untouched.
minimum_wage_df['States'] = minimum_wage_df['States'].str.strip('\n')
minimum_wage_df['Minimum_Wage'] = minimum_wage_df['Minimum_Wage'].str.strip('\n/ hour$')
# Keep only the state name and the wage
minimum_wage_df = minimum_wage_df.drop(columns=['Source', 'Extra'])
minimum_wage_df.head()

Unnamed: 0,States,Minimum_Wage
0,Alabama,7.25
1,Alaska,10.34
2,Arizona,12.15
3,Arkansas,11.0
4,California,13.0


# Minimum Wage by Country
https://en.wikipedia.org/wiki/List_of_countries_by_minimum_wage

In [4]:
# Web scraping: Wikipedia list of countries by minimum wage
url = 'https://en.wikipedia.org/wiki/List_of_countries_by_minimum_wage'
response = requests.get(url, timeout=5)
# Fail loudly on a 4xx/5xx reply instead of parsing an error page as if it were data
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")
# One list of cell texts per table-body row
global_minimum_wage = []
for row in soup.select('tbody tr'):
    row_text = [x.text for x in row.find_all('td')]
    global_minimum_wage.append(row_text)
# Create DF
global_minimum_wage_df = pd.DataFrame(global_minimum_wage)
# Keep the rows at positions 3..204 (in the rendered output these run from
# Afghanistan to Zambia, i.e. every listed country, not only OECD members).
# .copy() makes the result an independent frame so the column assignments
# below do not write into a slice of the original (SettingWithCopyWarning).
global_minimum_wage_df = global_minimum_wage_df[3:205].copy()
# Rename columns.
# NOTE(review): judging by the rendered output these labels do not describe the
# scraped cells (e.g. the column labelled '2018 Nominal Annual' holds a text
# description of the wage) -- confirm against the page. The labels are kept
# unchanged because later cells select '2019 Nominal Annual' by name.
global_col = ['Countries', '2018 Nominal Annual', '2018 Nominal Hourly', '2018 PPP Annual', '2018 PPP Hourly', '2018 Annual Working Hours', '2019 Nominal Annual', '2019 Nominal Hourly', '2019 PPP Annual', '2019 PPP Hourly', '2019  Working Hours']
global_minimum_wage_df.columns = global_col
# Trim the newlines left over from the HTML cells
global_minimum_wage_df['Countries'] = global_minimum_wage_df['Countries'].str.strip('\n')
global_minimum_wage_df['2019 Nominal Annual'] = global_minimum_wage_df['2019 Nominal Annual'].str.strip('\n')
# Anything that is not a plain number becomes NaN
global_minimum_wage_df['2019 Nominal Annual'] = pd.to_numeric(global_minimum_wage_df['2019 Nominal Annual'], errors='coerce')
global_minimum_wage_df.head()

Unnamed: 0,Countries,2018 Nominal Annual,2018 Nominal Hourly,2018 PPP Annual,2018 PPP Hourly,2018 Annual Working Hours,2019 Nominal Annual,2019 Nominal Hourly,2019 PPP Annual,2019 PPP Hourly,2019 Working Hours
3,Afghanistan,"5,500 Afghani ($67) per month for non-permanen...",866\n,"\n3,272\n\n",40\n,0.42\n,1.57,168.3%\n,2017\n\n\n,,
4,Albania,"26,000 Albanian lekë ($240) per month, in priv...","2,622\n","\n5,218\n\n",40\n,1.26\n,2.51,45.2%\n,5 May 2017\n\n\n,,
5,Algeria,"20,000 Algerian dinars ($156.19) per month, na...","2,010\n","\n6,247\n\n",40\n,0.97\n,3.0,41.6%\n,1 May 2020\n,,
6,Andorra,"€1050.40 per month, €6.06 per hour.[14]\n","14,397\n","11,020\n",40\n,6.72\n,5.3,28%\n,1 January 2019\n\n\n,,
7,Angola,"21,454 kwanza ($58) per month; paid thirteen t...",764\n,"\n2,108\n\n",44\n,0.33\n,0.92,32.7%\n,1 April 2019\n\n\n,,


In [5]:
# Working frame holding just the country name and the wage column that the
# following cells patch by hand; the row labels of the source frame carry over.
nom_min_wage_df = global_minimum_wage_df[['Countries', '2019 Nominal Annual']].copy()

In [6]:
# Hand-entered wage values keyed by row label, each with the source it was taken from.
# NOTE(review): the labels are positions in the scraped table, so they presumably
# point at the countries named in the source URLs -- re-check after any re-scrape.
manual_wages = {
    12: 10.5,    # https://wageindicator.org/salary/minimum-wage/austria/
    49: 17.8,    # https://www.minimum-wage.org/international/denmark (no Min)
    166: 5.72,   # https://checkinprice.com/average-minimum-salary-in-singapore/ (no Min)
    # Finland, Sweden, Norway, Italy no minimum wage
    178: 15.43,  # https://www.minimum-wage.org/international/switzerland
}
for row_label, wage in manual_wages.items():
    nom_min_wage_df.at[row_label, '2019 Nominal Annual'] = wage
nom_min_wage_df

Unnamed: 0,Countries,2019 Nominal Annual
3,Afghanistan,1.57
4,Albania,2.51
5,Algeria,3.00
6,Andorra,5.30
7,Angola,0.92
...,...,...
200,Venezuela,
201,Vietnam,1.77
202,Yemen,
203,Zambia,1.27


In [7]:
# https://www.retailcouncil.org/resources/quick-facts/minimum-wage-by-province/ (canada)
# https://www.minimum-wage.org/international/united-kingdom (18 to 20)
# https://businesstech.co.za/news/finance/450395/higher-south-african-minimum-wages-proposed-for-2021/ (South Africa)
# https://www.reuters.com/article/turkey-economy-wages/turkey-raises-gross-minimum-wage-by-more-than-21-in-2021-idUKKBN2920NF?edition-redirect=uk (Turkey)
# https://www.minimum-wage.org/international/saudi-arabia (Saudi Arabia)
# https://www.minimum-wage.org/international/russia (Russia)
# https://www.mexperience.com/mexicos-minimum-wage-2021/
# https://www.minimum-wage.org/international/south-korea
# https://www.minimum-wage.org/international/japan
# https://www.minimum-wage.org/international/indonesia
# 

In [8]:
# Inspect the rows at positions 20..69 of the patched frame
nom_min_wage_df.iloc[20:70]

Unnamed: 0,Countries,2019 Nominal Annual
23,Bolivia,3.21
24,Bosnia and Herzegovina,2.89
25,Botswana,0.56
26,Brazil,2.85
27,Brunei,
28,Bulgaria,4.91
29,Burkina Faso,0.92
30,Burundi,
31,Cambodia,
32,Cameroon,0.87


# State Lat and Long

In [9]:
# Web scraping: state latitude / longitude table
url = 'https://inkplant.com/code/state-latitudes-longitudes'
response = requests.get(url, timeout=5)
# Fail loudly on a 4xx/5xx reply instead of parsing an error page as if it were data
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")
# One list of cell texts per table row
state_lat_long = []
for row in soup.select('tr'):
    row_text = [x.text for x in row.find_all('td')]
    state_lat_long.append(row_text)
state_lat_long_df = pd.DataFrame(state_lat_long)
new_header = state_lat_long_df.iloc[0]  # grab the first row for the header
state_lat_long_df = state_lat_long_df[1:]  # take the data less the header row
state_lat_long_df.columns = new_header  # set the header row as the df header
# Add a row for the federal minimum wage marker.
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# replacement and, with ignore_index=True, renumbers the rows the same way.
federal_row = pd.DataFrame([{'State': 'Federal', 'Latitude': '37.09024', 'Longitude': '-95.712891'}])
state_lat_long_df = pd.concat([state_lat_long_df, federal_row], ignore_index=True)
state_lat_long_df.head()

Unnamed: 0,State,Latitude,Longitude
0,Alabama,32.806671,-86.79113
1,Alaska,61.370716,-152.404419
2,Arizona,33.729759,-111.431221
3,Arkansas,34.969704,-92.373123
4,California,36.116203,-119.681564


In [10]:
# Hand-entered coordinates (as strings) for each state plus Puerto Rico, the
# District of Columbia and a 'Federal' marker.
# Fixed relative to the first draft: the misspellings 'Flordia' and
# 'District of Colombia', and stray spaces inside three coordinate strings.
states=[{'state':'Alabama', 'Lat':'33.52066', 'Long': '-86.80249'},
{'state':'Alaska', 'Lat':'61.21806', 'Long': '-149.90028'},
{'state':'Arizona', 'Lat':'33.44838', 'Long': '-112.07404'},
{'state':'Arkansas', 'Lat':'34.969704', 'Long': '-92.373123'},
{'state':'California', 'Lat':'32.71571', 'Long': '-117.16472'},
{'state':'Colorado', 'Lat':'39.73915', 'Long': '-104.9847'},
{'state':'Connecticut', 'Lat':'41.17923', 'Long': '-73.18945'},
{'state':'Delaware', 'Lat':'39.74595', 'Long': '-75.54659'},
{'state':'Florida', 'Lat':'25.77427', 'Long': '-80.19366'},
{'state':'Georgia', 'Lat':'33.749', 'Long': '-84.38798'},
{'state':'Hawaii', 'Lat':'21.39734', 'Long': '-157.97516'},
{'state':'Idaho', 'Lat':'46.41655', 'Long': '-117.01766'},
{'state':'Illinois', 'Lat':'39.80172', 'Long': '-89.64371'},
{'state':'Indiana', 'Lat':'39.76838', 'Long': '-86.15804'},
{'state':'Iowa', 'Lat':'41.60054', 'Long': '-93.60911'},
{'state':'Kansas', 'Lat':'38.97167', 'Long': '-95.23525'},
{'state':'Kentucky', 'Lat':'38.25424', 'Long': '-85.75941'},
{'state':'Louisiana', 'Lat':'30.22409', 'Long': '-92.01984'},
{'state':'Maine', 'Lat':'43.65737', 'Long': '-70.2589'},
{'state':'Maryland', 'Lat':'39.29038', 'Long': '-76.61219'},
{'state':'Massachusetts', 'Lat':'42.35843', 'Long': '-71.05977'},
{'state':'Michigan', 'Lat':'42.33143', 'Long': '-83.04575'},
{'state':'Minnesota', 'Lat':'44.97997', 'Long': '-93.26384'},
{'state':'Mississippi', 'Lat':'32.29876', 'Long': '-90.18481'},
{'state':'Missouri', 'Lat':'37.21533', 'Long': '-93.29824'},
{'state':'Montana', 'Lat':'45.78329', 'Long': '-108.50069'},
{'state':'Nebraska','Lat':'41.25626', 'Long': '-95.94043'},
{'state':'Nevada', 'Lat':'36.17497', 'Long': '-115.13722'},
{'state':'New Hampshire', 'Lat':'43.20814', 'Long': '-71.53757'},
{'state':'New Jersey', 'Lat':'40.21705', 'Long': '-74.74294'},
{'state':'New Mexico', 'Lat':'35.08449', 'Long': '-106.65114'},
{'state':'New York', 'Lat':'42.65258', 'Long': '-73.75623'},
{'state':'North Carolina', 'Lat':'35.22709', 'Long': '-80.84313'},
{'state':'North Dakota', 'Lat':'46.80833', 'Long': '-100.78374'},
{'state':'Ohio', 'Lat':'39.96118', 'Long': '-82.99879'},
{'state':'Oklahoma', 'Lat':'35.46756', 'Long': '-97.51643'},
{'state':'Oregon', 'Lat':'44.9429', 'Long': '-123.0351'},
{'state':'Pennsylvania', 'Lat':'42.12922', 'Long': '-80.08506'},
{'state':'Rhode Island', 'Lat':'41.82399', 'Long': '-71.41283'},
{'state':'South Carolina', 'Lat':'34.00071', 'Long': '-81.03481'},
{'state':'South Dakota', 'Lat':'44.36832', 'Long': '-100.35097'},
{'state':'Tennessee', 'Lat':'36.16589', 'Long': '-86.78444'},
{'state':'Texas', 'Lat':'30.26715', 'Long': '-97.74306'},
{'state':'Utah', 'Lat':'40.52189', 'Long': '-111.9391'},
{'state':'Vermont', 'Lat':'44.47588', 'Long': '-73.21207'},
{'state':'Virginia', 'Lat':'37.55376', 'Long': '-77.46026'},
{'state':'Washington', 'Lat':'47.30732', 'Long': '-122.22845'},
{'state':'West Virginia', 'Lat':'38.41925', 'Long': '-82.44515'},
{'state':'Wisconsin', 'Lat':'43.07305', 'Long': '-89.40123'},
{'state':'Wyoming', 'Lat':'41.13998', 'Long': '-104.82025'},
{'state':'Puerto Rico', 'Lat':'18.220833', 'Long': '-66.590149'},
{'state':'District of Columbia', 'Lat':'38.89511', 'Long': '-77.03637'},
{'state':'Federal', 'Lat':'37.09024', 'Long': '-95.712891'}]
# One row per entry, columns 'state', 'Lat', 'Long'
z=pd.DataFrame.from_dict(states)
z.head()

Unnamed: 0,state,Lat,Long
0,Alabama,33.52066,-86.80249
1,Alaska,61.21806,-149.90028
2,Arizona,33.44838,-112.07404
3,Arkansas,34.969704,-92.373123
4,California,32.71571,-117.16472


In [21]:
# Attach coordinates to each state by NAME rather than by row position, so a
# different ordering of the scraped table cannot silently pair a state with
# another state's coordinates.
# Known misspellings in the hand-typed list are normalised first, and stray
# whitespace around the coordinate strings is removed.
state_name_fixes = {'Flordia': 'Florida', 'District of Colombia': 'District of Columbia'}
coords = z.assign(
    state=z['state'].str.strip().replace(state_name_fixes),
    Lat=z['Lat'].str.strip(),
    Long=z['Long'].str.strip(),
)
# Series.map needs unique keys in the lookup index
coords_by_state = coords.drop_duplicates('state').set_index('state')
state_keys = minimum_wage_df['States'].str.strip()
# Any state name without a match falls back to the previous row-position pairing
minimum_wage_df['Latitude'] = state_keys.map(coords_by_state['Lat']).fillna(coords['Lat'])
minimum_wage_df['Longitude'] = state_keys.map(coords_by_state['Long']).fillna(coords['Long'])
minimum_wage_df.to_excel("../Data/MinimumWageStates.xlsx")

In [12]:
# minimum_wage_df['Long']=0
# for i in range(len(minimum_wage_df)):
#     if minimum_wage_df['States'][i]=='Alaska':
#         minimum_wage_df['Long']==1
# minimum_wage_df.head()

In [13]:
# # a=minimum_wage_df
# for i in range(len(minimum_wage_df)):
#     for lat in z:
#         if minimum_wage_df['States'][i]==z['state']:
#             minimum_wage_df['Lat'][i]=2

# minimum_wage_df

In [14]:
# Display the state minimum-wage frame in full
minimum_wage_df

Unnamed: 0,States,Minimum_Wage,Latitude,Longitude
0,Alabama,7.25,33.52066,-86.80249
1,Alaska,10.34,61.21806,-149.90028
2,Arizona,12.15,33.44838,-112.07404
3,Arkansas,11.0,34.969704,-92.373123
4,California,13.0,32.71571,-117.16472
5,Colorado,12.32,39.73915,-104.9847
6,Connecticut,12.0,41.17923,-73.18945
7,Delaware,9.25,39.74595,-75.54659
8,Florida,8.65,25.77427,-80.19366
9,Georgia,7.25,33.749,-84.38798


# OECD Lat and Long

In [15]:
# Hand-entered country coordinates (as strings).
# The key is spelled 'County' and is kept as-is because it becomes the column
# name of `y`. Fixed: trailing space inside Colombia's latitude string.
countries=[{'County':'Australia', 'Lat':'-25.274398',  'Long':'133.775136'},
{'County':'Belgium',   'Lat':'50.503887',   'Long':'4.469936'},
{'County':'Canada',    'Lat':'56.130366',   'Long':'-106.346771'},
{'County':'Chile',     'Lat':'-35.675147',  'Long':'-71.542969'},
{'County':'Colombia',  'Lat':'4.570868',    'Long':'-74.297333'},
{'County':'Czech Republic', 'Lat':'49.817492', 'Long':'15.472962'},
{'County':'Estonia', 'Lat':'58.595272', 'Long':'25.013607'},
{'County':'France', 'Lat':'46.227638', 'Long':'2.213749'},
{'County':'Germany', 'Lat':'51.165691', 'Long':'10.451526'},
{'County':'Greece', 'Lat':'39.074208', 'Long':'21.824312'},
{'County':'Hungary', 'Lat':'47.162494', 'Long':'19.503304'},
{'County':'Ireland', 'Lat':'53.41291', 'Long':'-8.24389'},
{'County':'Israel', 'Lat':'31.046051', 'Long':'34.851612'},
{'County':'Japan', 'Lat':'36.204824', 'Long':'138.252924'},
{'County':'Korea', 'Lat':'35.907757', 'Long':'127.766922'},
{'County':'Latvia', 'Lat':'56.879635', 'Long':'24.603189'},
{'County':'Lithuania', 'Lat':'55.169438', 'Long':'23.881275'},
{'County':'Luxembourg', 'Lat':'49.815273', 'Long':'6.129583'},
{'County':'Mexico', 'Lat':'23.634501', 'Long':'-102.552784'},
{'County':'Netherlands', 'Lat':'52.132633', 'Long':'5.291266'},
{'County':'New Zealand', 'Lat':'-40.900557', 'Long':'174.885971'},
{'County':'Poland', 'Lat':'51.919438', 'Long':'19.145136'},
{'County':'Portugal', 'Lat':'39.399872', 'Long':'-8.224454'},
{'County':'Slovakia', 'Lat':'48.669026', 'Long':'19.699024'},
{'County':'Slovenia', 'Lat':'46.151241', 'Long':'14.995463'},
{'County':'Spain', 'Lat':'40.463667', 'Long':'-3.74922'},
{'County':'Turkey', 'Lat':'38.963745', 'Long':'35.243322'},
{'County':'United Kingdom', 'Lat':'55.378051', 'Long':'-3.435973'},
{'County':'United States', 'Lat':'37.09024', 'Long':'-95.712891'},
{'County':'Costa Rica', 'Lat':'9.748917', 'Long':'-83.753428'},
{'County':'Brazil', 'Lat':'-14.235004', 'Long':'-51.92528'},
{'County':'Russian Federation', 'Lat':'61.52401', 'Long':'105.318756'}]
# One row per entry, columns 'County', 'Lat', 'Long'
y=pd.DataFrame.from_dict(countries)
y

Unnamed: 0,County,Lat,Long
0,Australia,-25.274398,133.775136
1,Belgium,50.503887,4.469936
2,Canada,56.130366,-106.346771
3,Chile,-35.675147,-71.542969
4,Colombia,4.570868,-74.297333
5,Czech Republic,49.817492,15.472962
6,Estonia,58.595272,25.013607
7,France,46.227638,2.213749
8,Germany,51.165691,10.451526
9,Greece,39.074208,21.824312


# G 20

In [16]:
# (name, latitude, longitude) for each G20 entry, all kept as strings
g_twenty_coords = (
    ('Argentina', '-38.416097', '-63.616672'),
    ('Australia', '-25.274398', '133.775136'),
    ('Brazil', '-14.235004', '-51.92528'),
    ('Canada', '56.130366', '-106.346771'),
    ('China', '35.86166', '104.195397'),
    ('France', '46.227638', '2.213749'),
    ('Germany', '51.165691', '10.451526'),
    ('India', '20.593684', '78.96288'),
    ('Indonesia', '-0.789275', '113.921327'),
    ('Italy', '41.87194', '12.56738'),
    ('Japan', '36.204824', '138.252924'),
    ('Korea', '35.907757', '127.766922'),
    ('Mexico', '23.634501', '-102.552784'),
    ('Russian Federation', '61.52401', '105.318756'),
    ('Saudi Arabia', '23.885942', '45.079162'),
    ('South Africa', '-30.559482', '22.937506'),
    ('Turkey', '38.963745', '35.243322'),
    ('United Kingdom', '55.378051', '-3.435973'),
    ('United States', '37.09024', '-95.712891'),
)
# Same record layout as the other coordinate tables: 'County', 'Lat', 'Long'
g_twenty = [
    {'County': name, 'Lat': latitude, 'Long': longitude}
    for name, latitude, longitude in g_twenty_coords
]
x = pd.DataFrame.from_dict(g_twenty)
x

Unnamed: 0,County,Lat,Long
0,Argentina,-38.416097,-63.616672
1,Australia,-25.274398,133.775136
2,Brazil,-14.235004,-51.92528
3,Canada,56.130366,-106.346771
4,China,35.86166,104.195397
5,France,46.227638,2.213749
6,Germany,51.165691,10.451526
7,India,20.593684,78.96288
8,Indonesia,-0.789275,113.921327
9,Italy,41.87194,12.56738


In [17]:
# Preview the first rows of the state minimum-wage frame
minimum_wage_df.head()

Unnamed: 0,States,Minimum_Wage,Latitude,Longitude
0,Alabama,7.25,33.52066,-86.80249
1,Alaska,10.34,61.21806,-149.90028
2,Arizona,12.15,33.44838,-112.07404
3,Arkansas,11.0,34.969704,-92.373123
4,California,13.0,32.71571,-117.16472


In [18]:
# split records index columns values table
result = minimum_wage_df.to_json(orient="records")
parsed = json.loads(result)
#serialize the json file
json_object=json.dumps(parsed, indent=4) 

with open("states.json", "w") as outfile: 
    outfile.write(json_object) 

# Convert into GeoJSON

In [19]:
# Load the GeoJSON text into `data`.
# NOTE(review): the recorded run of this cell raised FileNotFoundError and no
# visible cell writes 'state_geoJson.json' -- presumably it is produced outside
# the notebook and has to be placed beside it first; confirm.
with open('state_geoJson.json') as geojson_file:
    data = geojson_file.read()

FileNotFoundError: [Errno 2] No such file or directory: 'state_geoJson.json'

In [None]:
# `data` holds the JSON text itself, not a path. Passing a literal JSON string
# straight to read_json is deprecated in recent pandas, so wrap it in a
# file-like object.
df = pd.read_json(io.StringIO(data))

In [None]:
# Display the frame parsed from the GeoJSON text
df

In [None]:
# Pull the 'features' column out into its own single-column frame and show it.
# NOTE(review): this rebinds `x`, which an earlier cell used for the G20 coordinates frame.
x = df['features'].to_frame()
x