[Reference1](https://medium.com/@megandibble/web-scraping-a-javascript-heavy-website-in-python-and-using-pandas-for-analysis-7efb22315858) <br>
[Reference2](https://blog.hartleybrody.com/web-scraping-cheat-sheet/#useful-libraries)

In [9]:
import pandas as pd
import requests 
import numpy as np
from bs4 import BeautifulSoup

In [10]:
current = pd.read_csv('https://raw.githubusercontent.com/mdibble2/Projects/master/data/suppliers_v2.csv')

In [11]:
current.head()

Unnamed: 0,supplier
0,Ivystone
1,Meissenburg Designs
2,Snark City
3,Chronicle Books
4,Workman Publishing


In [12]:
bnum = 1
floornum = 20
url = 'https://www.americasmart.com/browse/#/exhibitor?market=23&building=' + str(bnum) + '&floor=' + str(floornum) 
response = requests.get(url)
print(url)

if response.ok == True:
    data = response.text
    #print (data)
    soup = BeautifulSoup(data, 'lxml')
    #print (soup.prettify())
    booths = soup.select('body')
    #print (booths)
    text = soup.find_all(text=True)
    #print(text)

https://www.americasmart.com/browse/#/exhibitor?market=23&building=1&floor=20


In [13]:
url = 'https://wem.americasmart.com/api/v1.2/Search/LinesAndPhotosByMarket?status=ACTIVE_AND_UPCOMING&marketID=23'
r = requests.get(url)
info = r.json()
print(info[1])

{'showroomName': 'Abbey & CA Gift', 'booths': [{'floorNum': 13, 'isPerm': True, 'building': 2.0, 'lateOptIn': None, 'meridianUID': '1335A', 'showMarketLateOptInDates': [{'showLateOptInDatesID': 14, 'showLateOptInDate': '2020-01-16T05:00:00Z'}], 'title': '1335A', 'boothID': 249415}], 'logo': '//wem.americasmart.com/convdata/amc/images/ExhibitorLogos/44FEE1FF-5056-86CF-980D4A62EACE696B.jpg', 'productLines': [{'exhibLineID': 294565, 'description': 'Cathedral Art Metal Co.'}, {'exhibLineID': 300361, 'description': 'Amazing Woman Collection'}, {'exhibLineID': 300362, 'description': 'Say it with Sass'}, {'exhibLineID': 300363, 'description': 'Advent Collection'}, {'exhibLineID': 300364, 'description': 'Grace Outpoured Coaster Mugs'}, {'exhibLineID': 300369, 'description': 'Professions'}, {'exhibLineID': 300373, 'description': 'Car Charms & Visor Clips'}, {'exhibLineID': 310947, 'description': 'Abbey & CA Gift'}], 'exhibitorID': 2189}


In [14]:
#narrowing down all suppliers in one show room
for j in range(0,len(info[1]['productLines'])):
    print(info[1]['productLines'][j]['description'])

#played around with indexing to find the data I needed
info[1]['booths'][0]['title']

Cathedral Art Metal Co.
Amazing Woman Collection
Say it with Sass
Advent Collection
Grace Outpoured Coaster Mugs
Professions
Car Charms & Visor Clips
Abbey & CA Gift


'1335A'

In [15]:
allbooths = []

for i in range(0, len(info)):
    loc = info[i] #select the item in the list provided by the AJAX query
    for j in range(0,len(info[i]['productLines'])):  #some booths have multiple lines, which is why we need 2 for loops
        
        booth = loc['showroomName'] 
        boothid = loc['booths'][0]['title']
        line = info[i]['productLines'][j]['description']
        bldg = int(loc['booths'][0]['building']) 
        floor = loc['booths'][0]['floorNum']

        sublist1 = [line,bldg,floor,booth,boothid] #want there to be a separate record if this line is in a booth with another title
        sublist2 = [booth,bldg,floor,booth,boothid] #also want to capture lines that have their own booth
        allbooths.append(sublist1)
        allbooths.append(sublist2)

df = pd.DataFrame(allbooths,columns=['supplier','building','floor','booth','id'])

In [17]:
df.head()

Unnamed: 0,supplier,building,floor,booth,id
0,Moby Dick Specialties,2,10,Moby Dick Specialties,1029
1,Moby Dick Specialties,2,10,Moby Dick Specialties,1029
2,Cathedral Art Metal Co.,2,13,Abbey & CA Gift,1335A
3,Abbey & CA Gift,2,13,Abbey & CA Gift,1335A
4,Amazing Woman Collection,2,13,Abbey & CA Gift,1335A


In [20]:
df2 = df.drop_duplicates()

In [23]:
df2[df2['supplier']=="Ivystone"]

Unnamed: 0,supplier,building,floor,booth,id
6714,Ivystone,2,11,Ivystone,1101


In [22]:
merge = current.merge(df2)
merge.head()

Unnamed: 0,supplier,building,floor,booth,id
0,Ivystone,2,11,Ivystone,1101
1,Meissenburg Designs,2,7,Meissenburg Designs,787B
2,Chronicle Books,2,16,"Simblist Group, The",1621
3,Workman Publishing,2,17,Anne McGilvray & Company,1718
4,Mary Square,2,18,OneCoast,1800


In [24]:
s1 = merge.supplier
s2 = current.supplier
s3 = s1.append(s2)
print(s3.drop_duplicates())

0                     Ivystone
1          Meissenburg Designs
2              Chronicle Books
3           Workman Publishing
4                  Mary Square
5                Ellembee Home
6             Walton Wood Farm
7                Sapling Press
8                  Knock Knock
9                    Capabunga
11             Barefoot Dreams
12              Creative Co-Op
13                 Adams & Co.
14                Spunky Fluff
15    Anne McGilvray & Company
16                Design Ideas
18          Eric & Christopher
19                Karma Living
20               Sweet Gumball
21                 Stash Style
22               Julio Designs
23                      Joules
26                  Chez Gagne
27        Pretty Alright Goods
28                Reeves & Co.
2                   Snark City
11                   Fish Kiss
22              Ella B Candles
24            Torched Products
25            Socksmith Design
26      Cedar Mountain Studios
28       Venture Imports, LLC 
29      

In [26]:
current2 = pd.read_csv('https://raw.githubusercontent.com/mdibble2/Projects/master/data/suppliers_v2.csv')
merge2 = current2.merge(df2)

In [27]:
s1 = merge2.supplier
s2 = current2.supplier
s3 = s1.append(s2)
print(s3.drop_duplicates())

0                     Ivystone
1          Meissenburg Designs
2              Chronicle Books
3           Workman Publishing
4                  Mary Square
5                Ellembee Home
6             Walton Wood Farm
7                Sapling Press
8                  Knock Knock
9                    Capabunga
11             Barefoot Dreams
12              Creative Co-Op
13                 Adams & Co.
14                Spunky Fluff
15    Anne McGilvray & Company
16                Design Ideas
18          Eric & Christopher
19                Karma Living
20               Sweet Gumball
21                 Stash Style
22               Julio Designs
23                      Joules
26                  Chez Gagne
27        Pretty Alright Goods
28                Reeves & Co.
2                   Snark City
11                   Fish Kiss
22              Ella B Candles
24            Torched Products
25            Socksmith Design
26      Cedar Mountain Studios
28       Venture Imports, LLC 
29      

In [28]:
merge2.sort_values(by=['building','floor'],inplace = True)
merge2

Unnamed: 0,supplier,building,floor,booth,id
10,Capabunga,1,5,J. Grob Associates,G5
18,Eric & Christopher,1,9,Eric & Christopher,C19
19,Karma Living,1,9,Karma Living,C15
21,Stash Style,1,10,Stash Style,E6
20,Sweet Gumball,1,11,Sweet Gumball,E14
13,Adams & Co.,2,6,Adams & Co.,637A
1,Meissenburg Designs,2,7,Meissenburg Designs,787B
14,Spunky Fluff,2,7,Prairie Dance / Spunky Fluff,796
17,Design Ideas,2,8,Design Ideas,812B
11,Barefoot Dreams,2,9,FIELDSTONE,912


In [30]:
merge2.set_index(np.arange(0,29),inplace=True)
# merge2.to_csv('Market_Plan.csv')