## Import required libraries

In [None]:
import requests
import pandas as pd
from bs4 import BeautifulSoup

## base url and headers 

In [None]:
# Listing page of the milling category on the manufacturer's site.
base_url = 'https://www.acemicromatic.net/product_cat/milling/'

# Browser-like User-Agent passed to the scraping functions below.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/89.0.4389.82 Safari/537.36'
    ),
}

## create functions

### This function returns a dictionary mapping each main milling machine to its link

In [None]:
def products(url: str, headers: dict, timeout: float = 30) -> dict:
    """
    Find the products listed on a page and the link of each one.

    arg
    -------------
    url : URL of the HTML page to scrape
    headers : HTTP headers sent with the request
    timeout : seconds to wait for the server before giving up

    return
    --------------
    dict whose keys are the product names (text of the ``h4`` elements) and
    whose values are the product links (``href`` of the first anchor in each
    ``button-wrapper`` div)

    raise
    --------------
    requests.exceptions.HTTPError : the server answered with an error status
    requests.exceptions.Timeout : the server did not answer within `timeout`
    IndexError : the page has no ``productcat-wrapper`` div, or a
        ``button-wrapper`` div contains no anchor
    """
    # A request without a timeout can block forever on a stalled connection.
    response = requests.get(url=url, headers=headers, timeout=timeout)
    # Fail on 4xx/5xx here instead of parsing an error page further down.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')

    # Only the first wrapper on the page is inspected.
    wrapper = soup.find_all('div', class_='productcat-wrapper')[0]

    links = [
        button.find_all('a')[0].get('href')
        for button in wrapper.find_all('div', class_='button-wrapper')
    ]
    names = [heading.text.strip() for heading in wrapper.find_all('h4')]

    # Names and links are paired by position; zip stops at the shorter list,
    # and a repeated name keeps the link seen last.
    return dict(zip(names, links))


    

### This function returns a dictionary mapping each sub-category of a main milling machine to its link


In [None]:
def sub_categories(url, headers, timeout=30):
    """
    Find the sub-categories listed on a product page and the link of each one.

    arg
    -------------
    url : URL of the HTML page to scrape
    headers : HTTP headers sent with the request
    timeout : seconds to wait for the server before giving up

    return
    --------------
    dict whose keys are the sub-category names (text of the ``h4`` elements)
    and whose values are their links (``href`` of the first anchor in each
    ``button-wrapper`` div)

    raise
    --------------
    requests.exceptions.HTTPError : the server answered with an error status
    requests.exceptions.Timeout : the server did not answer within `timeout`
    IndexError : the page has no matching section div, or a
        ``button-wrapper`` div contains no anchor
    """
    # A request without a timeout can block forever on a stalled connection.
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail on 4xx/5xx here instead of parsing an error page further down.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')

    # Only the first matching section on the page is inspected.
    section = soup.find_all('div', class_='section-block common-block wow fadeIn')[0]

    links = [
        button.find_all('a')[0].get('href')
        for button in section.find_all('div', class_='button-wrapper')
    ]
    names = [heading.text.strip() for heading in section.find_all('h4')]

    # Names and links are paired by position; zip stops at the shorter list,
    # and a repeated name keeps the link seen last.
    return dict(zip(names, links))

### This function is specific to vertical machining centers, which have two levels of sub-categories. It returns a dictionary mapping every vertical milling machine sub-category to its link


In [None]:
def vertical_machining_centers(url,headers):
    """
    Flatten the two-level listing under `url` into a single dictionary.

    The first level is read with `products`, and every link it yields is then
    expanded with `sub_categories`.

    arg
    -------------
    url : URL of the top-level listing page
    headers : HTTP headers forwarded to both helper functions

    return
    --------------
    dict whose keys are '<first level name>-<second level name>' and whose
    values are the second-level links
    """
    flattened = {}

    for group_name, group_url in products(url, headers).items():
        models = sub_categories(group_url, headers)
        # Prefix each model with its group so the two levels stay visible in the key.
        flattened.update(
            (group_name + '-' + model_name, model_link)
            for model_name, model_link in models.items()
        )

    return flattened
        
        
        
        

### This function is specific to 5-axes milling machines. It returns the x/y/z travel of a 5-axes milling machine

In [None]:
def five_axes(url, headers, timeout=30):
    """
    Read the travel values from a page that lists them on separate rows.

    The third cell of each of the first three ``tr`` elements with class
    ``hide_row hide_2`` is taken and the values are combined into one string.

    arg
    -------------
    url : URL of the product page
    headers : HTTP headers sent with the request
    timeout : seconds to wait for the server before giving up

    return
    --------------
    string of the form 'x / y / z' when three rows are found; with fewer rows
    each value found is still followed by ' / ', and with none the string is
    empty

    raise
    --------------
    requests.exceptions.HTTPError : the server answered with an error status
    requests.exceptions.Timeout : the server did not answer within `timeout`
    IndexError : one of the rows has fewer than three cells
    """
    # A request without a timeout can block forever on a stalled connection.
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail on 4xx/5xx here instead of parsing an error page further down.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    rows = soup.find_all('tr', class_='hide_row hide_2')

    values = [row.find_all('td')[2].text.strip() for row in rows[:3]]

    # The first two values carry the separator and the third does not, so the
    # result can be split on ' / ' by the caller.
    return ''.join(
        value + ' / ' if position < 2 else value
        for position, value in enumerate(values)
    )

### This function returns the x, y, z travels

In [None]:
def travel(url, header, timeout=30):
    """
    Return the travel string of the machine described at `url`.

    arg
    -------------
    url : URL of the product page
    header : HTTP headers sent with the request
    timeout : seconds to wait for the server before giving up

    return
    --------------
    - URLs containing '5-axes' are delegated to `five_axes`.
    - URLs containing 'gemini-460-xl' return the placeholder 'x / y / z'
      without any request being made.
    - Otherwise, the stripped text of the third cell in the first ``tr`` with
      class ``hide_row hide_2``.

    raise
    --------------
    requests.exceptions.HTTPError : the server answered with an error status
    requests.exceptions.Timeout : the server did not answer within `timeout`
    IndexError : the page has no matching row, or the row has fewer than
        three cells
    """
    if '5-axes' in url:
        # Use the headers given by the caller rather than a same-named global.
        return five_axes(url, header)

    if 'gemini-460-xl' in url:
        # NOTE(review): presumably this page lacks the travel row; confirm.
        return 'x / y / z'

    # A request without a timeout can block forever on a stalled connection.
    response = requests.get(url=url, headers=header, timeout=timeout)
    # Fail on 4xx/5xx here instead of parsing an error page further down.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    first_row = soup.find_all('tr', class_='hide_row hide_2')[0]
    return first_row.find_all('td')[2].text.strip()

## create dictionary 

In [None]:
# Column-oriented accumulator: one independent list per output column.
final = {
    column: []
    for column in (
        'param_1',
        'param_2',
        'model_name',
        'x_travel',
        'y_travel',
        'z_travel',
    )
}

## loop through all available products in milling 

In [None]:

# Scrape every milling category and append one record per machine to `final`.
base_products = products(base_url, headers)

# The loop variable is deliberately not called `base_url`, so the module-level
# constant keeps pointing at the listing page after this cell has run.
for base_key, category_url in base_products.items():

    # Pick how the models are listed and how their travel table is read.
    if ('double-column' in category_url) or ('5-axes-vmc' in category_url):
        # These pages list x, y and z on separate rows.
        product = sub_categories(category_url, headers)
        read_travel = five_axes
    elif 'vertical-machining-centers' in category_url:
        # This category has one extra level of sub-categories.
        product = vertical_machining_centers(category_url, headers)
        read_travel = travel
    else:
        product = sub_categories(category_url, headers)
        read_travel = travel

    print(product)

    for product_name, product_link in product.items():

        xyz = read_travel(product_link, headers)
        print(xyz)

        # Raises ValueError if the page did not yield exactly three values.
        x, y, z = xyz.split(' / ', maxsplit=2)

        final['param_1'].append('milling')
        final['param_2'].append(base_key)
        final['model_name'].append(product_name)
        final['x_travel'].append(x)
        final['y_travel'].append(y)
        final['z_travel'].append(z)

        
        


## create DataFrame

In [None]:
# Every key of `final` becomes a column and every appended record one row.
df = pd.DataFrame(data=final)
# Preview of the first rows (rendered as the cell output).
df.head()

## save to  csv

In [None]:
# Write the table to disk without the positional index column.
df.to_csv(path_or_buf='milling_machines.csv', index=False)