## Creating functions from my eda to use within the final scripting

### packages

In [2]:
import requests
import pandas as pd
from bs4 import BeautifulSoup

### scraping function

In [8]:
def scraper(code: str) -> pd.DataFrame:

    try:
        # fetch HTML content
        code = code.lower()
        url = f"https://service.unece.org/trade/locode/{code}.htm"
        r = requests.get(url)
        r.raise_for_status()

        # Parse the HTML
        soup = BeautifulSoup(r.text, 'html.parser')

        # Find all tables
        tables = soup.find_all('table')

        # Extracting table data
        data = []
        for table in tables:
            for row in table.find_all('tr'):
                row_data = []
                for td in row.find_all('td'):
                    row_data.append(td.text.strip())
                if row_data:
                    data.append(row_data)

        # Create the Data Frame
        df = pd.DataFrame(data)
        print(df)

        return df
    except Exception as e:
        print(f"An error occured: {e}")
        return pd.DataFrame()


In [11]:
df = scraper("NO")

                                                     0               1   \
0                                                        United Nations   
1     Code for Trade and Transport Locations  (UN/LO...            None   
2                                          (NO)  NORWAY            None   
3                                                    Ch          LOCODE   
4                                                               NO  AAA   
...                                                 ...             ...   
1143                                                            NO  VOS   
1144                                                            NO  VYG   
1145                                                            NO  VRE   
1146                                                            NO  YTR   
1147                                                            NO  YTO   

                2                 3       4         5       6     7     8   \
0             None   

### Creating processing functions

These can be within a class as static methods then can be used within the main.py script

In [16]:
from LatLon23 import LatLon
import re
import pandas as pd

In [13]:
def format_table(df:pd.DataFrame) -> pd.DataFrame:
    
    df = df.drop(df.index[:3], inplace=False)
    df.columns= df.iloc[0]
    df = df.drop(3)
    df.reset_index(drop=True, inplace=True)
    
    return df

In [14]:
df = format_table(df)

In [15]:
df.head()

3,Ch,LOCODE,Name,NameWoDiacritics,SubDiv,Function,Status,Date,IATA,Coordinates,Remarks
0,,NO AAA,Å i Lofoten,A i Lofoten,18,1-------,AA,1407,,6753N 01259E,
1,,NO ABE,Abelnes,Abelnes,10,1-3-----,AA,1401,,5814N 00640E,
2,,NO ABV,Abelvær,Abelvar,17,1-------,AA,1407,,6444N 01111E,
3,,NO AAF,Åfjord,Afjord,16,1-------,AI,9704,,,
4,,NO AGD,Agdenes,Agdenes,16,1-------,AI,9704,,,
