# Import statements

In [5]:
import requests
import joblib

import pandas as pd
from tqdm import tqdm

from bs4 import BeautifulSoup
from urllib.error import HTTPError

# Get all the airfoil IDs from the database
1.  Use BeautifulSoup to extract the data from the website

In [3]:
URL = 'http://airfoiltools.com/search/airfoils'
page = requests.get(URL)
soup = BeautifulSoup(page.content, 'html.parser')

2. Parse the data and extract the airfoil internal IDs

In [4]:
airfoil_table = soup.find('table', class_='listtable')
airfoil_links = airfoil_table.find_all('a')

internal_airfoil_ids = []

for airfoil_tag in airfoil_links:
  link = airfoil_tag['href']
  airfoil_id = link[link.index('airfoil=')+8:]
  internal_airfoil_ids.append(airfoil_id)

print(f"Extracted {len(internal_airfoil_ids)} airfoil internal IDs from the database")
print(internal_airfoil_ids)

-il', 'be50-il', 'be50sm-il', 'boe103-il', 'boe106-il', 'bqm34-il', 'bw3-il', 'c141a-il', 'c141b-il', 'c141c-il', 'c141d-il', 'c141e-il', 'c141f-il', 'c5a-il', 'c5b-il', 'c5c-il', 'c5d-il', 'c5e-il', 'cap21c-il', 'cast102-il', 'ch10sm-il', 'chen-il', 'clarkk-il', 'clarkv-il', 'clarkw-il', 'clarkx-il', 'clarky-il', 'clarkyh-il', 'clarkys-il', 'clarkysm-il', 'clarkz-il', 'clarym15-il', 'clarym18-il', 'coanda1-il', 'coanda2-il', 'coanda3-il', 'cootie-il', 'cp-060-050-gn', 'cp-080-050-gn', 'cp-100-050-gn', 'cp-120-050-gn', 'cp-140-050-gn', 'cp-160-050-gn', 'cp-180-050-gn', 'cr001sm-il', 'cr1-il', 'curtisc72-il', 'dae11-il', 'dae21-il', 'dae31-il', 'dae51-il', 'davis-corrected-il', 'davis-il', 'davissm-il', 'daytonwright6-il', 'daytonwrightt1-il', 'dbln526-il', 'defcnd1-il', 'defcnd2-il', 'defcnd3-il', 'df101-il', 'df102-il', 'dfvlrr4-il', 'dga1138-il', 'dga1182-il', 'dh4009sm-il', 'doa5-il', 'dormoy-il', 'drgnfly-il', 'dsma523a-il', 'dsma523b-il', 'du06-w-200-dt', 'du84132v-il', 'du8608418

# Download all the Airfoil Selig files

In [16]:
prev_df = None
lengths = []

for airfoil_type in tqdm(internal_airfoil_ids):
  # This is the template URL used for the selig dat files, we just have to fill in the blanks
  url = f'http://airfoiltools.com/airfoil/seligdatfile?airfoil={airfoil_type}'
  
  try:
    # Try to read the selig file from the website
    df = pd.read_csv(url, skiprows=1, names=['X', 'Y'], sep=' ', skipinitialspace=True)

    # Add the airfoil type information
    df['Airfoil_Type'] = airfoil_type

    # Concatenate with all previous selig coordinates
    prev_df = pd.concat([df, prev_df])

    # Get the length so we can 
    lengths.append(len(df))
  except HTTPError as err:
    print(f"Could not open airfoil selig dat for url {url} because of an exception. {err}")

# Save our results!
prev_df.to_csv('all_airfoil_selig.csv', index=False, compression='gzip')
joblib.dump(lengths, 'all_airfoil_lengths.pkl')

# Get some length statistics
print()
print(f"Max Length = {max(lengths)}, Min Length = {min(lengths)}, Average Length = {sum(lengths)/len(lengths)}")

# Make sure we can read the file
print("=== Test read ===")
print(pd.read_csv('all_airfoil_selig.csv', compression='gzip').head())

100%|██████████| 1638/1638 [07:29<00:00,  3.64it/s]

Max Length = 260, Min Length = 24, Average Length = 64.43406593406593
=== Test read ===
         X        Y Airfoil_Type
0  1.00000  0.00000     ys930-il
1  0.99908  0.00015     ys930-il
2  0.99639  0.00070     ys930-il
3  0.99210  0.00177     ys930-il
4  0.98641  0.00335     ys930-il


# Download all the polar CSV files and put them into one pandas dataframe

In [None]:
# This will be the output dataframe with all the CSV files
# It takes around 90 minutes
prev_df = None

# Go through all airfoils in the database and a range of reynolds numbers and ncrits
for airfoil_type in internal_airfoil_ids:
  for reynolds_number in [50000, 100000, 200000, 500000, 1000000]:
    for ncrit in [5, 9]:
      # This is the template URL used for the polar files, we just have to fill in the blanks
      url = f'http://airfoiltools.com/polar/csv?polar=xf-{airfoil_type}-{reynolds_number}'

      # NOTE: Internally ncrit=9 is the default so use the default url without the ncrit={} part
      if ncrit != 9:
        url += '-n' + str(ncrit)

      # Try to read and process the polar file from the url
      try:
        print(f'Downloading and processing CSV for {airfoil_type} with reynolds {reynolds_number} and ncrit {ncrit}')

        # First we skip the useless rows and read the data using pandas
        df = pd.read_csv(url, skiprows=10)

        # Add the airfoil type, reynolds number, and ncrit as columns to the dataframe
        df['Airfoil_Type'] = airfoil_type
        df['Reynolds_Number'] = reynolds_number
        df['Ncrit'] = ncrit

        # Concatenate the new dataframe to the previous dataframes
        prev_df = pd.concat([df, prev_df])
      except HTTPError as err:
        print(f"Could not open airfoil polar csv for url {url} because of an exception. {err}")

prev_df.to_csv('all_airfoil_data.csv', index=False, compression='gzip')

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Downloading and processing CSV for n24-il with reynolds 500000 and ncrit 9
Downloading and processing CSV for n24-il with reynolds 1000000 and ncrit 5
Downloading and processing CSV for n24-il with reynolds 1000000 and ncrit 9
Downloading and processing CSV for n2414-il with reynolds 50000 and ncrit 5
Downloading and processing CSV for n2414-il with reynolds 50000 and ncrit 9
Downloading and processing CSV for n2414-il with reynolds 100000 and ncrit 5
Downloading and processing CSV for n2414-il with reynolds 100000 and ncrit 9
Downloading and processing CSV for n2414-il with reynolds 200000 and ncrit 5
Downloading and processing CSV for n2414-il with reynolds 200000 and ncrit 9
Downloading and processing CSV for n2414-il with reynolds 500000 and ncrit 5
Downloading and processing CSV for n2414-il with reynolds 500000 and ncrit 9
Downloading and processing CSV for n2414-il with reynolds 1000000 and ncrit 5
Downloading and 

In [None]:
print(len(prev_df))

1692967
