<a href="https://colab.research.google.com/github/ChiefSimp/ME597_Airfoil_Performance_Prediction_Model/blob/main/Data_Consolidation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [29]:


import pandas as pd

# Raw-GitHub location of the airfoil dataset CSV used throughout this notebook
github_url = 'https://raw.githubusercontent.com/ChiefSimp/ME597_Airfoil_Performance_Prediction_Model/refs/heads/main/airfoil_dataset_all_airfoils.csv'

# Columns excluded from the working frame; errors='ignore' below means a name
# that is absent from the CSV is skipped instead of raising
columns_to_drop = ['cl_alpha_slope']

# Read the CSV and drop the excluded columns in one chained expression
df_airfoil_geometry = pd.read_csv(github_url).drop(columns=columns_to_drop, errors='ignore')

# Preview the first rows and report the (rows, columns) shape
print("First 5 rows of 'df_airfoil_geometry' (from GitHub):")
print(df_airfoil_geometry.head())
print("\nShape of 'df_airfoil_geometry':", df_airfoil_geometry.shape)

First 5 rows of 'df_airfoil_geometry' (from GitHub):
  airfoil_id                polar_id         Re  max_thickness  \
0   2032c-il      xf-2032c-il-100000   100000.0         0.0796   
1   2032c-il   xf-2032c-il-100000-n5   100000.0         0.0796   
2   2032c-il     xf-2032c-il-1000000  1000000.0         0.0796   
3   2032c-il  xf-2032c-il-1000000-n5  1000000.0         0.0796   
4   2032c-il      xf-2032c-il-200000   200000.0         0.0796   

   x_at_max_thickness  max_camber  x_at_max_camber  max_cl  alpha_at_max_cl  
0                 0.2        18.0             18.0  1.5434             9.25  
1                 0.2        18.0             18.0  1.4859             8.25  
2                 0.2        18.0             18.0  1.6135            10.75  
3                 0.2        18.0             18.0  1.7197            11.75  
4                 0.2        18.0             18.0  1.5265             9.75  

Shape of 'df_airfoil_geometry': (15152, 9)


In [30]:
# Drop the polar variants whose polar_id ends with the '-n5' suffix
# (e.g. 'xf-2032c-il-100000-n5'), keeping the un-suffixed polars.
#
# The suffix is matched with endswith('-n5') rather than a substring search:
# polar_id embeds the airfoil name (as in the rows previewed above), so
# contains('n5') would also hit any airfoil whose own name includes "n5" and
# discard every polar of that airfoil, including the un-suffixed ones.
rows_before_n5_filter_geometry = len(df_airfoil_geometry)

# na=False: rows with a missing polar_id count as "no suffix" and are kept
has_n5_suffix = df_airfoil_geometry['polar_id'].str.endswith('-n5', na=False)
df_airfoil_geometry_filtered = df_airfoil_geometry[~has_n5_suffix]
rows_after_n5_filter_geometry = len(df_airfoil_geometry_filtered)

# Report how many rows the filter removed
print(f"Rows in df_airfoil_geometry before 'n5' filter: {rows_before_n5_filter_geometry}")
print(f"Rows in df_airfoil_geometry after 'n5' filter: {rows_after_n5_filter_geometry}")
print(f"Rows removed from df_airfoil_geometry due to 'n5' polar ID: {rows_before_n5_filter_geometry - rows_after_n5_filter_geometry}")

print("\nHead of df_airfoil_geometry_filtered:")
print(df_airfoil_geometry_filtered.head())

Rows in df_airfoil_geometry before 'n5' filter: 15152
Rows in df_airfoil_geometry after 'n5' filter: 7570
Rows removed from df_airfoil_geometry due to 'n5' polar ID: 7582

Head of df_airfoil_geometry_filtered:
  airfoil_id             polar_id         Re  max_thickness  \
0   2032c-il   xf-2032c-il-100000   100000.0         0.0796   
2   2032c-il  xf-2032c-il-1000000  1000000.0         0.0796   
4   2032c-il   xf-2032c-il-200000   200000.0         0.0796   
6   2032c-il    xf-2032c-il-50000    50000.0         0.0796   
8   2032c-il   xf-2032c-il-500000   500000.0         0.0796   

   x_at_max_thickness  max_camber  x_at_max_camber  max_cl  alpha_at_max_cl  
0                 0.2        18.0             18.0  1.5434             9.25  
2                 0.2        18.0             18.0  1.6135            10.75  
4                 0.2        18.0             18.0  1.5265             9.75  
6                 0.2        18.0             18.0  1.2002             6.00  
8                 0.2

In [31]:
# Leading two characters of every airfoil_id, extracted once and reused for
# both the frequency table and the row mask below
airfoil_id_prefixes = df_airfoil_geometry_filtered['airfoil_id'].str[:2]
airfoil_id_prefix_counts = airfoil_id_prefixes.value_counts()

print("Total occurrences for the first two characters of each Airfoil ID:")
print(airfoil_id_prefix_counts)

# Row count going into the filter
rows_before_filter = len(df_airfoil_geometry_filtered)

# Prefixes seen on fewer than 100 rows are treated as infrequent
infrequent_airfoil_id_prefixes = airfoil_id_prefix_counts[airfoil_id_prefix_counts.lt(100)].index.tolist()

# Keep only the rows whose prefix is NOT in the infrequent list; .copy() makes
# the result an independent frame
has_infrequent_prefix = airfoil_id_prefixes.isin(infrequent_airfoil_id_prefixes)
df_airfoil_geometry_filtered_by_prefix = df_airfoil_geometry_filtered.loc[~has_infrequent_prefix].copy()

# Row accounting after the filter
rows_after_filter = len(df_airfoil_geometry_filtered_by_prefix)
rows_deleted = rows_before_filter - rows_after_filter

print(f"\nTotal number of rows in df_airfoil_geometry_filtered before filtering: {rows_before_filter}")
print(f"Total number of rows deleted: {rows_deleted}")
print(f"Total number of rows remaining: {rows_after_filter}")

print("\nFirst 5 rows of the filtered df_airfoil_geometry_filtered_by_prefix:")
print(df_airfoil_geometry_filtered_by_prefix.head())

Total occurrences for the first two characters of each Airfoil ID:
airfoil_id
go    1984
fx     570
na     489
mh     205
hq     200
      ... 
tr       5
ul       5
v2       5
wa       5
wh       5
Name: count, Length: 158, dtype: int64

Total number of rows in df_airfoil_geometry_filtered before filtering: 7570
Total number of rows deleted: 2602
Total number of rows remaining: 4968

First 5 rows of the filtered df_airfoil_geometry_filtered_by_prefix:
   airfoil_id            polar_id         Re  max_thickness  \
40    ag03-il   xf-ag03-il-100000   100000.0       0.046692   
42    ag03-il  xf-ag03-il-1000000  1000000.0       0.046692   
44    ag03-il   xf-ag03-il-200000   200000.0       0.046692   
46    ag03-il    xf-ag03-il-50000    50000.0       0.046692   
48    ag03-il   xf-ag03-il-500000   500000.0       0.046692   

    x_at_max_thickness  max_camber  x_at_max_camber  max_cl  alpha_at_max_cl  
40               0.065    0.051173            0.281  1.0880             9.25  
42    

In [32]:
# Frequency table of the Reynolds numbers present after the prefix filter
re_counts = df_airfoil_geometry_filtered_by_prefix['Re'].value_counts()

print("Total occurrences for each Reynolds number:")
print(re_counts)

# Row count going into the filter (taken from the prefix-filtered frame)
rows_before_re_filter = len(df_airfoil_geometry_filtered_by_prefix)

# Re values that occur on fewer than 10 rows are treated as infrequent
infrequent_re_values = re_counts[re_counts.lt(10)].index.tolist()

# Keep only the rows whose Re is NOT in the infrequent list; .copy() makes the
# result an independent frame
has_infrequent_re = df_airfoil_geometry_filtered_by_prefix['Re'].isin(infrequent_re_values)
df_airfoil_geometry_filtered_by_re = df_airfoil_geometry_filtered_by_prefix.loc[~has_infrequent_re].copy()

# Row accounting after the filter
rows_after_re_filter = len(df_airfoil_geometry_filtered_by_re)
rows_deleted_re = rows_before_re_filter - rows_after_re_filter

# NOTE(review): the first message names df_airfoil_geometry_filtered_by_re, but
# the "before" count it prints comes from df_airfoil_geometry_filtered_by_prefix
print(f"\nTotal number of rows in df_airfoil_geometry_filtered_by_re before Re filtering: {rows_before_re_filter}")
print(f"Total number of rows deleted due to infrequent Re: {rows_deleted_re}")
print(f"Total number of rows remaining after Re filtering: {rows_after_re_filter}")

print("\nFirst 5 rows of the filtered df_airfoil_geometry_filtered_by_re:")
print(df_airfoil_geometry_filtered_by_re.head())

Total occurrences for each Reynolds number:
Re
100000.0     994
50000.0      994
1000000.0    993
200000.0     993
500000.0     993
231000.0       1
Name: count, dtype: int64

Total number of rows in df_airfoil_geometry_filtered_by_re before Re filtering: 4968
Total number of rows deleted due to infrequent Re: 1
Total number of rows remaining after Re filtering: 4967

First 5 rows of the filtered df_airfoil_geometry_filtered_by_re:
   airfoil_id            polar_id         Re  max_thickness  \
40    ag03-il   xf-ag03-il-100000   100000.0       0.046692   
42    ag03-il  xf-ag03-il-1000000  1000000.0       0.046692   
44    ag03-il   xf-ag03-il-200000   200000.0       0.046692   
46    ag03-il    xf-ag03-il-50000    50000.0       0.046692   
48    ag03-il   xf-ag03-il-500000   500000.0       0.046692   

    x_at_max_thickness  max_camber  x_at_max_camber  max_cl  alpha_at_max_cl  
40               0.065    0.051173            0.281  1.0880             9.25  
42               0.065    0

In [33]:
# Derive two 0-100 ratings per airfoil from its max_thickness, min-max scaled
# across the airfoils left after the Re filter, then attach them to every row.
#   radius_rating    = 100 * (t - t_min) / (t_max - t_min)      -> largest thickness scores 100
#   sharpness_rating = 100 * (1 - (t - t_min) / (t_max - t_min)) -> smallest thickness scores 100

# One row per airfoil_id: the mean of max_thickness over that airfoil's rows
df_mean_thickness = df_airfoil_geometry_filtered_by_re.groupby('airfoil_id')['max_thickness'].mean().reset_index()

print("Head of df_mean_thickness:")
print(df_mean_thickness.head())

min_thickness = df_mean_thickness['max_thickness'].min()
max_thickness = df_mean_thickness['max_thickness'].max()

print(f"Minimum max_thickness: {min_thickness}")
print(f"Maximum max_thickness: {max_thickness}\n")

# Guard the scaling denominator: with a zero span the division below would
# silently fill both rating columns with NaN, which would then be exported.
thickness_span = max_thickness - min_thickness
if thickness_span == 0:
    raise ValueError(
        "Cannot scale thickness ratings: every airfoil has the same max_thickness "
        f"({min_thickness}), so max_thickness - min_thickness is zero."
    )

df_ratings = df_mean_thickness.copy()

# Position of each airfoil within [min_thickness, max_thickness], computed once
normalized_thickness = (df_ratings['max_thickness'] - min_thickness) / thickness_span
df_ratings['sharpness_rating'] = 100 * (1 - normalized_thickness)
df_ratings['radius_rating'] = 100 * normalized_thickness

print("Head of df_ratings with sharpness and radius ratings:")
print(df_ratings.head())

# Left merge keeps every row of the Re-filtered frame; validate='many_to_one'
# makes pandas raise if df_ratings ever held a repeated airfoil_id, which would
# otherwise duplicate rows.
# NOTE(review): this rebinds df_airfoil_geometry_filtered, the name first
# assigned by the 'n5' filter cell, so re-running the prefix-filter cell after
# this one would start from the merged frame. The name is kept because the
# export cell reads it.
df_airfoil_geometry_filtered = pd.merge(
    df_airfoil_geometry_filtered_by_re,
    df_ratings[['airfoil_id', 'sharpness_rating', 'radius_rating']],
    on='airfoil_id',
    how='left',
    validate='many_to_one'
)

# NOTE(review): the label mentions df_final_combined, but the frame printed is
# df_airfoil_geometry_filtered
print("\nHead of the updated df_final_combined with sharpness and radius ratings:")
print(df_airfoil_geometry_filtered.head())

Head of df_mean_thickness:
  airfoil_id  max_thickness
0    ag03-il       0.046692
1    ag04-il       0.051408
2    ag08-il       0.051519
3    ag09-il       0.045880
4    ag10-il       0.045365
Minimum max_thickness: 0.0
Maximum max_thickness: 0.6638900000000001

Head of df_ratings with sharpness and radius ratings:
  airfoil_id  max_thickness  sharpness_rating  radius_rating
0    ag03-il       0.046692         92.966907       7.033093
1    ag04-il       0.051408         92.256549       7.743451
2    ag08-il       0.051519         92.239829       7.760171
3    ag09-il       0.045880         93.089217       6.910783
4    ag10-il       0.045365         93.166790       6.833210

Head of the updated df_final_combined with sharpness and radius ratings:
  airfoil_id            polar_id         Re  max_thickness  \
0    ag03-il   xf-ag03-il-100000   100000.0       0.046692   
1    ag03-il  xf-ag03-il-1000000  1000000.0       0.046692   
2    ag03-il   xf-ag03-il-200000   200000.0       0.046

In [34]:
# Write the rated geometry frame to a CSV at this relative path, without the row index.
# NOTE(review): "angel" in the filename looks like a typo for "angle"; it is left
# unchanged because other code may load the file by this exact name.
output_filename = 'airfoil_geometry_and_angel_of_stall_data.csv'
df_airfoil_geometry_filtered.to_csv(path_or_buf=output_filename, index=False)

# Confirmation message shown after the file has been written
save_confirmation = f"Updated df_final_combined with sharpness and radius ratings successfully saved to: '{output_filename}'"
print(save_confirmation)

Updated df_final_combined with sharpness and radius ratings successfully saved to: 'airfoil_geometry_and_angel_of_stall_data.csv'
