<a href="https://colab.research.google.com/github/ChiefSimp/ME597_Airfoil_Performance_Prediction_Model/blob/main/TestDataGeneration.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
import pandas as pd

# Pull the airfoil geometry / stall table straight from the project's GitHub repo.
df_airfoil = pd.read_csv(
    'https://raw.githubusercontent.com/ChiefSimp/ME597_Airfoil_Performance_Prediction_Model/refs/heads/main/airfoil_geometry_and_angel_of_stall_data.csv'
)

# Quick look at the leading rows.
print("First 5 rows of the DataFrame:", df_airfoil.head(), sep="\n")

# info() writes its column/dtype summary to stdout itself, so it is not wrapped in print().
print("\nDataFrame Info (columns and data types):")
df_airfoil.info()

# Summary statistics as reported by describe().
print("\nDescriptive Statistics:", df_airfoil.describe(), sep="\n")

First 5 rows of the DataFrame:
  airfoil_id            polar_id         Re  max_thickness  \
0    ag03-il   xf-ag03-il-100000   100000.0       0.046692   
1    ag03-il  xf-ag03-il-1000000  1000000.0       0.046692   
2    ag03-il   xf-ag03-il-200000   200000.0       0.046692   
3    ag03-il    xf-ag03-il-50000    50000.0       0.046692   
4    ag03-il   xf-ag03-il-500000   500000.0       0.046692   

   x_at_max_thickness  max_camber  x_at_max_camber  max_cl  alpha_at_max_cl  \
0               0.065    0.051173            0.281  1.0880             9.25   
1               0.065    0.051173            0.281  1.3785            12.25   
2               0.065    0.051173            0.281  1.1490            10.25   
3               0.065    0.051173            0.281  1.0089             8.50   
4               0.065    0.051173            0.281  1.2897            11.25   

   sharpness_rating  radius_rating  
0         92.966907       7.033093  
1         92.966907       7.033093  
2         

In [9]:
# Identifier columns are never used as sampling targets.
excluded_cols = ['airfoil_id', 'polar_id']

# Keep only columns whose dtype is actually numeric. Excluding by name alone would let
# a text column slip through; on a mixed frame describe() silently drops such columns,
# and the min/max lookup below would then fail with a KeyError.
numerical_cols = (
    df_airfoil
    .drop(columns=excluded_cols, errors='ignore')
    .select_dtypes(include='number')
    .columns
    .tolist()
)

df_desc = df_airfoil[numerical_cols].describe()

# Observed [min, max] per numeric column; later cells sample uniformly inside these bounds.
# NOTE(review): the captured describe() output shows max_camber spanning roughly -0.05 to
# 103 with a median of 17, which may indicate mixed units (fraction vs. percent) in the
# source data -- confirm before trusting these ranges.
column_ranges = {
    col: {
        'min': df_desc.loc['min', col],
        'max': df_desc.loc['max', col],
    }
    for col in numerical_cols
}

print("Identified numerical columns:")
print(numerical_cols)

print("\nDescriptive statistics for numerical columns:")
print(df_desc)

print("\nExtracted min/max ranges for data generation:")
print(column_ranges)

Identified numerical columns:
['Re', 'max_thickness', 'x_at_max_thickness', 'max_camber', 'x_at_max_camber', 'max_cl', 'alpha_at_max_cl', 'sharpness_rating', 'radius_rating']

Descriptive statistics for numerical columns:
                   Re  max_thickness  x_at_max_thickness   max_camber  \
count     4967.000000    4967.000000         4967.000000  4967.000000   
mean    369881.216026       0.089224            0.356622    18.888679   
std     351582.702477       0.064541            0.276069    19.786195   
min      50000.000000       0.000000            0.000000    -0.053920   
25%     100000.000000       0.040290            0.200000     0.094357   
50%     200000.000000       0.085600            0.300000    17.000000   
75%     500000.000000       0.129900            0.402000    26.000000   
max    1000000.000000       0.663890            1.000000   103.000000   

       x_at_max_camber       max_cl  alpha_at_max_cl  sharpness_rating  \
count      4967.000000  4967.000000      4967.

In [10]:
import numpy as np

# Fixed seed so that Restart & Run All reproduces exactly the same synthetic sample.
SEED = 42
NUM_SYNTHETIC_ROWS = 1000
rng = np.random.default_rng(SEED)

# First pass: draw every numeric column (including 'Re') independently and uniformly
# between its observed min and max. A later cell rebuilds df_synthetic with 'Re'
# sampled from the discrete values present in the data, overwriting this result.
synthetic_data = {
    col: rng.uniform(
        column_ranges[col]['min'],
        column_ranges[col]['max'],
        NUM_SYNTHETIC_ROWS,
    )
    for col in numerical_cols
}

df_synthetic = pd.DataFrame(synthetic_data)

print("First 5 rows of the generated synthetic data:")
print(df_synthetic.head())

First 5 rows of the generated synthetic data:
              Re  max_thickness  x_at_max_thickness  max_camber  \
0  709604.281965       0.327471            0.806486   40.071442   
1  856670.450917       0.055152            0.093407   18.062218   
2  802302.602661       0.653692            0.977373   10.086732   
3  542305.157764       0.099877            0.832611   88.451426   
4  463894.814019       0.034556            0.994588    8.889236   

   x_at_max_camber    max_cl  alpha_at_max_cl  sharpness_rating  radius_rating  
0        49.649038  1.628696         0.919057         89.418935      25.160141  
1        92.657883  1.528306         2.123121         29.746269      28.922709  
2        61.553982  0.307761       -11.067029         98.273343      94.987475  
3        84.422798  2.063606         1.229994         54.054010      42.400440  
4        60.838530  1.083430         5.672213          1.723596      51.698868  


In [11]:
# Collect the distinct 'Re' values present in the dataset (unique() keeps order of first appearance).
unique_re_values = df_airfoil.loc[:, 'Re'].unique()

print("Unique 'Re' values:", unique_re_values, sep="\n")

Unique 'Re' values:
[ 100000. 1000000.  200000.   50000.  500000.]


In [12]:
import numpy as np

# Fixed seed, with the generator created inside this cell, so re-running the cell
# (or Restart & Run All) always yields the same test set.
SEED = 42
num_rows = 1000
rng = np.random.default_rng(SEED)

# 'Re' only takes a handful of discrete values in the source data, so sample it from
# those observed values instead of a continuous range.
synthetic_data = {'Re': rng.choice(unique_re_values, size=num_rows)}

# Every other numeric column is drawn uniformly within its observed [min, max] range.
# NOTE(review): columns are sampled independently, so relationships between columns are
# not preserved. In the sample rows printed by the first cell, sharpness_rating +
# radius_rating equals 100; the synthetic rows will not satisfy that -- confirm this is
# acceptable for the intended test data.
numerical_cols_for_uniform = [col for col in numerical_cols if col != 'Re']

for col in numerical_cols_for_uniform:
    bounds = column_ranges[col]
    synthetic_data[col] = rng.uniform(bounds['min'], bounds['max'], num_rows)

df_synthetic = pd.DataFrame(synthetic_data)

print("First 5 rows of the newly generated synthetic data:")
print(df_synthetic.head())

print("\nShape of the newly generated synthetic data:")
print(df_synthetic.shape)

First 5 rows of the newly generated synthetic data:
          Re  max_thickness  x_at_max_thickness  max_camber  x_at_max_camber  \
0    50000.0       0.448391            0.694201    8.585237        11.247675   
1    50000.0       0.071629            0.094638   32.788350        21.442142   
2  1000000.0       0.623295            0.681232   53.762353        87.227863   
3   200000.0       0.484137            0.056333   67.005704        93.729310   
4   100000.0       0.174687            0.626613   48.272158        60.650647   

     max_cl  alpha_at_max_cl  sharpness_rating  radius_rating  
0  2.175720        -5.267734         71.819494      56.365467  
1  1.633161         8.510800         24.396941      60.096061  
2  1.188347        -7.318239         87.169929      11.003976  
3  1.094498         9.628838         47.373481      29.584002  
4  1.576533         3.770721         48.362853      75.828313  

Shape of the newly generated synthetic data:
(1000, 9)


In [13]:
# Persist the synthetic sample as CSV; index=False keeps the row index out of the file.
df_synthetic.to_csv(path_or_buf='Testing data.csv', index=False)

print("'Testing data.csv' generated and saved successfully.")

'Testing data.csv' generated and saved successfully.
