In [1]:
import numpy as np
import pandas as pd

In [2]:
#Number of sample points
n_points = 100000

#Seed NumPy's global RNG so that a fresh "Restart & Run All" reproduces the
#same samples (and therefore the same number of rows after filtering)
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

In [3]:
#generating random samples from gaussian distribution with (upper and lower) limits
def gaussian_with_limits(mean, std, n_points = 1, upper_limit = np.inf, lower_limit = 0):
    """Draw samples from a normal distribution restricted to a closed interval.

    Rejection sampling: values are drawn from ``np.random.normal(mean, std)``
    and only those with ``lower_limit <= value <= upper_limit`` are kept.
    Draws are made in batches rather than one scalar at a time, so large
    requests do not pay a Python-level loop iteration per sample.

    Parameters
    ----------
    mean, std : float
        Mean and standard deviation passed to ``np.random.normal``.
    n_points : int, optional
        Number of accepted samples to return (default 1). A non-integer
        value is rounded up.
    upper_limit : float, optional
        Inclusive upper bound (default ``np.inf``).
    lower_limit : float, optional
        Inclusive lower bound (default 0).

    Returns
    -------
    numpy.ndarray
        1-D array holding exactly ``n_points`` accepted samples (empty when
        ``n_points <= 0``).

    Raises
    ------
    ValueError
        If ``lower_limit > upper_limit``; no value could ever be accepted,
        so sampling would never finish.
    """
    if lower_limit > upper_limit:
        raise ValueError(
            "lower_limit ({}) must not exceed upper_limit ({})".format(lower_limit, upper_limit)
        )

    target = int(np.ceil(n_points))
    if target <= 0:
        return np.array([])

    accepted = []
    n_accepted = 0
    # NOTE: like any rejection sampler, this keeps drawing until enough values
    # fall inside the interval; an interval with (almost) no probability mass
    # will therefore take (almost) forever.
    while n_accepted < target:
        # Over-draw so that a single pass usually suffices; the floor keeps
        # the batch from shrinking to a handful of draws near the end.
        batch_size = max(2 * (target - n_accepted), 1024)
        draws = np.random.normal(mean, std, size = batch_size)
        in_range = draws[(draws >= lower_limit) & (draws <= upper_limit)]
        accepted.append(in_range)
        n_accepted += in_range.size

    # Trim the surplus produced by over-drawing.
    return np.concatenate(accepted)[:target]

In [4]:
#quick check of the sampler on a smaller draw (10000 points, capped at 0.2);
#the result is only displayed, not stored
gaussian_with_limits(mean = 0.1, std = 0.04, n_points = 10000, upper_limit = 0.2)

array([0.10804609, 0.08725118, 0.06647611, ..., 0.08498228, 0.11049056,
       0.06993753])

In [5]:
#Truncated-Gaussian draws (positional arguments are mean, std). Where no
#lower_limit is given, gaussian_with_limits uses its default lower bound of 0.
h2_input = gaussian_with_limits(0.4, 0.15, n_points=n_points, lower_limit=0.2)
o2_input = gaussian_with_limits(0.2, 0.22, n_points=n_points, lower_limit=0.1)
h2o_input = gaussian_with_limits(0.15, 0.3, n_points=n_points)
h_input = gaussian_with_limits(0.1, 0.04, n_points=n_points, upper_limit=0.2)
o_input = gaussian_with_limits(0.1, 0.04, n_points=n_points, upper_limit=0.2)

#Uniform draws for H2O2 and HO2.
#NOTE(review): the H2O2 range [0.999e-4, 1e-4) is far narrower than the HO2
#range [0, 2e-4) - confirm that this is intended.
h2o2_input = np.random.uniform(0.999e-4, 1e-4, n_points)
ho2_input = np.random.uniform(0, 2e-4, n_points)

ho_input = gaussian_with_limits(0.05, 0.04, n_points=n_points, upper_limit=0.1)

#Integer draws; randint excludes the upper bound.
#NOTE(review): presumably temperature and pressure - units are not stated here.
t_input = np.random.randint(1e3, 3e3, n_points)
p_input = np.random.randint(1e5, 1e6, n_points)

In [6]:
#Column name -> sampled values. The order matters: sum_conc and norm_conc treat
#the first eight columns as the concentrations to be normalised.
#N2 and Ar are given as the scalar 0.
input_data = dict([
    ('H2', h2_input),
    ('H', h_input),
    ('O', o_input),
    ('O2', o2_input),
    ('OH', ho_input),
    ('H2O', h2o_input),
    ('HO2', ho2_input),
    ('H2O2', h2o2_input),
    ('N2', 0),
    ('Ar', 0),
    ('P', p_input),
    ('T', t_input),
])

In [7]:
#assemble the sampled columns into one table and display it
input_df = pd.DataFrame(input_data)
input_df

Unnamed: 0,H2,H,O,O2,OH,H2O,HO2,H2O2,N2,Ar,P,T
0,0.450780,0.058025,0.065181,0.148448,0.082397,0.008326,0.000145,0.0001,0,0,680254,2881
1,0.337551,0.086099,0.103093,0.321428,0.063953,0.004531,0.000069,0.0001,0,0,446946,1358
2,0.476762,0.147672,0.133048,0.501302,0.064842,0.017471,0.000068,0.0001,0,0,328390,2639
3,0.299005,0.088580,0.092473,0.682031,0.030785,0.165652,0.000154,0.0001,0,0,684968,2530
4,0.215600,0.087416,0.086427,0.244331,0.045865,0.479691,0.000125,0.0001,0,0,372979,1284
...,...,...,...,...,...,...,...,...,...,...,...,...
99995,0.614920,0.131835,0.166904,0.390446,0.067438,0.083875,0.000018,0.0001,0,0,378441,2978
99996,0.574370,0.093154,0.055786,0.443915,0.019235,0.146240,0.000098,0.0001,0,0,174177,1157
99997,0.395206,0.095864,0.076456,0.342164,0.068619,0.264980,0.000149,0.0001,0,0,608045,1334
99998,0.417000,0.177243,0.125730,0.219069,0.028946,0.180870,0.000037,0.0001,0,0,490910,2271


In [8]:
#column-wise maxima of the raw (not yet normalised) samples
input_df.max()

H2           1.102056
H            0.199974
O            0.199988
O2           1.226115
OH           0.099997
H2O          1.516865
HO2          0.000200
H2O2         0.000100
N2           0.000000
Ar           0.000000
P       999984.000000
T         2999.000000
dtype: float64

In [9]:
#function to sum concentrations
def sum_conc(row):
    """Return the sum of the first eight entries of ``row``.

    In this notebook ``row`` is one DataFrame row, whose first eight entries
    are the concentration columns.
    """
    leading_entries = row[:8]
    return np.sum(leading_entries)

#function to scale the concentrations
def norm_conc(df, n_species = 8):
    """Return a copy of ``df`` whose leading columns are rescaled to sum to 1 per row.

    The first ``n_species`` columns are divided by their row-wise sum; all
    remaining columns are returned unchanged. The input frame is not modified.

    The row sums are computed with a single vectorised ``sum(axis=1)`` and no
    scratch column is added to the frame, so an input column that happens to
    be called ``'sum_conc'`` is left intact.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose first ``n_species`` columns hold the values to normalise.
    n_species : int, optional
        Number of leading columns to normalise (default 8).

    Returns
    -------
    pandas.DataFrame
        Normalised copy with the same columns, in the same order, as ``df``.
        Rows whose leading columns sum to zero produce NaN/inf values.
    """
    df_copy = df.copy()
    species_cols = df_copy.columns[:n_species]
    totals = df_copy[species_cols].sum(axis = 1)
    for col in species_cols:
        df_copy[col] = df_copy[col] / totals
    return df_copy

In [10]:
#rescale every row so that its first eight (concentration) columns sum to 1
input_df_normed = norm_conc(input_df)

In [11]:
#display the normalised samples
input_df_normed

Unnamed: 0,H2,H,O,O2,OH,H2O,HO2,H2O2,N2,Ar,P,T
0,0.554191,0.071336,0.080134,0.182502,0.101299,0.010236,0.000179,0.000123,0,0,680254,2881
1,0.368174,0.093910,0.112445,0.350588,0.069755,0.004942,0.000075,0.000109,0,0,446946,1358
2,0.355457,0.110099,0.099196,0.373753,0.048344,0.013026,0.000051,0.000074,0,0,328390,2639
3,0.220054,0.065191,0.068056,0.501943,0.022656,0.121913,0.000113,0.000074,0,0,684968,2530
4,0.185934,0.075387,0.074535,0.210711,0.039554,0.413686,0.000108,0.000086,0,0,372979,1284
...,...,...,...,...,...,...,...,...,...,...,...,...
99995,0.422470,0.090575,0.114668,0.268249,0.046332,0.057625,0.000013,0.000069,0,0,378441,2978
99996,0.430918,0.069888,0.041853,0.333045,0.014431,0.109716,0.000074,0.000075,0,0,174177,1157
99997,0.317807,0.077089,0.061483,0.275154,0.055181,0.213086,0.000120,0.000080,0,0,608045,1334
99998,0.362926,0.154259,0.109426,0.190662,0.025193,0.157416,0.000032,0.000087,0,0,490910,2271


In [13]:
#column-wise maxima after normalisation
input_df_normed.max()

H2           0.726751
H            0.271747
O            0.266459
O2           0.704446
OH           0.161795
H2O          0.684623
HO2          0.000391
H2O2         0.000209
N2           0.000000
Ar           0.000000
P       999984.000000
T         2999.000000
dtype: float64

In [14]:
#rows whose normalised H share exceeds 0.2
input_df_normed.loc[input_df_normed['H'] > 0.2]

Unnamed: 0,H2,H,O,O2,OH,H2O,HO2,H2O2,N2,Ar,P,T
96,0.377392,0.233236,0.071977,0.142347,0.083714,0.091121,0.000072,0.000140,0,0,323153,1372
218,0.321128,0.202730,0.085754,0.130688,0.011107,0.248353,0.000116,0.000125,0,0,163194,1826
764,0.396911,0.218837,0.096865,0.152456,0.019450,0.115182,0.000185,0.000114,0,0,529367,2575
1185,0.443800,0.212307,0.009466,0.149316,0.053103,0.131768,0.000113,0.000127,0,0,203660,2137
1314,0.442967,0.229989,0.093922,0.131079,0.003032,0.098889,0.000001,0.000121,0,0,528672,2798
...,...,...,...,...,...,...,...,...,...,...,...,...
99057,0.360333,0.232667,0.089807,0.135410,0.029052,0.152497,0.000112,0.000123,0,0,845953,2204
99787,0.283886,0.251414,0.064383,0.324112,0.070748,0.005284,0.000034,0.000139,0,0,357557,1517
99857,0.405998,0.216671,0.115802,0.195056,0.053802,0.012499,0.000044,0.000128,0,0,833790,2213
99864,0.281154,0.230990,0.189336,0.189593,0.029284,0.079439,0.000065,0.000139,0,0,858673,1617


In [15]:
#rows whose normalised O share exceeds 0.2
input_df_normed.loc[input_df_normed['O'] > 0.2]

Unnamed: 0,H2,H,O,O2,OH,H2O,HO2,H2O2,N2,Ar,P,T
237,0.366157,0.172853,0.213169,0.151005,0.044322,0.052276,0.000084,0.000135,0,0,282681,2183
555,0.330066,0.075993,0.200495,0.194177,0.103056,0.095935,0.000134,0.000145,0,0,401019,1904
761,0.349465,0.018336,0.201961,0.289495,0.064840,0.075566,0.000220,0.000118,0,0,418109,2783
1034,0.316555,0.002277,0.219050,0.166467,0.033666,0.261851,0.000006,0.000129,0,0,129049,2975
1504,0.456283,0.070849,0.203539,0.171352,0.018543,0.079264,0.000044,0.000126,0,0,573210,1952
...,...,...,...,...,...,...,...,...,...,...,...,...
97770,0.397416,0.168686,0.211003,0.161220,0.050846,0.010565,0.000128,0.000136,0,0,603208,2054
98139,0.281011,0.102109,0.219356,0.135031,0.049863,0.212502,0.000008,0.000119,0,0,703100,2170
99697,0.274644,0.018005,0.208601,0.444914,0.045151,0.008464,0.000113,0.000108,0,0,534628,2520
99843,0.497229,0.119730,0.214718,0.160924,0.005226,0.001860,0.000161,0.000151,0,0,538075,2032


In [16]:
#rows whose normalised OH share exceeds 0.1
input_df_normed.loc[input_df_normed['OH'] > 0.1]

Unnamed: 0,H2,H,O,O2,OH,H2O,HO2,H2O2,N2,Ar,P,T
0,0.554191,0.071336,0.080134,0.182502,0.101299,0.010236,0.000179,0.000123,0,0,680254,2881
186,0.294068,0.189307,0.090562,0.224355,0.101455,0.100028,0.000106,0.000118,0,0,861583,2515
245,0.379020,0.161750,0.101513,0.229688,0.102714,0.024845,0.000302,0.000168,0,0,849634,2206
262,0.392603,0.104533,0.049007,0.307162,0.130309,0.016138,0.000110,0.000137,0,0,275072,1644
331,0.465702,0.062452,0.134625,0.120691,0.112695,0.103604,0.000117,0.000115,0,0,321606,2141
...,...,...,...,...,...,...,...,...,...,...,...,...
99695,0.285059,0.099857,0.140266,0.267728,0.105605,0.101228,0.000150,0.000107,0,0,824724,1036
99735,0.355095,0.040954,0.117448,0.342547,0.115491,0.028287,0.000062,0.000117,0,0,265486,2791
99804,0.480939,0.028446,0.183353,0.201438,0.101711,0.003811,0.000182,0.000119,0,0,532394,2719
99919,0.380288,0.180807,0.105748,0.157106,0.128331,0.047408,0.000171,0.000142,0,0,478977,2644


In [17]:
#exclude the sample points whose normalised H, O or OH share is above its cap
#(0.2 for H and O, 0.1 for OH - the same values used as upper_limit when
#sampling; normalisation can push a share back above them)
off_limit = (
    (input_df_normed['OH'] > 0.1)
    | (input_df_normed['O'] > 0.2)
    | (input_df_normed['H'] > 0.2)
)
input_df_normed.drop(index = input_df_normed[off_limit].index, inplace = True)

In [18]:
#column-wise maxima after the off-limit rows have been removed
input_df_normed.max()

H2           0.726751
H            0.199951
O            0.199740
O2           0.704446
OH           0.099991
H2O          0.684623
HO2          0.000365
H2O2         0.000204
N2           0.000000
Ar           0.000000
P       999984.000000
T         2999.000000
dtype: float64

In [19]:
#size of data: number of rows left after the exclusion step
len(input_df_normed)

98660

In [22]:
#affirm that concentration is normalized: every row sum must equal 1 within
#floating-point tolerance. (`.all() == 1` would only test that the sums are
#non-zero, because `.all()` returns a boolean.)
row_sums = input_df_normed.apply(sum_conc, axis = 1)
assert np.allclose(row_sums, 1.0), "concentrations do not sum to 1 in every row"

In [23]:
#first rows of the filtered, normalised data
input_df_normed.head()

Unnamed: 0,H2,H,O,O2,OH,H2O,HO2,H2O2,N2,Ar,P,T
1,0.368174,0.09391,0.112445,0.350588,0.069755,0.004942,7.5e-05,0.000109,0,0,446946,1358
2,0.355457,0.110099,0.099196,0.373753,0.048344,0.013026,5.1e-05,7.4e-05,0,0,328390,2639
3,0.220054,0.065191,0.068056,0.501943,0.022656,0.121913,0.000113,7.4e-05,0,0,684968,2530
4,0.185934,0.075387,0.074535,0.210711,0.039554,0.413686,0.000108,8.6e-05,0,0,372979,1284
5,0.512726,0.076426,0.114203,0.253493,0.033068,0.009927,6.3e-05,9.4e-05,0,0,895314,1946


In [24]:
#save to input_<number of rows>.txt; the row count is read from the frame so
#the file name always matches the data that is actually written
output_path = "input_{}.txt".format(len(input_df_normed))
#NOTE(review): "%f" keeps six decimal places, so values near 1e-6 retain at
#most one significant digit - confirm this is acceptable for the consumer.
np.savetxt(output_path, input_df_normed.to_numpy(), fmt = "%f")

In [25]:
!pwd

/Users/emmanuelakeweje/Documents/SkT/Research Contract
