In [8]:
import numpy as np
import pandas as pd
from scipy.interpolate import UnivariateSpline

#### This notebook takes the 100 mu energy values per sample from the averaged 10,000-sample file and expands them to 1000 mu energy values per sample using a UnivariateSpline (smoothing parameter s = 0).

In [9]:
# Read the averaged-samples file, then keep only the mu energy columns by
# discarding the three label columns (CN, Num Cu, Num Te).

mu_cn = pd.read_csv('mu_cn10000.csv')
mu100 = mu_cn.drop(columns=['CN', 'Num Cu', 'Num Te'])

In [11]:
# Use UnivariateSpline to expand each sample from 100 to 1000 mu energy values -
# should increase the resolution of peaks for machine learning training.
# s=0 disables smoothing, so each spline interpolates through its input points.

xs = np.linspace(8970, 9050, 1000)       # dense grid the splines are evaluated on
energies = np.linspace(8970, 9050, 100)  # grid the stored mu values are assumed to lie on

# Work on a plain float array so rows are taken by position; the original
# label-based mu100.loc[i] only worked when the index happened to be 0..n-1.
spectra = np.asarray(mu100, dtype=float)
if spectra.ndim != 2 or spectra.shape[1] != energies.size:
    raise ValueError(
        "expected {} mu columns per sample, got array of shape {}".format(
            energies.size, spectra.shape
        )
    )

# Size the output from the data instead of hard-coding 10000 rows: with fewer
# samples the extra rows of np.empty would hold uninitialised garbage, and with
# more samples the assignment below would raise IndexError.
mu1000 = np.empty((spectra.shape[0], xs.size))
for i, spectrum in enumerate(spectra):
    s1 = UnivariateSpline(energies, spectrum, s=0)
    mu1000[i] = s1(xs)  # 1000 interpolated values for sample i

# Carry over the source index so the rows stay aligned with the label columns
# when the frames are concatenated column-wise later on.
mu1000 = pd.DataFrame(mu1000, index=mu100.index)

In [17]:
# Pull out the three label columns (coordination number, number of copper,
# number of tellurium) that belong to each sample.

label_columns = ['CN', 'Num Cu', 'Num Te']
cn_cu_te = mu_cn[label_columns]

In [22]:
# Build the complete DataFrame: label columns (CN/Cu/Te) first, followed by the
# 1000 interpolated mu energy columns. Rows are matched on the index.

mu_cn1000_energies = pd.concat(
    [cn_cu_te, mu1000],
    axis='columns',
)

In [24]:
# Write the combined DataFrame as CSV text into the current working directory.
# This only needs to be run once.
# NOTE(review): the output name has no '.csv' extension and the row index is
# written as the first column - confirm that downstream readers expect both.

mu_cn1000_energies.to_csv(path_or_buf='mu_cn1000_energies')