In [None]:
import os
import warnings
import pandas as pd
import numpy as np
import datetime
from pandas.errors import SettingWithCopyWarning
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.linear_model import Lasso, LassoCV, ElasticNet, LinearRegression
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error, r2_score

warnings.simplefilter(action='ignore', category=SettingWithCopyWarning)

In [None]:
#data cleaning
save_path = 'data'

# Maps a timeframe label (the file name up to its first '-q' / '-g') to a dict
# holding that timeframe's tables under 'qubit_data', 'gate_data' and
# 'general_data'.
timeframe_dict = {}

for file in os.listdir(save_path):
    # Choose the split token and the destination key from the file-name suffix;
    # anything that matches none of the three suffixes is skipped.
    if file.endswith('-qubits.csv'):
        split_token, table_key = '-q', 'qubit_data'
    elif file.endswith('-gates.csv'):
        split_token, table_key = '-g', 'gate_data'
    elif file.endswith('general.csv'):
        split_token, table_key = '-g', 'general_data'
    else:
        continue

    table = pd.read_csv(os.path.join(save_path, file))
    timeframe = file.split(split_token)[0]
    # Create the per-timeframe entry on first sight, then attach this table.
    timeframe_dict.setdefault(timeframe, {})[table_key] = table

# Build one feature row and one target value per calendar day.
#
# Outputs (consumed by later cells):
#   feature_names          - column names of the feature rows, fixed by the first day
#   flattened_input_data   - list of 1-D arrays, one per day, ordered like feature_names
#   flattened_output_data  - list of target values, one per day
feature_names = None
flattened_input_data = []
flattened_output_data = []
num_of_days = 315

# Loop-invariant settings, hoisted out of the per-day loop.
target_gate = 'cx0_1'
assoc_gate = 'cx1_0'
qubit_allowed_values = [0,1]
qubit_drop = ['Qubit', 'Unit']
gate_drop = ['Qubit', 'Gate', 'Parameter', 'Unit', 'Gate Name']
gate_title = ['Name', 'Value']

for i in range(num_of_days):
    date = datetime.datetime(2023, 1, 1) + datetime.timedelta(i)
    dateString = date.strftime('%Y-%m-%d')

    # A KeyError here means no files were loaded for this day (or one of the
    # three tables is missing for it).
    timeframe_data = timeframe_dict[dateString]
    qubit_data_for_timeframe = timeframe_data['qubit_data']
    gate_data_for_timeframe = timeframe_data['gate_data']
    general_data_for_timeframe = timeframe_data['general_data']  # looked up but not used below

    # Keep only rows for the allowed qubits. The explicit .copy() makes the
    # 'Name' assignments further down operate on independent frames instead of
    # relying on a suppressed SettingWithCopyWarning.
    qubit_data_for_timeframe = qubit_data_for_timeframe[
        qubit_data_for_timeframe['Qubit'].isin(qubit_allowed_values)].copy()
    gate_data_for_timeframe = gate_data_for_timeframe[
        gate_data_for_timeframe['Qubit'].isin(qubit_allowed_values)]

    # Rows carrying the quantity to predict: gate_error of the target gate or
    # of its associated (reverse-direction) gate.
    is_target_row = ((gate_data_for_timeframe['Parameter'] == 'gate_error')
                     & gate_data_for_timeframe['Gate Name'].isin([target_gate, assoc_gate]))
    tmp_df = gate_data_for_timeframe[is_target_row]
    if tmp_df.empty:
        raise ValueError("No gate_error row for %s/%s on %s" % (target_gate, assoc_gate, dateString))
    # The first matching row (whichever of the two gates comes first) is the target.
    flattened_output_data.append(tmp_df.iloc[0]['Value'])

    #Removes the predicted variable from the input data
    gate_data_for_timeframe = gate_data_for_timeframe[~is_target_row].copy()

    qubit_data_for_timeframe['Name'] = qubit_data_for_timeframe['Name'] + '_' + qubit_data_for_timeframe['Qubit'].astype(str)
    gate_data_for_timeframe['Name'] = gate_data_for_timeframe['Gate Name'] + '_' + gate_data_for_timeframe['Parameter'] + '_value'

    qubit_data_for_timeframe = qubit_data_for_timeframe.drop(columns=qubit_drop)
    gate_data_for_timeframe = gate_data_for_timeframe.drop(columns=gate_drop)
    gate_data_for_timeframe = gate_data_for_timeframe.reindex(columns=gate_title)

    # Stack qubit and gate rows, then pivot to a single row with one column per Name.
    full_data = pd.concat([qubit_data_for_timeframe, gate_data_for_timeframe], ignore_index=False, sort=False)
    full_data = full_data.pivot_table(values='Value', columns=['Name'], aggfunc='first')

    if feature_names is None:
        # The first day defines the feature schema for the whole data set.
        feature_names = list(full_data.keys())
        print(feature_names)
    else:
        # Align later days to that schema by name. Appending the pivot's values
        # positionally would yield a ragged or silently misaligned matrix
        # whenever a day has a different set of features.
        missing_features = [name for name in feature_names if name not in full_data.columns]
        if missing_features:
            raise ValueError("Features missing on %s: %s" % (dateString, missing_features))
        full_data = full_data.reindex(columns=feature_names)
    flattened_input_data.append(full_data.values.flatten())

print('done')


In [None]:
# set up regression model
tscv = TimeSeriesSplit(n_splits=5)
scaler = StandardScaler()

# dtype=float: the in-place centring/scaling below needs a float array, and a
# ragged list of rows fails here instead of producing an object array.
X = np.array(flattened_input_data, dtype=float)
y = np.array(flattened_output_data, dtype=float)

# This cell overwrites feature_names further down while X is rebuilt from
# scratch above. Re-running it on its own would pair the already-pruned name
# list with the full-width matrix and delete the wrong columns, so stop early.
if X.ndim != 2 or X.shape[1] != len(feature_names):
    raise RuntimeError(
        "feature_names has %d entries but X has shape %s; re-run the data cleaning cell before this one"
        % (len(feature_names), str(X.shape)))

MIN_ERR = 0.0001
MAX_ERR = 0.1
ZERO_STD_TOL = 0.00000001  # columns with std at or below this are treated as constant

# Keep only the days whose target lies inside [MIN_ERR, MAX_ERR].
y_inds = np.flatnonzero((y >= MIN_ERR) & (y <= MAX_ERR))

X = X[y_inds]
y = y[y_inds]

print("Original features %s" % str(list(enumerate(feature_names))))
print()

# NOTE(review): X and y are standardised with statistics of the whole series,
# before the TimeSeriesSplit folds are drawn in the training cell, so the test
# folds contribute to the scaling of the training folds. Confirm this is intended.
print(X.shape)
X -= np.mean(X, axis=0)
Xstd = np.std(X, axis = 0)
print("Standard Deviations %s" % str(list(zip(feature_names, Xstd))))
print()

# Drop (near-)constant columns: they carry no signal and would divide by ~0.
x_inds_bad = [i for i in range(len(feature_names)) if Xstd[i] <= ZERO_STD_TOL]
print("Bad names %s" % str([feature_names[i] for i in x_inds_bad]))
bad_index_set = set(x_inds_bad)
feature_names = [name for i, name in enumerate(feature_names) if i not in bad_index_set]
X = np.delete(X, x_inds_bad, axis=1)
X /= np.std(X, axis=0)
print(X.shape)

print("New features: %s" % str(list(zip(feature_names, X[0]))))
print()

# Standardise the target as well (zero mean, unit standard deviation).
y -= np.mean(y)
y /= np.std(y)

# Baseline: MSE of always predicting the mean of the standardised target.
y_mean = np.mean(y)
baseline_mse = mean_squared_error(y, np.ones(len(y), dtype=np.double)*y_mean)
print("Average MSE of average %f:" % baseline_mse)

# Sanity check: if the target's own error column is still among the features,
# report how well it predicts y by itself.
if 'cx0_1_gate_error_value' in feature_names:
    triv_ind = feature_names.index('cx0_1_gate_error_value')
    triv_mse = mean_squared_error(y, X[:,triv_ind])
    print("Trivial prediction MSE %f" % triv_mse)



In [None]:
# begin training model
# Sweep the ElasticNet regularisation strength and score each setting with the
# time-series cross-validation splitter defined earlier.
results = []
coeff = []
alphas = []
l1_ratio = 0.5
for step in range(1, 400, 10):
    alpha = step / 250
    model = ElasticNet(alpha=alpha, l1_ratio=l1_ratio)

    mse_scores, r2_scores = [], []

    for train_index, test_index in tscv.split(X):
        X_train, y_train = X[train_index], y[train_index]
        X_test, y_test = X[test_index], y[test_index]

        # The folds are used as-is; no per-fold rescaling is applied here.
        y_pred = model.fit(X_train, y_train).predict(X_test)

        mse_scores.append(mean_squared_error(y_test, y_pred))
        r2_scores.append(r2_score(y_test, y_pred))

    avg_mse = np.mean(mse_scores)
    avg_r2 = np.mean(r2_scores)

    fold_summary = {'Alpha': alpha, 'Avg_MSE': avg_mse, 'Avg_R2': avg_r2}
    print(str(fold_summary))
    results.append(fold_summary)

    # model.coef_ at this point belongs to the fit on the last fold of the
    # split loop above; that is what gets printed and stored.
    print(model.coef_)
    coeff.append(model.coef_)
    nonzero_names = [name for idx, name in enumerate(feature_names) if model.coef_[idx] != 0]
    print("Non-zero coeffs: %s" % (str(nonzero_names),))
    print()

# One row per alpha for the scores, and one row per alpha for the coefficients.
results_df = pd.DataFrame(results)
results_coeff = pd.DataFrame(coeff)

results_df.to_csv('poly_elasticnet_results.csv', index=True)
results_coeff.to_csv('coefficients.csv', index=True)

print('done')