In [21]:
from typing import Iterable, Literal, Sequence, Tuple
import numpy as np
from qiskit import QuantumCircuit

# ------------------------------------------------------------
# Public entry point: build a circuit that (1) encodes features
# and (2) applies a configurable entangling block.
# ------------------------------------------------------------
def build_circuit(
    data: Iterable[float],
    *,
    # ---- Encoding controls ----
    normalize: bool = True,
    angle_scale: float = np.pi,                  # multiply angles by this (e.g., π)
    encoding_axes: Sequence[str] = ("rx", "ry"), # which rotations to use per feature
                                                 # e.g., ("ry",) or ("ry","rz") etc.
    # ---- Entangler controls ----
    entanglement: Literal["full", "ring", "linear"] = "full",
    gate:        Literal["cx", "cz"] = "cx",
    num_layers:  int = 2,
    alternate_directions: bool = True,
    add_barriers: bool = True,
    qubits_to_entangle = ((0, 1),)   # immutable default; a list default would be shared between calls
) -> QuantumCircuit:
    """
    Builds a feature-generating circuit for your shadows pipeline:
      data -> [angle encoding] -> [entangling gates]

    Parameters
    ----------
    data : Iterable[float]
        Your feature vector (one qubit per feature). At least two features
        are required.
    normalize : bool
        Whether to apply tanh normalization (maps into [-1,1]) before angle
        scaling. Recommended for robustness.
    angle_scale : float
        Scalar to multiply features when used as rotation angles (default π).
    encoding_axes : Sequence[str]
        Rotations to apply per feature, in order. Options: "rx", "ry", "rz".
        Example: ("ry",) or ("ry","rz") or ("rx","ry","rz").
    qubits_to_entangle : iterable of (control, target) pairs, or None
        Explicit list of qubit pairs; one CX gate is appended per pair, in the
        given order. Defaults to the single pair (0, 1). Pass ``None`` to use
        the topology-based entangler configured by the five parameters below.
    entanglement : {"full","ring","linear"}
        Connectivity of the entangling block. Only used when
        ``qubits_to_entangle`` is None.
    gate : {"cx","cz"}
        Two-qubit gate family. Only used when ``qubits_to_entangle`` is None;
        explicit pairs always receive CX.
    num_layers : int
        Number of repeated entangling layers. Only used when
        ``qubits_to_entangle`` is None.
    alternate_directions : bool
        If using CX, flip control/target on odd layers. Only used when
        ``qubits_to_entangle`` is None.
    add_barriers : bool
        Add barriers between layers. Only used when ``qubits_to_entangle``
        is None.

    Returns
    -------
    QuantumCircuit
        Circuit with encoding + entanglement applied.

    Raises
    ------
    AssertionError
        If fewer than two features are supplied.
    ValueError
        If ``encoding_axes`` is empty or contains an unknown rotation name.
    """
    data = np.asarray(list(data), dtype=float)
    n = int(data.size)
    assert n >= 2, "Need at least 2 qubits (features) to add entanglement."

    # ---------- 1) Normalize & scale angles ----------
    if normalize:
        data = np.tanh(data)               # squashes every feature into [-1,1]
    thetas = angle_scale * data            # rescale to rotation angles

    # ---------- 2) Encoding ----------
    qc = QuantumCircuit(n, name="encode+entangle")
    _apply_angle_encoding(qc, thetas, encoding_axes)

    # ---------- 3) Entangling block ----------
    if qubits_to_entangle is None:
        # No explicit pairs: honour the topology controls, which were
        # previously accepted but silently ignored (None used to raise a
        # TypeError when iterated).
        _add_entangling_layer(
            qc,
            num_layers=num_layers,
            entanglement=entanglement,
            gate=gate,
            alternate_directions=alternate_directions,
            add_barriers=add_barriers,
        )
        return qc

    # Explicit pairs: one CX per (control, target), exactly as before.
    return customized_entanglement(qc, indices=qubits_to_entangle)


# ------------------------------------------------------------
# HELPER: angle-encoding with flexible axes per feature
# ------------------------------------------------------------
def _apply_angle_encoding(
    qc: QuantumCircuit,
    thetas: np.ndarray,
    axes: Sequence[str],
) -> None:
    """
    Apply angle encoding per feature/qubit.

    Each feature angle θ_i is applied using the sequence of rotations
    specified in `axes` (e.g., ("ry","rz") applies RY(θ_i) then RZ(θ_i)).
    """
    valid = {"rx", "ry", "rz"}
    axes = tuple(ax.lower() for ax in axes)
    if not axes:
        raise ValueError("encoding_axes must contain at least one of {'rx','ry','rz'}.")
    if any(ax not in valid for ax in axes):
        raise ValueError(f"encoding_axes must be in {valid}, got {axes}.")

    for q, theta in enumerate(thetas):
        for ax in axes:
            if ax == "rx":
                qc.rx(theta, q)
            elif ax == "ry":
                qc.ry(theta, q)
            elif ax == "rz":
                qc.rz(theta, q)


# ------------------------------------------------------------
# HELPER: entangling layer with full/ring/linear topologies
# ------------------------------------------------------------
def _add_entangling_layer(
    qc: QuantumCircuit,
    *,
    num_layers: int = 1,
    entanglement: Literal["full", "ring", "linear"] = "full",
    gate: Literal["cx", "cz"] = "cx",
    alternate_directions: bool = True,
    add_barriers: bool = True,
) -> None:
    """
    Append `num_layers` entangling layers to `qc`, modifying it in-place.

    Each layer applies one two-qubit gate per pair returned by
    `_pairs_for_topology(qc.num_qubits, entanglement)`:

    full   : every unordered pair of qubits
    ring   : nearest neighbours, closed back to the first qubit
    linear : nearest neighbours, open chain

    Gates
    -----
    cx : applied as CX(i, j); when `alternate_directions` is true the
         control and target are swapped on odd-numbered layers (1, 3, ...).
    cz : applied as CZ(i, j) in every layer; never swapped.

    A barrier is inserted after every layer except the last one when
    `add_barriers` is true.

    Raises
    ------
    AssertionError
        If the circuit has fewer than two qubits.
    ValueError
        If `gate` or `entanglement` is not one of the supported names.
    """
    qubit_count = qc.num_qubits
    assert qubit_count >= 2, "Need at least 2 qubits to entangle."

    # The pair list does not depend on the layer, so compute it once.
    edges = _pairs_for_topology(qubit_count, entanglement)
    final_layer = num_layers - 1

    for layer_idx in range(num_layers):
        # Direction only matters for CX, and only odd layers are reversed.
        reverse = (gate == "cx") and alternate_directions and (layer_idx % 2 == 1)

        for first, second in edges:
            if gate == "cz":
                qc.cz(first, second)
                continue
            if gate != "cx":
                raise ValueError("gate must be 'cx' or 'cz'.")
            control, target = (second, first) if reverse else (first, second)
            qc.cx(control, target)

        if add_barriers and layer_idx != final_layer:
            qc.barrier()


def _pairs_for_topology(
    n: int,
    kind: Literal["full", "ring", "linear"],
) -> Sequence[Tuple[int, int]]:
    if kind == "full":
        # All-to-all once, no self-edges, i<j prevents duplicates.
        return [(i, j) for i in range(n) for j in range(i + 1, n)]
    if kind == "ring":
        # Degree-2 ring; last connects to first via modulo.
        return [(i, (i + 1) % n) for i in range(n)]
    if kind == "linear":
        # Open chain; no wrap-around.
        return [(i, i + 1) for i in range(n - 1)]
    raise ValueError("entanglement must be 'full', 'ring', or 'linear'")

def customized_entanglement(qc, indices):
    """
    Append one CX gate per explicitly listed qubit pair.

    Parameters
    ----------
    qc : circuit object exposing ``cx(control, target)``
        Modified in-place.
    indices : iterable of 2-item pairs
        Each pair is unpacked as (control, target); gates are appended in
        iteration order.

    Returns
    -------
    The same `qc` object that was passed in.
    """
    for pair in indices:
        control, target = pair
        qc.cx(control, target)

    return qc


In [None]:
import shadows as sh
import pandas as pd

def normalize_new_features(unnormalized_features, scaler=None):
    """
    Min-max scale a feature matrix column by column.

    Parameters
    ----------
    unnormalized_features : array-like
        Feature matrix handed to the scaler as-is.
    scaler : object with a ``transform`` method, optional
        An already-fitted scaler. When given, it is applied with
        ``scaler.transform`` and nothing is refit. Use this to scale a test
        split with the statistics learned on the training split, e.g.
        ``MinMaxScaler(feature_range=(0, 1)).fit(train_features)``.
        When omitted (the default), a fresh ``MinMaxScaler`` with
        ``feature_range=(0, 1)`` is fitted on `unnormalized_features` itself,
        so every call uses its own per-column min/max.

    Returns
    -------
    The scaled feature matrix returned by the scaler.
    """
    if scaler is not None:
        # Reuse existing statistics so different splits share one scale.
        return scaler.transform(unnormalized_features)

    # Imported here so sklearn is only required when this helper is called.
    from sklearn.preprocessing import MinMaxScaler

    fresh_scaler = MinMaxScaler(feature_range=(0, 1))
    return fresh_scaler.fit_transform(unnormalized_features)
    

def run_pipeline(filename = None, ring_paulis = 'XY', entanglement = 'ring', num_layers = 1, encoding_axis = ("rx","ry"), train_test = 'train', qubits_to_entangle=None):
    """
    Turn a CSV of classical features into a CSV of circuit-derived features.

    Steps, in order:
      1. read ``../Data/{filename}`` with pandas;
      2. build one circuit per row with ``build_circuit``;
      3. build the Pauli list ``sh.paulis_singles_xyz(n)`` +
         ``sh.paulis_ring_pairs(n, (P0, P1))`` where P0/P1 are the first two
         characters of `ring_paulis`;
      4. call ``sh.build_feature_matrix_from_circuits`` with
         ``ShadowConfig(T=200, shots=1000, seed=123)``;
      5. min-max scale the result with ``normalize_new_features``;
      6. write it to ``../Data/`` as a CSV.

    Parameters
    ----------
    filename : str
        Name of the input CSV *relative to ../Data/* (the directory prefix is
        added here). Required.
    ring_paulis : str
        At least two characters; upper-cased, then the first two are used for
        the ring-pair Paulis and for the output file name.
    entanglement, num_layers, qubits_to_entangle
        Forwarded unchanged to ``build_circuit``.
    encoding_axis : Sequence[str]
        Forwarded to ``build_circuit`` as ``encoding_axes``.
    train_test : str
        Label embedded in the output file name.

    Returns
    -------
    The drawing of the first circuit (``circuits[0].draw(fold=-1)``), so a
    notebook cell ending in this call renders it. The drawing used to be
    computed and thrown away.

    Raises
    ------
    ValueError
        If `filename` is missing, `ring_paulis` has fewer than two
        characters, or the input file contains no rows.
    """
    # Validate cheap arguments before any file access or circuit simulation.
    if filename is None:
        raise ValueError("filename is required: pass the name of a CSV file located in ../Data/.")

    ring_paulis = ring_paulis.upper()
    if len(ring_paulis) < 2:
        raise ValueError(f"ring_paulis must contain at least two Pauli letters, got {ring_paulis!r}.")

    df = pd.read_csv(f'../Data/{filename}')

    data = df.to_numpy()
    if data.shape[0] == 0:
        raise ValueError(f"'../Data/{filename}' contains no rows; nothing to encode.")
    n = data.shape[1]   # one qubit per feature column

    circuits = [build_circuit(x, encoding_axes=encoding_axis, entanglement=entanglement, gate="cx", num_layers=num_layers, qubits_to_entangle=qubits_to_entangle)
                for x in data]

    paulis = sh.paulis_singles_xyz(n) + sh.paulis_ring_pairs(n, (ring_paulis[0], ring_paulis[1]))

    cfg = sh.ShadowConfig(T = 200, shots = 1000, seed = 123)

    new_features = sh.build_feature_matrix_from_circuits(circuits, paulis, cfg)

    # NOTE(review): the scaler is refit on every call, so separate train and
    # test runs are each scaled with their own min/max — confirm this is intended.
    normalized_features = normalize_new_features(new_features)

    # NOTE(review): "05corr" and "quantumLayers2" are hard-coded in the file
    # name and do not follow `num_layers` — confirm they are meant to be fixed.
    df = pd.DataFrame(normalized_features)
    df.to_csv(f"../Data/{normalized_features.shape[1]}_features{ring_paulis[0]}{ring_paulis[1]}_{train_test}_05corr_quantumLayers2.csv", index = False)

    return circuits[0].draw(fold=-1)


In [None]:
import pandas as pd

# (control, target) qubit pairs that receive a CX gate after the encoding step.
qubits_to_entangle = [(2, 4), (5, 6)]

# run_pipeline prepends "../Data/" to its argument, so pass bare file names;
# the previous "../Data/..." arguments produced "../Data/../Data/<file>".
run_pipeline("X_test_scaled.csv", qubits_to_entangle=qubits_to_entangle, train_test='test', num_layers = 2)
run_pipeline("X_train_scaled.csv", qubits_to_entangle=qubits_to_entangle, train_test='train', num_layers = 2)



In [None]:
# Re-save a feature file without its first column.
# NOTE(review): the input name says "test" while the output name says "train" —
# confirm the destination is the intended one before re-running this cell.
data = (
    pd.read_csv('../Data/32_featuresXY_test05corr.csv')
    .pipe(lambda frame: frame.drop(columns=frame.columns[0]))
)

data.to_csv('../Data/32_featuresXY_train_05corr_quantumLayers2.csv', index = False)
print(data)

            0         1         2         3         4         5         6  \
0    0.492447  0.498197  0.999962  0.067448  0.755864  0.741542  0.031199   
1    0.411035  0.998052  0.023980  0.292523  0.012633  0.055718  0.797879   
2    0.614565  0.996723  0.020734  0.053006  0.770732  0.711020  0.565048   
3    0.571208  0.998776  0.009088  0.053146  0.770644  0.710144  0.021937   
4    0.620171  0.996644  0.018405  0.053480  0.770188  0.711994  0.884894   
..        ...       ...       ...       ...       ...       ...       ...   
635  0.609759  0.996526  0.023178  0.048137  0.770791  0.709819  0.504966   
636  0.457375  0.475222  0.997747  0.501489  0.501963  1.000000  0.345866   
637  0.633537  0.004395  0.020047  0.646067  0.005397  0.034645  0.646030   
638  0.608323  0.997039  0.016648  0.051782  0.771261  0.714397  0.521479   
639  0.616802  0.996802  0.019436  0.048944  0.770673  0.709040  0.543111   

            7         8         9  ...        22        23        24  \
0  

In [7]:
import pandas as pd

# Load the 640-example training workbook and pull out the `ef_class` column
# as a NumPy array.
df = pd.read_excel('../Data/2025-Quantathon-Tornado-Q-training_data-640-examples.xlsx')

data = df.loc[:, 'ef_class'].to_numpy()

In [21]:
import pandas as pd

# Read the validation workbook.
df = pd.read_excel("../Data/2025-Quantum-Tornado-validation_data-160-examples.xlsx")

# Strict lower bound on `ef_class`: only rows with ef_class > threshold are kept
# (with integer EF ratings, 1 keeps EF2 and above — TODO confirm the column is integral).
threshold = 1

# Build the boolean row mask first, then select with it.
exceeds_threshold = df["ef_class"].gt(threshold)
filtered_df = df.loc[exceeds_threshold]

# Show the surviving rows and persist them for the kernel-estimation step.
print(filtered_df)
filtered_df.to_csv("kernel_estimation_val.csv", index = False)

            cape         cin  dewpoint_2m     temp_2m       tcwv  \
2       0.000000         NaN   285.079681  292.058838  17.377098   
33   1172.039795  226.945221   293.526947  298.174744  30.334305   
41      0.000000         NaN   284.538086  300.196533  24.968369   
60      0.000000         NaN   274.363281  280.068604  11.277771   
62      0.625000         NaN   286.457031  303.894470  33.780228   
69      0.000000         NaN   281.735596  291.719482  10.768862   
94      0.000000         NaN   276.901367  289.785156  12.305121   
103     0.000000         NaN   278.570557  301.199280  17.808828   
111     0.674805         NaN   286.683899  294.732117  25.497208   
117   332.312500         NaN   291.985840  297.951447  22.263474   
127  3319.589355  530.922607   292.324432  303.649994  30.515503   
129     0.000000         NaN   277.571289  289.455811  11.509322   
137     4.010742         NaN   285.027405  291.084259  21.603561   
141     8.642616         NaN   287.531006  295.8

In [18]:
# Number of validation rows that passed the EF threshold filter.
filtered_df.shape[0]

21