In [None]:
%%capture
import numpy as np
!pip install qiskit
from qiskit import QuantumCircuit, ClassicalRegister, QuantumRegister, assemble
from qiskit import execute
from qiskit import BasicAer as Aer
from qiskit.tools.visualization import circuit_drawer, plot_histogram

## Hamiltonian and mapping

The following fermionic model describes the transition to a superconducting phase. We consider a single-particle space $\mathcal{H}$ with $2\Omega$ modes, composed of $\Omega$ orthogonal single-particle states $k$ and the corresponding $\Omega$ time-reversed states $\bar{k}$, described by the following Hamiltonian:

$H = \sum_k \varepsilon_k (c_k^\dagger c_k + c_{\bar{k}}^\dagger c_{\bar{k}}) - \sum_{kk'} G_{k k'}c_{k'}^\dagger c_{\bar{k'}}^\dagger c_{\bar{k}} c_k$


with $\varepsilon_k$ the energy of the level $k$ and $\bar{k}$. We will consider the half filled case, i.e. a fixed number of fermions $N=\Omega$. We will look at the case of equally spaced spectrum $\varepsilon_{k+1}-\varepsilon_k = \varepsilon\;\; \forall k $ and $G_{kk'} = G\geq 0 \;\; \forall k, k'$

The exact ground state will then be a linear combination of Slater Determinants with fixed fermion number N and fully occupied or empty pairs $(k,\bar{k})$

$|\psi\rangle = \sum_\nu \alpha_\nu [\prod_k (c_k^\dagger c_{\bar{k}}^\dagger)^{n_k^\nu}]|0\rangle$

where $n_k^\nu = 0,1$ indicates the occupation of pair $(k,\bar{k})$ $(\sum_k n_k^\nu = N/2)$ and $\nu = 1,\ldots,\binom{\Omega}{N/2}$ runs over these Slater Determinants $(\sum_\nu |\alpha_{\nu}|^2 = 1)$. The ground state of this Hamiltonian is already diagonal in the natural basis, which introduces some sort of "privileged basis".


The fact that the ground state has no broken pairs (that is, modes $k$ and $\bar{k}$ are either both occupied or both empty) allows us to map the occupation of the different levels to spin projections (instead of mapping modes to spins, which would require twice as many qubits). In order to understand the mapping, we can think of the simplest case of 2 energy levels (4 total modes). In that case, we would consider the occupation basis $\{00,01,10,11\}$, which would map to $\{\downarrow \downarrow ,\downarrow \uparrow,\uparrow\downarrow ,\uparrow\uparrow\}$ in a 2-qubit system. Furthermore, in our case, we have a fixed particle number, equal to half the available modes. Hence, only the states $\{01,10\}$ would show up.

A great deal of fermionicity would normally be lost by fixing the basis, but with this specific selection that is not the case, as we are working in the basis which diagonalizes both the Hamiltonian and the one-body matrix.

## Dataset
Let's download the dataset


In [None]:
# This is for the Omega = 4 case (4 fermions and 4 two-fold degenerate levels)
import requests
url = 'https://raw.githubusercontent.com/Marco-Di-Tullio/Fermionic-dataset/main/fermionic_dataset_4.csv'
# A timeout keeps the cell from hanging forever on a stalled connection
r = requests.get(url, allow_redirects=True, timeout=30)
# Fail loudly on an HTTP error instead of silently saving an error page as the CSV
r.raise_for_status()
# The context manager guarantees the file is flushed and closed before it is read back
with open('fermionic_dataset_4.csv', 'wb') as csv_file:
    csv_file.write(r.content)


This is how it looks:

In [None]:
import pandas as pd
# Read the CSV file written to the working directory by the download cell
df = pd.read_csv('fermionic_dataset_4.csv')
# Preview only the first rows rather than the whole table
df.head()

Unnamed: 0,g,0000,0001,0010,0011,0100,0101,0110,0111,1000,1001,1010,1011,1100,1101,1110,1111,label
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1,0.01,0.0,0.0,0.0,1.5e-05,0.0,0.001675,0.002517,0.0,0.0,0.002517,0.005025,0.0,0.99998,0.0,0.0,0.0,0.0
2,0.02,0.0,0.0,0.0,5.9e-05,0.0,0.003367,0.005067,0.0,0.0,0.005067,0.010099,0.0,0.999918,0.0,0.0,0.0,0.0
3,0.03,0.0,0.0,0.0,0.000133,0.0,0.005075,0.00765,0.0,0.0,0.00765,0.015221,0.0,0.999813,0.0,0.0,0.0,0.0
4,0.04,0.0,0.0,0.0,0.000239,0.0,0.006801,0.010266,0.0,0.0,0.010266,0.020389,0.0,0.999664,0.0,0.0,0.0,0.0


In [None]:
# Convert the frame to a plain NumPy array; later cells slice it by position
# (first column is treated as the coupling g, last column as the label)
datos = df.to_numpy()
datos

array([[0.  , 0.  , 0.  , ..., 0.  , 0.  , 0.  ],
       [0.01, 0.  , 0.  , ..., 0.  , 0.  , 0.  ],
       [0.02, 0.  , 0.  , ..., 0.  , 0.  , 0.  ],
       ...,
       [4.98, 0.  , 0.  , ..., 0.  , 0.  , 1.  ],
       [4.99, 0.  , 0.  , ..., 0.  , 0.  , 1.  ],
       [5.  , 0.  , 0.  , ..., 0.  , 0.  , 1.  ]])

## Initializing Qiskit

We use the `initialize()` method. The input is a vector in the $2^n$-dimensional basis, and the program deduces the necessary operations for generating it. The underlying theory can be found [in this tutorial](https://github.com/Qiskit/qiskit-tutorials/blob/master/tutorials/circuits/3_summary_of_quantum_operations.ipynb).


In [None]:
# Keep only the amplitudes: drop the coupling (first column) and the label (last column)
estado = datos[:,1:-1]

# Index of the dataset row to encode
i = 25

# Qiskit takes the amplitudes as a plain Python list
state = estado[i,:].tolist()
label = int(datos[i,-1])

# Desired vector lives in the 2^n space
desired_vector = state

# Derive the register size from the vector length instead of hard-coding 4 qubits,
# so the same cell works for a dataset with a different number of levels.
n_qubits = len(desired_vector).bit_length() - 1
assert 2**n_qubits == len(desired_vector), "state length must be a power of two"

q = QuantumRegister(n_qubits)
# Single classical bit, used by the optional measurement of q[0]
c = ClassicalRegister(1)

qc = QuantumCircuit(q,c)

# Initialize every qubit of the register with the target amplitudes
qc.initialize(desired_vector, list(q))
# The drawing shows up in 2 lines because of string length
qc.draw()

In [None]:
# Run the prepared circuit on the statevector simulator and read back the amplitudes
backend = Aer.get_backend('statevector_simulator')
job = execute(qc, backend)
sim_result = job.result()
# The resulting state is expressed in the 2^n basis
qc_state = sim_result.get_statevector(qc)
qc_state

array([0.        +0.j, 0.        +0.j, 0.        +0.j, 0.01013337+0.j,
       0.        +0.j, 0.04679042+0.j, 0.07210914+0.j, 0.        +0.j,
       0.        +0.j, 0.07210914+0.j, 0.13663267+0.j, 0.        +0.j,
       0.98419512+0.j, 0.        +0.j, 0.        +0.j, 0.        +0.j])

In [None]:
# Optional sampling step, left disabled: measure qubit 0 into the single
# classical bit, run 100 shots on the same backend, and read the counts.
# qc.measure(q[0], c[0])
# job = execute(qc, backend, shots=100)
# result = job.result()
# result.get_counts(qc)

## Train-test splitting for Machine Learning

In [None]:
from sklearn.model_selection import train_test_split

# Drop only the coupling column (index 0); the amplitudes and the trailing label are kept
estados = datos[:, 1:]

# Shuffled split holding out 20% of the rows; the fixed seed makes it reproducible
train, test = train_test_split(
    estados,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Inspect one training row; the split input kept every column after the first,
# so the last entry is the column the notebook treats as the label
train[1,:]

array([0.        , 0.        , 0.        , 0.07529731, 0.        ,
       0.14444913, 0.21527301, 0.        , 0.        , 0.21527301,
       0.35389142, 0.        , 0.86921844, 0.        , 0.        ,
       0.        , 1.        ])