# Classify if Sha of the Elkies-Klagsbrun curve is trivial

In [3]:
from lib import utils
from lib import models
from lib import executor
import torch
import pandas as pd
import pyarrow.parquet as pq
import numpy as np
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.metrics import mean_squared_error, accuracy_score
from sklearn.metrics import matthews_corrcoef
import matplotlib.pyplot as plt

# Prepare data

In [4]:
# fix the random seed
seed = 42

# load your data here. The following ensure this will work on Windows as well as Unix
# columns to read
columns = ['rank', 'regulator', 'torsion', 'sha', 'real_period', 'tamagawa_product']

# 1. load the small conductor dataset
path = '../data_files/sha/ecq_sha_B_100_conds_1_500000_reg.parquet'
# Read the specified columns using PyArrow
table = pq.read_table(path, columns=columns)
# Convert the PyArrow Table to a Pandas DataFrame
df = table.to_pandas()

# 2. load the big conductor dataset 
path = '../data_files/sha/ecq_sha_B_1000_conds_lt_500k.parquet'
# Read the specified columns using PyArrow
table = pq.read_table(path, columns=columns)
# Convert the PyArrow Table to a Pandas DataFrame
df_large_cond = table.to_pandas()

# 3. concatenate both
df = pd.concat([df, df_large_cond], axis=0)

# 4. get the sqrt of sha
df['sqrt_sha'] = df['sha'].apply(lambda x: int(x**0.5))
df.drop('sha', axis=1, inplace=True)
label_col = 'sqrt_sha'

# 5. convert if sqrt_sha is trivial
df['sqrt_sha'] = df['sqrt_sha'].apply(lambda x: x > 1)

# 6. log transform X
feature_columns = [c for c in df.columns if c != 'sqrt_sha']
X = df[feature_columns]
X = np.log(X.values)
y = df[['sqrt_sha']]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

# 7. prepare the input of the Elkies-Klagsbrun curve
# X input: 
# ['rank', 'regulator', 'torsion', 'real_period', 'tamagawa_product']
Elkies_Klagsbrun_X = np.array([29, 1433744182671713097629179252379019849.493842, 1, 3.5090427060633614999186666781786131525e-15, 10725120]).reshape(-1, 5)

  X = np.log(X.values)


# Gradient boost model

In [5]:
# Run a tree classification model
model = HistGradientBoostingClassifier(random_state=seed)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)

# Calculate MCC
mcc = matthews_corrcoef(y_test, y_pred)

print(f'In the test set: Accuracy: {accuracy:0.8f}. MCC: {mcc:0.8f}')

# predict sha of the Elkies-Klagsbrun curve
print('-'*20)
print(f"The model predicts that Elkie-Klagsbrun cuve has Sha trivial: {not model.predict(Elkies_Klagsbrun_X)[0]} with probability {model.predict_proba(Elkies_Klagsbrun_X)[0][0]}")

  y = column_or_1d(y, warn=True)


In the test set: Accuracy: 0.93135872. MCC: 0.47209114
--------------------
The model predicts that Elkie-Klagsbrun cuve has Sha trivial: True with probability 0.9999287932325964


# NN model

In [7]:
# check if we have cuda available
device = 'cpu' if not torch.cuda.is_available() else 'cuda'
print(f'Using {device} device')

# choose model parameters
hidden_units = [128,64,32]

# choose training parameters
loss_func = nn.CrossEntropyLoss()
num_epochs = 3
lr = 0.0005
evaluator = matthews_corrcoef

Using cuda device


In [15]:
# train and test the result
input_dim, output_dim = utils.get_input_output_dim(df, 'sqrt_sha', if_regression=False)
model = models.VanillaNN(input_dim, hidden_units, output_dim, if_dropout=True, dropout_rate=0.6, if_batchnorm=True).to(device)
optimizer = optim.Adam(model.parameters(), lr=lr) # reinitialize optimizer
train_dataloader, val_dataset, test_dataset = utils.prepare_data(df, 'sqrt_sha', device, if_regression=False, random_state=seed)
model, train_eval_hist, val_eval_hist, train_loss_hist, val_loss_hist = executor.train(model, train_dataloader, val_dataset, loss_func, evaluator, optimizer, num_epochs, if_regression=False, verbose=True)
mcc = executor.test(model, test_dataset, evaluator, if_regression=False)
accuracy = executor.test(model, test_dataset, accuracy_score, if_regression=False)
print(f'In the test set: Accuracy: {accuracy:0.8f}. MCC: {mcc:0.8f}')
print('-'*20)

# predict sha of Elkies-Klagsbrun curve
sm = nn.Softmax(1)  # softmax layer
pred = model(torch.tensor(Elkies_Klagsbrun_X,dtype=torch.float32).to(device))  # output of the model
prob = sm(pred)  # probability predicted by the model
print(f"The model predicts that Elkie-Klagsbrun cuve has Sha trivial: {True if torch.argmax(prob) == 0 else False} with probability {prob[0][torch.argmax(prob)]}")

The model predicts that Elkie-Klagsbrun cuve has Sha trivial: True with probability 1.0
