# Predict the Sha size of the Elkies-Klagsbrun curve

In [6]:
import torch
import pandas as pd
import pyarrow.parquet as pq
import numpy as np
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.metrics import accuracy_score
from sklearn.metrics import matthews_corrcoef

# Prepare data

In [7]:
# Fix the random seed.
seed = 42

# Columns to read from each parquet file. 'sha' is turned into the label
# further down in this cell; the remaining columns are kept as-is.
columns = ['rank', 'regulator', 'torsion', 'sha', 'real_period', 'tamagawa_product']

# 1. Load the small conductor dataset (path is relative to the working directory).
path = '../data_files/sha/ecq_sha_B_100_conds_1_500000_reg.parquet'
# Read only the specified columns using PyArrow, then convert to pandas.
table = pq.read_table(path, columns=columns)
df = table.to_pandas()

# 2. Load the big conductor dataset.
path = '../data_files/sha/ecq_sha_B_1000_conds_lt_500k.parquet'
table = pq.read_table(path, columns=columns)
df_large_cond = table.to_pandas()

# 3. Concatenate both. ignore_index=True gives the combined frame a fresh
#    0..n-1 index instead of repeating the row labels of the two inputs.
df = pd.concat([df, df_large_cond], axis=0, ignore_index=True)

# 4. Get the integer (floor) square root of sha.
#    Truncating a floating-point power (int(x ** 0.5)) can land one below the
#    true root when the float result is a hair under an exact integer, so the
#    root is rounded first and then stepped back by one only where the rounded
#    value overshoots (root**2 > sha). Missing or negative values still raise
#    on the integer cast rather than producing a bogus label.
sha_values = df['sha']
sqrt_sha = np.sqrt(sha_values).round().astype('int64')
sqrt_sha = sqrt_sha - (sqrt_sha * sqrt_sha > sha_values).astype('int64')
df['sqrt_sha'] = sqrt_sha
df = df.drop(columns='sha')
label_col = 'sqrt_sha'

# Train the model and evaluate it on test set


In [10]:
# Run a tree regression model

# Every column except the label is used as a feature.
feature_columns = [c for c in df.columns if c != 'sqrt_sha']
X = df[feature_columns]
# Select the label as a 1-D Series: a single-column DataFrame makes
# scikit-learn emit a DataConversionWarning about a column-vector y.
y = df['sqrt_sha']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)
# Reuse the notebook-wide seed so the split and the model share one setting.
model = HistGradientBoostingRegressor(random_state=seed)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
# The regressor outputs continuous values; round to the nearest integer so the
# predictions can be compared with the integer labels by the metrics below.
y_pred_rounded = np.round(y_pred).astype(int)

# Convert y_test to a 1-D array of actual values
y_test = y_test.to_numpy()

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred_rounded)
# Calculate MCC (Matthews correlation coefficient)
MCC = matthews_corrcoef(y_test, y_pred_rounded)

print(f'On test set: Accuracy: {accuracy:0.8f}. MCC: {MCC:0.8f}.')

# Predict the Sha size of E29

In [12]:
# X input, one row, in this feature order:
# ['rank', 'regulator', 'torsion', 'real_period', 'tamagawa_product']
Elkies_Klagsbrun_X = np.array([29, 1433744182671713097629179252379019849.493842, 1, 3.5090427060633614999186666781786131525e-15, 10725120]).reshape(-1, 5)

# Guard against the hand-written row drifting out of step with the columns the
# model was fitted on.
assert list(feature_columns) == ['rank', 'regulator', 'torsion', 'real_period', 'tamagawa_product'], \
    "feature order of the hand-written input no longer matches the training features"

# The model was fitted on a DataFrame, so pass the row with the same column
# names; a bare ndarray makes scikit-learn warn about missing feature names.
Elkies_Klagsbrun_features = pd.DataFrame(Elkies_Klagsbrun_X, columns=feature_columns)

# The training label is sqrt_sha, so the rounded prediction is the square root
# of the Sha size; square it to report the Sha size itself.
# NOTE(review): these inputs (e.g. rank 29) may lie far outside the range of
# the training data - confirm how much weight this prediction can bear.
predicted_sqrt_sha = np.round(model.predict(Elkies_Klagsbrun_features))[0]
predicted_sha = predicted_sqrt_sha ** 2
print(f"The model predicts that Elkie-Klagsbrun cuve has Sha size: {predicted_sha}")

The model predicts that Elkie-Klagsbrun cuve has Sha size: 1.0


