In [None]:
import torch
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from kan import *
import optuna

# Read the dataset CSV and hold out 10 % of the rows as a test split
# (fixed random_state so the split is repeatable).
df = pd.read_csv(r'dataset_5578_normalized.csv')
train_df, test_df = train_test_split(df, test_size=0.1, random_state=2024)

# Inputs: every column except the target ('log(kappa)') and the two
# columns 'compound' and 'aurl', converted to float32 tensors.
train_input, test_input = (
    torch.tensor(
        split.drop(columns=['log(kappa)', 'compound', 'aurl']).values,
        dtype=torch.float32,
    )
    for split in (train_df, test_df)
)

# Targets: the 'log(kappa)' column as an (n_rows, 1) float32 tensor.
train_label, test_label = (
    torch.tensor(split['log(kappa)'].values, dtype=torch.float32).unsqueeze(1)
    for split in (train_df, test_df)
)

# Dictionary with the four keys that the training call and the later cells read.
dataset = dict(
    train_input=train_input,
    train_label=train_label,
    test_input=test_input,
    test_label=test_label,
)


def calculate_r2(model, input_data, true_labels):
    """Return the R² score of ``model`` evaluated on ``input_data``.

    Args:
        model: Callable mapping ``input_data`` to a 2-D tensor; only its
            first output column is scored.
        input_data: Tensor passed to ``model`` unchanged.
        true_labels: 2-D tensor whose first column holds the reference values.

    Returns:
        Whatever ``r2_score`` returns for (reference, prediction).
    """
    # Detach from the autograd graph and move to host memory before handing
    # the values to NumPy.
    predicted = model(input_data).detach().cpu().numpy()
    reference = true_labels[:, 0].cpu().numpy()
    return r2_score(reference, predicted[:, 0])


# Build the KAN: 10 inputs -> 7 hidden nodes -> 1 output.
best_model = KAN(
    width=[10, 7, 1],
    grid=15,
    k=5,
    noise_scale=0,
    seed=0,
    symbolic_enabled=True,
)

# NOTE(review): lr / lamb_l1 / steps look like tuned values (optuna is
# imported above) -- confirm where they come from.
best_model.train(
    dataset,
    opt="LBFGS",
    steps=27,
    lr=0.47852297530306415,
    lamb_l1=9.559785284286264,
    img_folder='img',
    save_fig=False,
)

# R² on both splits (train first, then test).
train_r2 = calculate_r2(best_model, dataset['train_input'], dataset['train_label'])
test_r2 = calculate_r2(best_model, dataset['test_input'], dataset['test_label'])

# Raw predictions as NumPy arrays; the forward passes stay in the order
# train -> test.
train_predictions = best_model(dataset['train_input']).detach().cpu().numpy()
test_predictions = best_model(dataset['test_input']).detach().cpu().numpy()

# One table per split: compound identifier, true target, predicted target.
train_results, test_results = (
    pd.DataFrame({
        'compound': split_df['compound'],
        'True_Values': dataset[f'{split}_label'][:, 0].numpy(),
        'Predicted_Values': split_predictions[:, 0],
    })
    for split, split_df, split_predictions in (
        ('train', train_df, train_predictions),
        ('test', test_df, test_predictions),
    )
)

train_results.to_csv('final_train_predictions_kappa.csv', index=False)
test_results.to_csv('final_test_predictions_kappa.csv', index=False)


In [None]:
# Re-read the exported test predictions and recompute R² from the file.
test_predictions_df = pd.read_csv('final_test_predictions_kappa.csv')
test_r2_loaded = r2_score(
    y_true=test_predictions_df['True_Values'],
    y_pred=test_predictions_df['Predicted_Values'],
)

print(f'Test R² : {test_r2_loaded}')

# Ask pykan to fit symbolic functions to the trained model.
best_model.auto_symbolic()

In [None]:
# formula = best_model.symbolic_formula(
#     var=[
#         'spacegroup_relax',
#         'agl_debye','agl_gruneisen',
#         'agl_heat_capacity_Cp_300K','agl_heat_capacity_Cv_300K',                                          
#         'agl_thermal_expansion_300K','agl_vibrational_entropy_300K_atom','agl_vibrational_free_energy_300K_atom',                                          
#         'agl_bulk_modulus_static_300K','agl_bulk_modulus_isothermal_300K'
#         ]
# )[0][0]
# formula

# LaTeX-style names for the 10 input variables, in input-column order.
# Raw strings are used for every name containing a backslash: '\T' and '\g'
# are not valid escape sequences, so the non-raw form triggers a
# SyntaxWarning on Python 3.12+ (the string values themselves are unchanged).
formula = best_model.symbolic_formula(
    var=['SG', r'\Theta', r'\gamma', 'C_p', 'C_v', r'\alpha', 'S_v', 'F',
         'K_{s}', 'K_{i}']
)[0]
formula

# formula = best_model.symbolic_formula(
# var=['d_1','d_2','d_3','d_4','d_5','d_6','d_7','d_8','d_9','d_10']
# )[0]
# formula

In [None]:
# Same variable names as the previous cell, but keep only element [0][0]
# of the result so the cell displays a single expression.
# Raw strings avoid the invalid escape sequences '\T' and '\g'
# (SyntaxWarning on Python 3.12+); the string values are unchanged.
formula = best_model.symbolic_formula(
    var=['SG', r'\Theta', r'\gamma', 'C_p', 'C_v', r'\alpha', 'S_v', 'F',
         'K_{s}', 'K_{i}']
)[0][0]
formula

In [None]:
# Run a forward pass on the training inputs before plotting.
# NOTE(review): presumably this refreshes the activations that plot() draws
# -- confirm against the pykan documentation.
best_model(dataset['train_input'])

# Raw strings keep the LaTeX backslashes literal: '\T', '\g' and '\k' are not
# valid escape sequences and raise a SyntaxWarning on Python 3.12+.
best_model.plot(
    folder='img',
    in_vars=['SG', r'\Theta', r'\gamma', 'C_p', 'C_v', r'\alpha', 'S_v', 'F',
             'K_{s}', 'K_{i}'],
    out_vars=[r'$\kappa$'],
    title=r'$\kappa$ - KAN',
    beta=5,
)

In [None]:

import moviepy.video.io.ImageSequenceClip  # moviepy == 1.0.3
import os
import numpy as np

# Assemble the frames "<integer>.jpg" found in `image_folder` into an MP4,
# ordered by their integer file name.
image_folder = 'img'
video_name = 'video_kappa'
fps = 4

files = os.listdir(image_folder)

# Keep only files whose whole stem is an ASCII integer, so names such as
# "1_copy.jpg" are skipped instead of crashing int().
train_index = np.sort([
    int(stem)
    for stem, ext in (os.path.splitext(file) for file in files)
    if ext == '.jpg' and stem.isascii() and stem.isdigit()
])

if train_index.size == 0:
    raise FileNotFoundError(
        f"No '<number>.jpg' frames found in folder {image_folder!r}"
    )

# Iterate over the sorted frame numbers themselves. Indexing `train_index`
# by its own values is only correct when the frames are exactly 0..N-1 and
# otherwise picks the wrong frame or raises IndexError.
image_files = [image_folder + '/' + str(frame) + '.jpg' for frame in train_index]

clip = moviepy.video.io.ImageSequenceClip.ImageSequenceClip(image_files, fps=fps)
clip.write_videofile(video_name + '.mp4')

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
import numpy as np
from sklearn.metrics import r2_score

# Rebuild the train/test tensors so this part of the notebook can run without
# re-training the model. The CSV is read through the same relative path as
# the first cell instead of an absolute, machine-specific path, so the
# notebook runs on any machine where the file sits in the working directory.
df = pd.read_csv('dataset_5578_normalized.csv')

# Same split parameters as the first cell, so the rows in each split match.
train_df, test_df = train_test_split(df, test_size=0.1, random_state=2024)

# Inputs: all columns except the target and the 'compound' / 'aurl' columns.
train_input = torch.tensor(train_df.drop(columns=['log(kappa)', 'compound', 'aurl']).values,
                           dtype=torch.float32)
# Targets: 'log(kappa)' as an (n_rows, 1) tensor.
train_label = torch.tensor(train_df['log(kappa)'].values, dtype=torch.float32).unsqueeze(1)

test_input = torch.tensor(test_df.drop(columns=['log(kappa)', 'compound', 'aurl']).values,
                          dtype=torch.float32)
test_label = torch.tensor(test_df['log(kappa)'].values, dtype=torch.float32).unsqueeze(1)

dataset = {
    'train_input': train_input,
    'train_label': train_label,
    'test_input': test_input,
    'test_label': test_label
}

# d_1 .. d_10 are the first ten feature columns of the test inputs
# (1-D torch tensors), used by the closed-form expression in the next cell.
(d_1, d_2, d_3, d_4, d_5,
 d_6, d_7, d_8, d_9, d_10) = (test_input[:, col] for col in range(10))

In [None]:
# Evaluate a closed-form expression on the test inputs and score it with R².
#
# d_1 .. d_10 are columns 0..9 of `test_input` (defined in the previous cell).
# The same expression, written with x_i and SymPy-style function names, is
# stored as a string in the "formula complexity" cell below.
#
# Structure: a constant (0.12) plus seven outer terms -- two sin(...), one
# arcsin(...) and four Gaussian-like exp(-c * (...)**2) terms -- each wrapping
# a sum of univariate functions of the inputs.
# NOTE(review): presumably one outer term per hidden node of the [10, 7, 1]
# KAN trained above -- confirm.
# NOTE(review): d_i are torch tensors, so the np.* calls rely on torch/NumPy
# interop -- confirm the result type is what r2_score expects.
# NOTE(review): np.arcsin / np.arctanh yield NaN outside their domains;
# confirm the inputs keep their arguments in range.
formula_out=(
    # outer term 1: 0.12 * sin(...)
    0.12 * np.sin(-2.04 * (0.74 - d_9)**2 + 0.34 * np.tan(2.08 * d_1 - 0.86) - 0.26 * np.tan(2.7 * d_8 + 7.97) 
        + 0.34 * np.tanh(10.0 * d_3 - 7.6) + 0.36 * np.abs(6.35 * d_2 - 1.02) 
        + 1.1 * np.arctan(3.98 * d_10 - 1.17) + 15.72 - 9.96 * np.exp(-100.0 * (-d_6 - 0.1)**2) 
        - 0.38 * np.exp(-100.0 * (0.22 - d_5)**2) + 0.4 * np.exp(-100.0 * (0.08 - d_4)**2) 
        - 0.68 * np.exp(-77.18 * (0.07 - d_7)**2)) 
    # outer term 2: -0.08 * sin(...)
    - 0.08 * np.sin(-0.29 * np.log(np.cosh(6.02 * d_10 - 1.42)) 
        - 0.18 * np.log(np.cosh(8.13 * d_9 - 2.17)) + 0.14 * np.tan(2.97 * d_1 + 1.62) 
        - 0.27 * np.tan(2.94 * d_2 - 7.78) + 0.66 * np.tan(2.16 * d_7 + 8.41) - 10.4 
        + 0.83 * np.exp(-100.0 * (0.43 - d_3)**2) - 0.34 * np.exp(-44.44 * (0.35 - d_8)**2) 
        + 0.51 * np.exp(-88.36 * (0.15 - d_5)**2) + 1.2 * np.exp(-100.0 * (0.14 - d_4)**2) 
        + 0.94 * np.exp(-54.76 * (0.08 - d_6)**2)) 
    # outer term 3: 2.94 * arcsin(...)
    + 2.94 * np.arcsin(-0.18 * (0.4 - d_1)**3 + 0.05 * np.tan(1.94 * d_10 + 2.43) 
        + 0.06 * np.cosh(1.11 * d_9 + 0.6) - 0.04 * np.tanh(10.0 * d_3 - 4.66) 
        + 0.3 * np.arcsin(0.4 * d_8 - 1.01) + 0.15 + 0.24 * np.exp(-100.0 * (-d_5 - 0.06)**2) 
        + 0.14 * np.exp(-54.76 * (-d_4 - 0.05)**2) - 0.23 * np.exp(-96.04 * (-d_2 - 0.02)**2) 
        + 0.11 * np.exp(-43.56 * (0.08 - d_7)**2) + 0.19 * np.exp(-56.5 * (0.02 - d_6)**2)) 
    # constant offset, then outer term 4: 0.21 * exp(-21.93 * (...)**2)
    + 0.12 + 0.21 * np.exp(-21.93 * (-0.04 * np.abs(8.17 * d_5 - 0.62) - 0.07 * np.abs(7.18 * d_7 - 0.62) 
        - 0.27 * np.arctan(2.03 * d_10 - 0.09) - 0.04 * np.arctan(10.0 * d_8 - 5.0) - 0.18 * np.arctanh(1.8 * d_9 - 0.96) 
        - 0.15 + 0.61 * np.exp(-100.0 * (-d_6 - 0.06)**2) - np.exp(-100.0 * (-d_2 - 0.12)**2) 
        - 0.04 * np.exp(-92.16 * (0.81 - d_1)**2) - 0.06 * np.exp(-88.36 * (0.53 - d_3)**2) 
        + 0.18 * np.exp(-11.56 * (0.02 - d_4)**2))**2) 
    # outer term 5: -0.39 * exp(-1.51 * (...)**2)
    - 0.39 * np.exp(-1.51 * (-0.09 * np.tan(3.77 * d_4 - 7.38) - np.tanh(10.0 * d_2 - 0.27) - 0.05 * np.abs(6.8 * d_5 - 0.41) 
        - 0.43 * np.arctan(3.75 * d_8 - 2.16) + 0.49 * np.arctan(8.0 * d_9 - 0.75) - 0.26 - 0.16 * np.exp(-23.04 * (0.86 - d_3)**2) 
        - 0.09 * np.exp(-18.55 * (0.61 - d_1)**2) - 0.74 * np.exp(-30.78 * (0.08 - d_7)**2) - 0.5 * np.exp(-57.93 * (0.05 - d_10)**2) 
        - 0.74 * np.exp(-38.44 * (0.01 - d_6)**2))**2) 
    # outer term 6: 0.08 * exp(-7.48 * (...)**2)
    + 0.08 * np.exp(-7.48 * (-0.08 * np.tan(2.79 * d_1 + 8.0) - 0.18 * np.tan(2.77 * d_2 + 4.81) - 0.91 * np.tan(1.88 * d_8 + 8.55) 
        - 0.08 * np.tanh(10.0 * d_3 - 5.8) - 0.23 * np.abs(8.78 * d_10 - 0.82) - 0.17 * np.abs(9.64 * d_9 - 0.98) 
        - 0.09 * np.arctan(7.62 * d_4 - 0.79) - 1 + 0.19 * np.exp(-100.0 * (-d_5 - 0.04)**2) + 0.38 * np.exp(-21.16 * (0.23 - d_7)**2) 
        + 0.29 * np.exp(-100.0 * (0.12 - d_6)**2))**2) 
    # outer term 7: -0.35 * exp(-12.91 * (...)**2)
    - 0.35 * np.exp(-12.91 * (-0.03 * np.tan(3.17 * d_9 + 1.6) - 0.07 * np.arctan(10.0 * d_2 - 1.2) + np.exp(-100.0 * (-d_6 - 0.08)**2) 
        - 0.05 * np.exp(-100.0 * (1 - d_1)**2) - 0.03 * np.exp(-100.0 * (0.48 - d_3)**2) - 0.05 * np.exp(-71.61 * (0.39 - d_8)**2) 
        - 0.07 * np.exp(-60.06 * (0.33 - d_10)**2) + 0.06 * np.exp(-43.56 * (0.2 - d_7)**2) + 0.07 * np.exp(-77.44 * (0.18 - d_5)**2) 
        + 0.08 * np.exp(-100.0 * (0.1 - d_4)**2))**2)
)

# Score the closed-form predictions against the held-out targets.
# `dataset['test_label']` is (n_rows, 1) while `formula_out` is 1-D.
test_r2_loaded = r2_score(dataset['test_label'], formula_out)

print('Test R² :', test_r2_loaded)

In [None]:
# formula complexity
import re

# The symbolic expression as text (x_i naming, SymPy-style function names).
expression = """
0.12*sin(-2.04*(0.74 - x_9)**2 + 0.34*tan(2.08*x_1 - 0.86) - 0.26*tan(2.7*x_8 + 7.97) + 0.34*tanh(10.0*x_3 - 7.6) + 0.36*Abs(6.35*x_2 - 1.02) + 1.1*atan(3.98*x_10 - 1.17) + 15.72 - 9.96*exp(-100.0*(-x_6 - 0.1)**2) - 0.38*exp(-100.0*(0.22 - x_5)**2) + 0.4*exp(-100.0*(0.08 - x_4)**2) - 0.68*exp(-77.18*(0.07 - x_7)**2)) - 0.08*sin(-0.29*log(cosh(6.02*x_10 - 1.42)) - 0.18*log(cosh(8.13*x_9 - 2.17)) + 0.14*tan(2.97*x_1 + 1.62) - 0.27*tan(2.94*x_2 - 7.78) + 0.66*tan(2.16*x_7 + 8.41) - 10.4 + 0.83*exp(-100.0*(0.43 - x_3)**2) - 0.34*exp(-44.44*(0.35 - x_8)**2) + 0.51*exp(-88.36*(0.15 - x_5)**2) + 1.2*exp(-100.0*(0.14 - x_4)**2) + 0.94*exp(-54.76*(0.08 - x_6)**2)) + 2.94*asin(-0.18*(0.4 - x_1)**3 + 0.05*tan(1.94*x_10 + 2.43) + 0.06*cosh(1.11*x_9 + 0.6) - 0.04*tanh(10.0*x_3 - 4.66) + 0.3*asin(0.4*x_8 - 1.01) + 0.15 + 0.24*exp(-100.0*(-x_5 - 0.06)**2) + 0.14*exp(-54.76*(-x_4 - 0.05)**2) - 0.23*exp(-96.04*(-x_2 - 0.02)**2) + 0.11*exp(-43.56*(0.08 - x_7)**2) + 0.19*exp(-56.5*(0.02 - x_6)**2)) + 0.12 + 0.21*exp(-21.93*(-0.04*Abs(8.17*x_5 - 0.62) - 0.07*Abs(7.18*x_7 - 0.62) - 0.27*atan(2.03*x_10 - 0.09) - 0.04*atan(10.0*x_8 - 5.0) - 0.18*atanh(1.8*x_9 - 0.96) - 0.15 + 0.61*exp(-100.0*(-x_6 - 0.06)**2) - exp(-100.0*(-x_2 - 0.12)**2) - 0.04*exp(-92.16*(0.81 - x_1)**2) - 0.06*exp(-88.36*(0.53 - x_3)**2) + 0.18*exp(-11.56*(0.02 - x_4)**2))**2) - 0.39*exp(-1.51*(-0.09*tan(3.77*x_4 - 7.38) - tanh(10.0*x_2 - 0.27) - 0.05*Abs(6.8*x_5 - 0.41) - 0.43*atan(3.75*x_8 - 2.16) + 0.49*atan(8.0*x_9 - 0.75) - 0.26 - 0.16*exp(-23.04*(0.86 - x_3)**2) - 0.09*exp(-18.55*(0.61 - x_1)**2) - 0.74*exp(-30.78*(0.08 - x_7)**2) - 0.5*exp(-57.93*(0.05 - x_10)**2) - 0.74*exp(-38.44*(0.01 - x_6)**2))**2) + 0.08*exp(-7.48*(-0.08*tan(2.79*x_1 + 8.0) - 0.18*tan(2.77*x_2 + 4.81) - 0.91*tan(1.88*x_8 + 8.55) - 0.08*tanh(10.0*x_3 - 5.8) - 0.23*Abs(8.78*x_10 - 0.82) - 0.17*Abs(9.64*x_9 - 0.98) - 0.09*atan(7.62*x_4 - 0.79) - 1 + 0.19*exp(-100.0*(-x_5 - 0.04)**2) + 0.38*exp(-21.16*(0.23 - x_7)**2) + 0.29*exp(-100.0*(0.12 
- x_6)**2))**2) - 0.35*exp(-12.91*(-0.03*tan(3.17*x_9 + 1.6) - 0.07*atan(10.0*x_2 - 1.2) + exp(-100.0*(-x_6 - 0.08)**2) - 0.05*exp(-100.0*(1 - x_1)**2) - 0.03*exp(-100.0*(0.48 - x_3)**2) - 0.05*exp(-71.61*(0.39 - x_8)**2) - 0.07*exp(-60.06*(0.33 - x_10)**2) + 0.06*exp(-43.56*(0.2 - x_7)**2) + 0.07*exp(-77.44*(0.18 - x_5)**2) + 0.08*exp(-100.0*(0.1 - x_4)**2))**2)
"""

# Kept for reference; only the entries of `functions` are counted below.
operators = ["+", "-", "*", "/", "**"]
# Tokens that contribute to the complexity count: every function name used in
# the expression plus the power operator. "asin" is listed explicitly because
# names are matched as whole tokens, not as substrings.
functions = ["sin", "asin", "tan", "tanh", "cosh", "atan", "atanh", "exp", "log", "Abs", '**']


def count_operations(expression, counted_tokens):
    """Count how many tokens of ``expression`` appear in ``counted_tokens``.

    The expression is split into identifiers (e.g. ``tanh``, ``x_10``) and
    ``**`` operators, and each token is compared as a whole. Plain substring
    counting would count ``tan`` again inside ``tanh``/``atan``/``atanh`` and
    ``sin`` again inside ``asin``, inflating the result.

    Args:
        expression: Formula text.
        counted_tokens: Iterable of function names and/or ``"**"`` to count.

    Returns:
        Number of matching tokens (int).
    """
    tokens = re.findall(r"\*\*|[A-Za-z_]\w*", expression)
    wanted = set(counted_tokens)
    return sum(1 for token in tokens if token in wanted)


function_count = count_operations(expression, functions)

total_count = function_count

print(f"Count: {total_count}")
