Importing libraries

In [None]:
## Import required libraries
import sympy as sp
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input,LSTM,Dense,Embedding
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import random
import re

Data Creation

In [None]:
## Create a dataset having functions and corresponding taylor expansions using sympy
x=sp.symbols('x')
degree=3
num_polynomials=100
x0=0
order=4

def generate_random_polynomials(degree,num_polynomials,x_var):
  polynomials=[]
  for _ in range(num_polynomials):
    coefficients=[random.randint(-10,10) for _ in range(degree+1)]
    polynomial=sum(coefficients*(x_var**i) for i,coefficients in enumerate(coefficients))
    polynomials.append(polynomial)
  return polynomials

def add_more_functions():
  functions=[
      sp.sin(x),
      sp.cos(x),
      sp.tan(x),
      sp.exp(x),
      sp.log(x),
      sp.sqrt(x),
      sp.sinh(x),
      sp.cosh(x),
      sp.tanh(x),
      sp.asin(x),
      sp.acos(x),
      sp.atan(x),
      1/(1-x),
      1/(x+3)
  ]
  polynomials=generate_random_polynomials(degree,num_polynomials,x)
  funcs=polynomials+functions
  return funcs

def generate_taylor_data(x_var,x0,order):
  data=[]
  functions=add_more_functions()
  for func in functions:
    taylor_series=sp.series(func,x_var,x0,n=order+1).removeO()
    data.append({'Functions':str(func),'Taylor Series':str(taylor_series)})
  return pd.DataFrame(data)

df=generate_taylor_data(x,x0,order)
print(df.shape)

(114, 2)


Math Tokenization

In [None]:
## Math tokenization
def tokenize(expression):
  characters=r'(\b(?:sin|cos|tan|exp|log|sqrt|...)\b|\w+|[+\-*/^()]|.)'
  tokens=re.findall(characters,expression)
  tokens=[token for token in tokens if token.strip()]
  return tokens

df['Functions']=df['Functions'].apply(tokenize)
df['Taylor Series']=df['Taylor Series'].apply(tokenize)

In [5]:
df.tail()

Unnamed: 0,Functions,Taylor Series
109,"[asin, (, x, )]","[x**, 3/6, + , x]"
110,"[acos, (, x, )]","[-, x**, 3/6, - , x, + , pi/, 2]"
111,"[atan, (, x, )]","[-, x**, 3/3, + , x]"
112,"[1/(, 1, - , x, )]","[x**, 4, + , x**, 3, + , x**, 2, + , x, + ..."
113,"[1/(, x, + , 3, )]","[x**, 4, /, 243, - , x**, 3, /81, + , x**, 2..."


Keras Tokenizer

In [None]:
input_texts=list(df['Functions'])## Input tokens
targets=list(df['Taylor Series'])## Output tokens

tokenizer=Tokenizer()## Define tokenizer
tokenizer.fit_on_texts(input_texts+targets)

inputs=tokenizer.texts_to_sequences(input_texts)
targets=tokenizer.texts_to_sequences(targets)

max_sequence_length=max(max(len(seq) for seq in inputs),max(len(seq) for seq in targets))
encoder_input_data=pad_sequences(inputs,maxlen=max_sequence_length,padding='post')## Padding to inputs
decoder_input_data=pad_sequences(targets,maxlen=max_sequence_length,padding='post')## Padding to outputs

## Outputs(shift by 1)
decoder_target_data=np.zeros_like(decoder_input_data)
decoder_target_data[:,1:]=decoder_input_data[:,:-1]
decoder_target_data=tf.keras.utils.to_categorical(decoder_target_data,num_classes=len(tokenizer.word_index)+1)

Training LSTM

In [None]:
## Define model architecture

## Encoder
encoder_inps=Input(shape=(None,))
encoder_embeddings=Embedding(len(tokenizer.word_index)+1,256)(encoder_inps)
encoder_lstm=LSTM(256,return_state=True)
encoder_outputs,state_h,state_c=encoder_lstm(encoder_embeddings)
encoder_states=[state_h,state_c]

## Decoder
decoder_inps=Input(shape=(None,))
decoder_embeddings=Embedding(len(tokenizer.word_index)+1,256)(decoder_inps)
decoder_lstm=LSTM(256,return_sequences=True,return_state=True)
decoder_outputs,_,_=decoder_lstm(decoder_embeddings,initial_state=encoder_states)
dense=Dense(len(tokenizer.word_index)+1,activation='softmax')
decoder_outputs=dense(decoder_outputs)

## Train the model
model=Model([encoder_inps,decoder_inps],decoder_outputs)

model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])

model.fit([encoder_input_data,decoder_input_data],decoder_target_data,epochs=50,batch_size=32)

Epoch 1/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 55ms/step - accuracy: 0.2188 - loss: 3.8851
Epoch 2/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step - accuracy: 0.4306 - loss: 2.9586
Epoch 3/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step - accuracy: 0.4295 - loss: 2.3113
Epoch 4/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step - accuracy: 0.5361 - loss: 2.0894
Epoch 5/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step - accuracy: 0.5245 - loss: 1.8472
Epoch 6/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step - accuracy: 0.5707 - loss: 1.7141
Epoch 7/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step - accuracy: 0.5746 - loss: 1.5977
Epoch 8/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step - accuracy: 0.6147 - loss: 1.5181
Epoch 9/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [

<keras.src.callbacks.history.History at 0x20d52aa6510>