<a href="https://colab.research.google.com/github/PrakharPatni08/Gen-AI/blob/main/Autoencoder_VAE2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Lambda, RepeatVector, TimeDistributed, Layer
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras import backend as K

In [None]:
from logging import log
# Toy training corpus: short English sentences about AI / ML topics.
# The list is used below both to fit the tokenizer vocabulary and as the
# (input == target) data the text VAE is trained to reconstruct.
sample_sentences = [
    'Artificial Intelligence is transforming the world',
    'Deep learning models require large datasets',
    'Natural language processing enables chatbots',
    'Computer vision helps in image recognition',
    'Generative models create realistic content',
    'Machine learning algorithms improve over time',
    'AI powers virtual assistants like Siri and Alexa',
    'Reinforcement learning optimizes robot actions',
    'Neural networks simulate human brain structure',
    'AI detects fraud in financial transactions',
    'Speech recognition converts voice to text',
    'Self-driving cars use AI for navigation',
    'Image classification is done using CNNs',
    'Text summarization condenses long documents',
    'Chatbots use NLP to understand user intent',
    'Face detection is used in security systems',
    'AI enhances medical diagnosis accuracy',
    'Robotic process automation handles routine tasks',
    'AI recommends products based on preferences',
    'Language models generate human-like text',
    'Anomaly detection spots unusual patterns',
    'AI accelerates drug discovery processes',
    'Predictive analytics forecasts business trends',
    'AI systems require training and validation data',
    'Transfer learning adapts models to new tasks',
    'Object detection identifies items in images',
    'AI optimizes logistics and supply chains',
    'Voice assistants interpret spoken commands',
    'Recommendation systems suggest relevant content',
    'AI helps detect cyber security threats',
    'Vision systems track objects in real-time',
    'AI systems evolve with continuous learning',
    'NLP parses text into structured data',
    'AI analyzes customer sentiment on social media',
    'Deep learning enhances speech translation',
    'AI predicts equipment maintenance needs',
    'Semantic segmentation labels image regions',
    'AI reduces errors in legal document review',
    'Emotion detection gauges human feelings',
    'AI generates creative artwork and music',
    'Recurrent neural networks model sequences',
    'AI facilitates intelligent tutoring systems',
    'Machine vision inspects products on assembly lines',
    'AI improves handwriting recognition',
    'Generative AI simulates realistic scenarios',
    'AI personalizes user experiences online',
    'Robotics uses AI for motion planning',
    'AI speeds up insurance claim processing',
    'Deepfake detection identifies manipulated media',
    'AI models adapt through fine-tuning',
    'Machine learning identifies disease patterns',
    'AI predicts student performance outcomes',
    'AI-based OCR reads printed text',
    'Speech synthesis mimics natural human voices',
    'AI helps match job seekers with roles',
    'Autonomous drones use AI for path planning',
    'AI aids in wildlife tracking and protection',
    'AI scans resumes for talent acquisition',
    'Deep learning boosts language translation tools',
    'AI interprets X-rays and MRI scans',
    'Cognitive computing mimics human thought processes',
    'AI forecasts weather with greater accuracy',
    'AI models learn from labeled data',
    'AI systems monitor factory production lines',
    'Voice biometrics identify individuals',
    'AI writes personalized marketing emails',
    'AI powers search engine optimization tools',
    'AI improves call center efficiency',
    'Machine learning enables financial forecasting',
    'AI supports adaptive e-learning platforms',
    'AI automates inventory management',
    'Facial recognition authorizes device access',
    'AI spots defects in manufacturing',
    'Natural language generation creates reports',
    'AI models power fraud detection tools',
    'AI predicts loan repayment probability',
    'Virtual agents assist in customer support',
    'AI suggests playlist based on mood',
    'AI enables real-time language transcription',
    'AI improves gaming NPC behaviors',
    'Machine learning models predict churn rate',
    'AI performs legal contract analysis',
    'Speech-to-text is powered by AI',
    'AI-based avatars simulate human interaction',
    'Autonomous vehicles rely on AI vision',
    'AI assists in energy usage optimization',
    'AI manages smart home devices',
    'Computer vision tracks people in crowds',
    'AI analyzes sports performance metrics',
    'AI supports automated grading of exams',
    'AI assists in architectural design simulations',
    'AI filters spam and phishing emails',
    'AI enables fast data categorization',
    'Knowledge graphs represent relationships in AI',
    'AI aids in supply chain risk analysis',
    'Conversational AI supports natural dialogue',
    'AI detects plagiarism in content',
    'AI-powered translation bridges language gaps',
    'AI identifies potential cybersecurity breaches',
    'Deep learning assists in protein folding',
    'AI enhances virtual and augmented reality',
    'AI powers personalized healthcare recommendations',
    'AI processes satellite images for agriculture',
    'AI drives innovations in smart cities',
    'AI aids in disaster response planning'
]

#sample_sentences=['Artifical Intelligence is transforming the world',
        #'Deep learningn model require large dataset',
       # 'Natural language processing enable chatbots',
       # 'Computer vision help in image recognition',
       # 'Generative models create realistic content']

# --- Text preprocessing ----------------------------------------------------
# vocab_limit caps the token ids the tokenizer will emit; sequence_length is
# the fixed number of time steps every sentence is padded to.
vocab_limit, sequence_length = 100, 10

text_processor = Tokenizer(oov_token="<OOV>", num_words=vocab_limit)
text_processor.fit_on_texts(sample_sentences)

# Integer-encode every sentence, then pad at the END ('post') so all rows
# share the same length.
tokenized = text_processor.texts_to_sequences(sample_sentences)
padded_sequence = pad_sequences(
    sequences=tokenized,
    maxlen=sequence_length,
    padding='post',
)

# --- Model hyper-parameters --------------------------------------------------
# embed_dim: width of the token embeddings; latent_space: size of the latent z.
embed_dim, latent_space = 64, 16

# Encoder: token ids -> embeddings -> one fixed-size summary vector per sentence.
input_layer = Input(name='text_input', shape=(sequence_length,))
embed_layer = Embedding(
    input_dim=vocab_limit,
    output_dim=embed_dim,
    mask_zero=True,  # id 0 is the padding value and is masked out
)(input_layer)
encoder_lstm = LSTM(units=64)(embed_layer)

# Two parallel linear heads on the LSTM summary: the mean and the
# log-variance of the approximate posterior over the latent vector.
z_mu = Dense(units=latent_space, name='latent_mean')(encoder_lstm)
z_log_sigma = Dense(units=latent_space, name='latent_log_var')(encoder_lstm)

def sample_data(args):
  """Draw a latent sample using the reparameterisation trick.

  Args:
    args: a pair ``(mu, log_sigma)`` of 2-D tensors (indexed here as
      ``[batch, latent_dim]``). ``log_sigma`` is used as a log-variance:
      it is halved before exponentiation to obtain the standard deviation.

  Returns:
    ``mu + exp(0.5 * log_sigma) * eps`` where ``eps`` is standard-normal
    noise with the same (dynamic) shape as ``mu``.
  """
  mean, log_var = args
  # Read the shape at run time so the batch size does not need to be known.
  dims = K.shape(mean)
  noise = K.random_normal(shape=(dims[0], dims[1]))
  std_dev = K.exp(log_var * 0.5)
  return mean + std_dev * noise

#KLDivergence
class KLLOss(Layer):
  """Pass-through layer that registers the VAE KL-divergence penalty.

  ``call`` receives ``[mu, log_sigma]`` (``log_sigma`` being the latent
  log-variance), adds the batch-mean of the closed-form
  KL( N(mu, exp(log_sigma)) || N(0, I) ) via ``add_loss`` and returns its
  inputs unchanged so downstream layers can keep using them.
  """
  def call(self,inputs):
    mu,log_sigma=inputs
    # Per-sample KL term, summed over the latent dimensions (axis=1).
    kl=-0.5*K.sum(1+log_sigma-K.square(mu)-K.exp(log_sigma),axis=1)
    self.add_loss(K.mean(kl))
    return inputs

# A functional Model only contains layers that lie on the path from its
# inputs to its outputs. The KL layer must therefore sit BETWEEN the encoder
# heads and the sampler; if its outputs are left unused, the layer (and the
# loss it registers) is silently dropped from the trained model.
kl_triggers=KLLOss(name='kl_loss')([z_mu,z_log_sigma]) #activates KL loss

# Sample z from the statistics that passed through the KL layer.
z_vector= Lambda(sample_data,name='latent_sampler')(kl_triggers)

# Decoder: copy z to every time step, run an LSTM over the copies and emit a
# softmax distribution over the vocabulary at each position.
repeat_z = RepeatVector(n=sequence_length)(z_vector)
decoder_lstm = LSTM(units=64, return_sequences=True)(repeat_z)
dense_output = TimeDistributed(
    Dense(units=vocab_limit, activation='softmax')
)(decoder_lstm)

# End-to-end VAE: token ids in, per-position token distributions out.
vae_model = Model(inputs=input_layer, outputs=dense_output, name="text_vae_model")
vae_model.compile(loss='sparse_categorical_crossentropy', optimizer='adam')

# The model reconstructs its own input; the targets are the padded ids with
# an extra trailing axis of size 1.
target_output = padded_sequence[..., np.newaxis]
vae_model.fit(x=padded_sequence, y=target_output, batch_size=2, epochs=100)

#encoder
# Maps token ids to the latent mean; shares the trained encoder layers
# because it is built from the same graph tensors as vae_model.
text_encoder=Model(input_layer,z_mu,name='encoder_only')

#decoder
# The standalone decoder must REUSE the layers that were trained inside
# vae_model. Instantiating new LSTM/Dense layers here would give a decoder
# with random, untrained weights whose output is unrelated to the training.
# The decoder LSTM is the only LSTM in the VAE that returns full sequences.
trained_decoder_lstm=next(
    layer for layer in vae_model.layers
    if isinstance(layer,LSTM) and layer.return_sequences
)
trained_output_layer=next(
    layer for layer in vae_model.layers
    if isinstance(layer,TimeDistributed)
)

latent_input=Input(shape=(latent_space,), name='decoder_input')
# RepeatVector has no weights, so a fresh instance is fine.
repeated_input=RepeatVector(sequence_length)(latent_input)
decoded_seq=trained_decoder_lstm(repeated_input)
final_output=trained_output_layer(decoded_seq)
text_decoder=Model(latent_input,final_output,name='decoder_only')

Epoch 1/100
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 13ms/step - loss: 4.2622
Epoch 2/100
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - loss: 2.3076
Epoch 3/100
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - loss: 2.0102
Epoch 4/100
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - loss: 1.8468
Epoch 5/100
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: 1.8298
Epoch 6/100
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - loss: 1.7791
Epoch 7/100
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - loss: 1.7492
Epoch 8/100
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - loss: 1.6629
Epoch 9/100
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - loss: 1.6571
Epoch 10/100
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 21ms/step - lo

In [None]:
import numpy as np
# Greedy decoding: push one random latent point through the decoder and keep
# the highest-probability token id at every position.
random_z = np.random.normal(0.0, 1.0, (1, latent_space))
generated_seq_probs = text_decoder.predict(random_z)
generated_seq_ids = generated_seq_probs.argmax(axis=-1)[0]

# Build the id -> word lookup by inverting the tokenizer's word -> id map.
idx_word = dict(
    zip(text_processor.word_index.values(), text_processor.word_index.keys())
)

# id 0 is the padding value and renders as an empty string.
idx_word[0] = ''
oov_id = text_processor.word_index.get("<OOV>", 1)
idx_word[oov_id] = '<OOV>'

# Ids missing from the lookup are shown as "<UNK>".
generated_words = [idx_word.get(token_id, "<UNK>") for token_id in generated_seq_ids]
generated_text = ' '.join(generated_words).strip()
print("Generated text:\n", generated_text)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 437ms/step
Generated text:
 over over over over over over over over over over


In [None]:
def temp_sampling(prob,temp=1.0):
  """Sample an index from ``prob`` after temperature re-scaling.

  The distribution is sharpened for ``temp < 1`` and flattened for
  ``temp > 1``; ``temp == 1`` samples from ``prob`` (up to the 1e-9
  smoothing term) unchanged.

  Args:
    prob: 1-D array-like of non-negative probabilities.
    temp: positive temperature.

  Returns:
    The sampled index, an integer in ``[0, len(prob))``.

  Raises:
    ValueError: if ``temp`` is not positive or ``prob`` is empty.
  """
  if temp<=0:
    raise ValueError("temp must be positive, got {}".format(temp))
  # Work in float64 so low-precision inputs (e.g. float32 arrays) do not
  # lose accuracy in the normalisation below.
  probs=np.asarray(prob,dtype=np.float64)
  if probs.size==0:
    raise ValueError("prob must not be empty")
  scaled=np.log(probs+1e-9)/temp
  # Shift by the maximum before exponentiating: the largest term becomes
  # exp(0) == 1, so the sum can never underflow to 0 (which would turn the
  # normalisation into 0/0 == NaN at small temperatures).
  scaled=scaled-np.max(scaled)
  weights=np.exp(scaled)
  weights=weights/np.sum(weights)
  return np.random.choice(len(weights),p=weights)

# Stochastic decoding: draw one token id per position from the decoder's
# temperature-scaled distribution (temp < 1 favours likely tokens).
temp = 0.5
random_z = np.random.normal(0.0, 1.0, (1, latent_space))
generated_seq_probs = text_decoder.predict(random_z)
generated_seq_ids = [
    temp_sampling(step_probs, temp) for step_probs in generated_seq_probs[0]
]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step


In [None]:
#convert idx to words
# Copy the tokenizer's id -> word map instead of aliasing it: assigning
# idx_word[0] on the original dict would permanently add a key to the
# tokenizer's own internal state.
idx_word=dict(text_processor.index_word)
idx_word[0]=''  # id 0 is the padding value and renders as an empty string
generated_words=[idx_word.get(idx, "<UNK>") for idx in generated_seq_ids] #unknown - unk
generated_text=' '.join(generated_words).strip()
print("Generated text:\n",generated_text)

Generated text:
 enhances learning optimizes optimizes virtual media voice networks enhances generative


1. What are NLU and NLG?
2. What is bag of words?
3. What is stemming?
4. What is TF-IDF?
5. What is POS padding?
6. Embedding techniques: Word2Vec and GloVe
7. What is the difference between CountVectorizer and TF-IDF?
8. What is a transformer?
9. What is the BERT model?
10. How would you handle OOV words in a text corpus?
11. What is MLM?
12. What is NMT?
13. What is a tokenizer?
14. What is an embedding?
15. What are stop words?