In [6]:
import os
import google.generativeai as palm

palm.configure(api_key=os.environ["GOOGLE_API_KEY"])
import pandas as pd

In [7]:
import google.generativeai as palm

import chromadb
from chromadb.api.types import Documents, Embeddings

In [3]:
from langchain.vectorstores import Chroma
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import UnstructuredPDFLoader  #load pdf
from langchain.indexes import VectorstoreIndexCreator #vectorize db index with chromadb
from langchain.text_splitter import CharacterTextSplitter #text splitter

In [4]:
response = palm.generate_text(prompt="The opposite of hot is")
print(response.result)  # cold.

cold.


In [166]:
# Create a new conversation
response = palm.chat(messages='Hello')

# Last contains the model's response:
response.last

'Hello! How can I help you today?'

In [6]:
for model in palm.list_models():
  if 'embedText' in model.supported_generation_methods:
    print(model.name)

models/embedding-gecko-001


In [24]:
models = [m for m in palm.list_models() if 'embedText' in m.supported_generation_methods]

model = models[0]

In [31]:
#let's load the pdf book 
pdf_loader = PyPDFLoader('Books/DLCV/DLCV.pdf')


In [35]:
Book = pdf_loader.load()

In [47]:
text = []
for i in Book:
    text.append(i.page_content)

In [51]:
df = pd.DataFrame(text)
df.columns = ['Text']
df

Unnamed: 0,Text
0,MANNINGMohamed Elgendy
1,Deep Learning for\nVision Systems\nMOHAMED EL...
2,For online information and ordering of this an...
3,"To my mom, Huda, who taught me perseverance a..."
4,
...,...
475,INDEX 455\nperformance metrics (continued)\npr...
476,INDEX 456\nscipy.optimize.fmin_l_bfgs_b method...
477,INDEX 457\ntraining (continued)\npreparing dat...
478,INDEX 458\nvisual embeddings (continued)\nmini...


In [77]:
df[:20]

Unnamed: 0,Text
0,MANNINGMohamed Elgendy
1,Deep Learning for\nVision Systems\nMOHAMED EL...
2,For online information and ordering of this an...
3,"To my mom, Huda, who taught me perseverance a..."
4,
5,vcontents\npreface xiii\nacknowledgments xv\na...
6,CONTENTS vi\n1.5 Image preprocessing 23\nConve...
7,CONTENTS vii\n3 Convolutional neural networks ...
8,CONTENTS viii\n4.5 Improving the network and t...
9,CONTENTS ix\n5.5 Inception and GoogLeNet 217\n...


In [83]:
# get len of chars in the Text

df['chars_len'] = df['Text'].apply(lambda x: len(x))


In [84]:
df.describe()

Unnamed: 0,chars_len
count,480.0
mean,1939.004167
std,659.753035
min,0.0
25%,1520.25
50%,1972.5
75%,2413.0
max,3421.0


In [86]:
# remove Rows which len less that 10

df = df[df['chars_len'] > 10]


In [87]:
#get the text at the index = 4
df['Text'].iloc[4]

'vcontents\npreface xiii\nacknowledgments xv\nabout this book xvi\nabout the author xix\nabout the cover illustration xx\nPART 1DEEP LEARNING  FOUNDATION ............................. 1\n1 Welcome to computer vision 3\n1.1 Computer vision 4\nWhat is visual perception? 5■Vision systems 5\nSensing devices 7■Interpreting devices 8\n1.2 Applications of computer vision 10\nImage classification 10■Object detection and localization 12\nGenerating art (style transfer) 12■Creating images 13\nFace recognition 15■Image recommendation system 15\n1.3 Computer vision pipeline: The big picture 17\n1.4 Image input 19\nImage as functions 19■How computers see images 21\nColor images 21'

In [8]:
models = [m for m in palm.list_models() if 'embedText' in m.supported_generation_methods]
emb_model = models[0]

In [None]:
# save the df to csv file
df.to_csv('Embeddings/DLCV.csv', index=False)

In [21]:
query = "What is loss function ?"



In [22]:
import numpy as np

def find_best_passage(query, dataframe):
    """
    Compute the distances between the query and each document in the dataframe
    using the dot product.
    """
    query_embedding = palm.generate_embeddings(model=emb_model, text=query)
    dot_products = np.dot(np.stack(dataframe['Embeddings']), query_embedding['embedding'])
    idx = np.argmax(dot_products)
    return dataframe.iloc[idx]['Text'] # Return text from index with max value

In [35]:
passage = find_best_passage(query, df)
passage

ValueError: shapes (474,) and (768,) not aligned: 474 (dim 0) != 768 (dim 0)

In [192]:
import textwrap

def make_prompt(query, relevant_passage):
  escaped = relevant_passage.replace("'", "").replace('"', "").replace("\n", " ")
  prompt = textwrap.dedent("""You are a helpful and informative bot that answers questions using text from the reference passage included below. \
  Be sure to respond in a complete sentence, being comprehensive, including all relevant background information. \
  However, you are talking to a non-technical audience, so be sure to break down complicated concepts and \
  strike a friendly and converstional tone and gave example. \
  If the passage is irrelevant to the answer, you may ignore it.
  QUESTION: '{query}'
  PASSAGE: '{relevant_passage}'

    ANSWER:
  """).format(query=query, relevant_passage=escaped)

  return prompt

In [193]:
prompt = make_prompt(query, passage)
print(prompt)

You are a helpful and informative bot that answers questions using text from the reference passage included below.   Be sure to respond in a complete sentence, being comprehensive, including all relevant background information.   However, you are talking to a non-technical audience, so be sure to break down complicated concepts and   strike a friendly and converstional tone and gave example.   If the passage is irrelevant to the answer, you may ignore it.
  QUESTION: 'What is loss function ?'
  PASSAGE: '69 Error functions 2.5.1 What is the error function?  The error function  is a measure of how “wrong” the neural network prediction is with respect to the expected output (the label). It quantifies how far we are from the cor- rect solution. For example, if we have a high loss, then our model is not doing a good job. The smaller the loss, the better the job the model is doing. The larger the loss, the more our model needs to be trained to increase its accuracy. 2.5.2 Why do we need an 

In [194]:
text_models = [m for m in palm.list_models() if 'generateText' in m.supported_generation_methods]

text_model = text_models[0]

In [195]:
temperature = 0.5
answer = palm.generate_text(prompt=prompt,
                            model=text_model,
                            candidate_count=3,
                            temperature=temperature,)

In [196]:
for i, candidate in enumerate(answer.candidates):
  print(f"Candidate {i}: {candidate['output']}\n")

Candidate 0: The error function is a measure of how "wrong" the neural network prediction is with respect to the expected output (the label). It quantifies how far we are from the correct solution.



In [197]:
response = palm.generate_text(prompt='''
rewrite this and gave an example for a kid to help him under stand it (Candidate 0: A loss function is a measure of how “wrong” the neural network prediction is with respect to the expected output (the label).)
''')
print(response.result)  # cold.

A loss function is like a grade you get on a test. It tells you how well you did, and it can help you figure out where you need to improve.

For example, let's say you're learning to identify different types of animals. You might be given a picture of a dog, and you have to guess whether it's a golden retriever or a Labrador retriever. If you guess correctly, you get a good grade. But if you guess wrong, you get a bad grade.

The loss function works the same way. It tells you how far off your prediction was from the actual output. If your prediction is close to the output, you get a good grade. But if your prediction is far off, you get a bad grade.

The loss function is important because it helps you track your progress and see where you need to improve. If you're getting a lot of bad grades, it means you need to study more or practice more. And if you're getting a lot of good grades, it means you're doing a great job and you can keep up the good work.


In [199]:
response = palm.generate_text(prompt='''
use this to make a mark down as presntion to explain for childernes (A loss function is like a grade you get on a test. It tells you how well you did, and it can help you figure out where you need to improve.

For example, let's say you're learning to identify different types of animals. You might be given a picture of a dog, and you have to guess whether it's a golden retriever or a Labrador retriever. If you guess correctly, you get a good grade. But if you guess wrong, you get a bad grade.

The loss function works the same way. It tells you how far off your prediction was from the actual output. If your prediction is close to the output, you get a good grade. But if your prediction is far off, you get a bad grade.

The loss function is important because it helps you track your progress and see where you need to improve. If you're getting a lot of bad grades, it means you need to study more or practice more. And if you're getting a lot of good grades, it means you're doing a great job and you can keep up the good work.)
''')
print(response.result)  # cold.

## **Loss Function**

A loss function is like a grade you get on a test. It tells you how well you did, and it can help you figure out where you need to improve.

For example, let's say you're learning to identify different types of animals. You might be given a picture of a dog, and you have to guess whether it's a golden retriever or a Labrador retriever. If you guess correctly, you get a good grade. But if you guess wrong, you get a bad grade.

The loss function works the same way. It tells you how far off your prediction was from the actual output. If your prediction is close to the output, you get a good grade. But if your prediction is far off, you get a bad grade.

The loss function is important because it helps you track your progress and see where you need to improve. If you're getting a lot of bad grades, it means you need to study more or practice more. And if you're getting a lot of good grades, it means you're doing a great job and you can keep up the good work.

Here is a

## **Loss Function**

A loss function is like a grade you get on a test. It tells you how well you did, and it can help you figure out where you need to improve.

For example, let's say you're learning to identify different types of animals. You might be given a picture of a dog, and you have to guess whether it's a golden retriever or a Labrador retriever. If you guess correctly, you get a good grade. But if you guess wrong, you get a bad grade.

The loss function works the same way. It tells you how far off your prediction was from the actual output. If your prediction is close to the output, you get a good grade. But if your prediction is far off, you get a bad grade.

The loss function is important because it helps you track your progress and see where you need to improve. If you're getting a lot of bad grades, it means you need to study more or practice more. And if you're getting a lot of good grades, it means you're doing a great job and you can keep up the good work.

Here is a more mathematical definition of a loss function:

```
L(y, \hat{y}) = \sum_{i=1}^n (y_i - \hat{y}_i)^2
```

where $y$ is the actual output, $\hat{y}$ is the predicted output, and $n$ is the number of data points.

The loss function is a measure of how far off the predicted output is from the actual output. The smaller the loss function, the better the prediction.

Loss functions are used in machine learning to train models. By minimizing the loss function, we can train models to make better predictions.

In [201]:
# Create a new conversation
response = palm.chat(messages='i need you to erite me a class in python',
                     context='your name is (Name:Close Book) you will tell me if i should countiue chat with you or got to ask "Text Model" if the task need a code or many text'
                     
)
# Last contains the model's response:
response.last

'Sure, I can help you write a class in Python. Here is an example of a class that represents a person:\n\n```python\nclass Person:\n    def __init__(self, name, age):\n        self.name = name\n        self.age = age\n\n    def get_name(self):\n        return self.name\n\n    def get_age(self):\n        return self.age\n\n    def set_name(self, new_name):\n        self.name = new_name\n\n    def set_age(self, new_age):\n        self.age = new_age\n\n    def __str__(self):\n        return "Person(name=\'{}\', age={})".format(self.name, self.age)\n```\n\nThis class can be used to create objects that represent people. For example, the following code creates a person object named "John Doe" who is 30 years old:\n\n```python\nperson = Person("John Doe", 30)\n```\n\nThe `get_name()` and `get_age()` methods can be used to retrieve the person\'s name and age, respectively. The `set_name()` and `set_age()` methods can be used to change the person\'s name and age. The `__str__()` method returns 

In [1]:
import pandas as pd

df = pd.read_csv('Embeddings/DLCV.csv')

In [2]:
from model import TextGenerator

text_generator = TextGenerator()

In [3]:
query = "What is loss function ?"

In [4]:
print(text_generator.generate_answer('What is loss function ?', df))

86
69 Error functions
2.5.1 What is the error function? 
The error function  is a measure of how “wrong” the neural network prediction is with
respect to the expected output (the label). It quantifies how far we are from the cor-
rect solution. For example, if we have a high loss, then our model is not doing a good
job. The smaller the loss, the better the job the model is doing. The larger the loss,
the more our model needs to be trained to increase its accuracy.
2.5.2 Why do we need an error function?
Calculating error is an optimization problem, something all machine learning engi-
neers love (mathematicians, too). Optimization problems focus on defining an error
function and trying to optimize its parameters to get the minimum error (more on
optimization in the next section). But for now, know that, in general, when we are
working on an optimization problem, if we are able to define the error function for
the problem, we have a very good shot at solving it by running optimization a

In [5]:
df.iloc[86]['Text'] 

'69 Error functions\n2.5.1 What is the error function? \nThe error function  is a measure of how “wrong” the neural network prediction is with\nrespect to the expected output (the label). It quantifies how far we are from the cor-\nrect solution. For example, if we have a high loss, then our model is not doing a good\njob. The smaller the loss, the better the job the model is doing. The larger the loss,\nthe more our model needs to be trained to increase its accuracy.\n2.5.2 Why do we need an error function?\nCalculating error is an optimization problem, something all machine learning engi-\nneers love (mathematicians, too). Optimization problems focus on defining an error\nfunction and trying to optimize its parameters to get the minimum error (more on\noptimization in the next section). But for now, know that, in general, when we are\nworking on an optimization problem, if we are able to define the error function for\nthe problem, we have a very good shot at solving it by running opt

In [6]:
print(text_generator.zero_shot("""
who are you ? 
note: answer in markdown format to make the answer simple and looks good
"""))
                               

```
# Close-Book: An AI tool to help CS students in their studies

Close-Book is an AI tool that helps CS students in their studies. It provides a variety of features to help students learn and understand CS concepts, including:

* **Interactive tutorials:** Close-Book provides interactive tutorials that allow students to learn CS concepts by doing.
* **Code generation:** Close-Book can generate code for CS students, helping them to learn how to write code and debug their programs.
* **Question answering:** Close-Book can answer questions about CS concepts, helping students to test their understanding of the material.
* **Live help:** Close-Book can provide live help to students, answering their questions and helping them to solve problems.

Close-Book is a powerful tool that can help CS students to learn and understand CS concepts more effectively. It is available as a web app and as a mobile app.
```


# Close-Book: An AI tool to help CS students in their studies

## What is Close-Book?

Close-Book is an AI-powered tutoring tool that helps CS students learn and understand complex concepts. It provides students with personalized feedback and suggestions, and helps them track their progress.

## How does Close-Book work?

Close-Book uses a variety of AI techniques to help students learn, including:

* **Natural language processing:** Close-Book can understand the questions that students ask, and provide relevant and helpful answers.
* **Machine learning:** Close-Book can track students' progress over time, and identify areas where they need additional help.
* **Recommendation systems:** Close-Book can recommend resources and activities that are tailored to each student's individual needs.

## What are the benefits of using Close-Book?

Close-Book can help CS students in a number of ways, including:

* **Improved learning outcomes:** Close-Book can help students learn and understand complex concepts more quickly and effectively.
* **Increased engagement:** Close-Book can make learning more engaging and interesting for students.
* **Reduced stress:** Close-Book can help students feel more confident and less stressed about their studies.

## How can I use Close-Book?

Close-Book is available as a web app and a mobile app. To use Close-Book, simply sign up for an account and start asking questions. Close-Book will provide you with personalized feedback and suggestions, and help you track your progress.

## Conclusion

Close-Book is an AI-powered tutoring tool that can help CS students learn and understand complex concepts. It provides students with personalized feedback and suggestions, and helps them track their progress. If you're a CS student, I encourage you to try Close-Book today!


# Close-Book: An AI tool to help CS students in their studies

I am an AI tool that can help CS students in their studies. I can provide a variety of resources, including:

* **Lecture notes:** I can generate lecture notes based on the content of a lecture.
* **Code examples:** I can provide code examples that illustrate how to implement different algorithms and data structures.
* **Quiz questions:** I can generate quiz questions that test students' understanding of the material.
* **Grading:** I can grade students' assignments and provide feedback.

I can also help students with their research by:

* **Identifying relevant research papers:** I can help students identify research papers that are relevant to their research topic.
* **Summarizing research papers:** I can summarize research papers so that students can quickly get the main points.
* **Generating research proposals:** I can help students generate research proposals that are well-written and persuasive.

I am still under development, but I am learning new things every day. I am excited to help CS students succeed in their studies!

In [6]:
print(text_generator.zero_shot("""
what is loss function?
note: answer in markdown format to make the answer simple and looks good
"""))
                               

**Close-Book**, an AI tool to help CS students in their studies.

**What is a loss function?**

A loss function is a function that measures the difference between the predicted output of a model and the desired output. It is used to evaluate the performance of a model and to guide its training.

Loss functions are typically used in supervised learning, where the model is trained on a dataset of labeled data. The loss function is used to calculate the error between the model's predictions and the labels, and this error is used to update the model's parameters.

There are many different types of loss functions, each of which is suited to a different type of problem. Some common loss functions include:

* Mean squared error (MSE): This is a simple loss function that measures the squared difference between the predicted output and the desired output.
* Cross-entropy loss: This is a loss function that is often used for classification problems. It measures the difference between the predicte

**Close-Book**, an AI tool to help CS students in their studies.

**What is a loss function?**

A loss function is a function that measures the difference between the predicted output of a model and the desired output. It is used to evaluate the performance of a model and to guide its training.

Loss functions are typically used in supervised learning, where the model is trained on a dataset of labeled data. The loss function is used to calculate the error between the model's predictions and the labels, and this error is used to update the model's parameters.

There are many different types of loss functions, each of which is suited to a different type of problem. Some common loss functions include:

* Mean squared error (MSE): This is a simple loss function that measures the squared difference between the predicted output and the desired output.
* Cross-entropy loss: This is a loss function that is often used for classification problems. It measures the difference between the predicted probability distribution and the true probability distribution.
* Kullback-Leibler divergence (KLD): This is a loss function that is often used for measuring the similarity between two probability distributions.

The choice of loss function is an important one, as it can have a significant impact on the performance of the model. It is important to choose a loss function that is appropriate for the problem being solved.

Here is an example of a loss function for a simple linear regression model:

```
def loss_function(y_pred, y_true):
  """Calculates the mean squared error loss."""
  return np.mean((y_pred - y_true)**2)
```

This loss function calculates the squared difference between the predicted output and the desired output, and then averages the results across all of the data points in the dataset. This gives a single value that represents the overall error of the model.

Loss functions are an essential part of machine learning, and they play a key role in training models to perform well on a given task. By choosing the right loss function, you can improve the performance of your models and get better results.

In [63]:
model == emb_model

True

In [65]:
df['Embeddings'] = df['Embeddings'].apply(ast.literal_eval)

In [66]:
# make a query embbeding
query_embedding = palm.generate_embeddings(model=emb_model, text=query)['embedding']

In [42]:
print(query_embedding)

[0.012159031, 0.016000608, -0.07278232, 0.030883648, 0.042206768, 0.00586389, 0.0550859, -0.022852989, -0.020130193, 0.012802268, -0.016075468, 0.03568756, 0.022842498, -0.013605033, -0.010501474, -0.011401135, -0.06247971, -0.03929676, 0.039359335, 0.0037990757, -0.0747627, -0.0034343603, -0.022561323, -0.0017256979, 0.011200434, -0.085254736, 0.04832489, -0.028571256, -0.03516599, -0.0059107984, 0.03864997, 0.020890031, -0.025669849, -0.022951817, -0.039302796, -0.00031840187, -0.06994034, 0.034783173, 0.014722771, 0.0048771105, 0.0062925187, -0.017179891, -0.005620216, 0.025892738, -0.028290072, 0.018652376, -0.092221655, 0.02883248, 0.02158293, -0.04598464, 0.038186383, -0.0130881, -0.036388054, 0.025500996, -0.022145448, 0.016600447, -0.02144096, 0.00028106847, -0.086297065, -0.013628242, 0.0124761835, 0.014433538, 0.02589808, -0.023147896, -0.046706744, 0.085142426, 0.03182272, 0.0028626688, 0.03747989, -0.0062778792, 0.013922257, 0.04080264, 0.012881377, 0.0025034803, 0.08089112

In [38]:
len(query_embedding)

768

In [67]:
len(df['Embeddings'][0])

768

In [47]:
x = df['Embeddings'][0]
x

'[0.0010041381, 0.023556152, -0.02987321, 0.060834404, 0.046326395, -0.02630725, 0.05149295, 0.0015270954, -0.018965637, 0.008094878, 0.012878272, -0.033970058, 0.040260054, -0.007773982, -0.030728519, -0.0038723545, -0.02165675, -0.048108336, 0.029629802, 0.025192019, -0.06654851, -0.005221891, 0.0036930973, 0.026820462, 0.0017714327, -0.064580396, 0.012046766, -0.049813602, -0.03230159, -0.008825886, 0.014978497, 0.029564464, -0.013146918, -0.01330665, 0.04299503, 0.035856884, -0.004783798, 0.013250478, -0.0061364244, 0.047982816, -0.029000968, -0.068525404, 0.04028211, -0.0036428156, -0.016990067, -0.041494105, -0.024780551, -0.028861785, -0.014957387, -0.06511688, 0.0026707454, -0.05473773, 0.034465656, 0.023149969, 0.03929322, 0.034097876, -0.02163697, -0.030744692, -0.068379305, -0.014454069, 0.025074236, -0.000844103, 0.016302546, -0.03107995, -0.008877413, 0.025613071, -0.0059933336, -0.019427288, 0.016761009, -0.033954676, -0.002195716, 0.025517033, -0.041232422, -0.026811454,

In [48]:
import ast
x = ast.literal_eval(x)

In [50]:
len(x)

768

In [2]:
from langchain.document_loaders import UnstructuredURLLoader  #load urls into docoument-loader
urls = ['https://www.linkedin.com/pulse/transformers-without-pain-ibrahim-sobh-phd/','https://www.linkedin.com/pulse/transformers-without-pain-ibrahim-sobh-phd/']
loader = UnstructuredURLLoader(urls=urls)
data = loader.load()

In [3]:
import pandas as pd
loader = UnstructuredURLLoader(urls=urls)
text = loader.load()
text = text[0].page_content
text_dict = {'Text': [text]}
df = pd.DataFrame(text_dict)
# df.columns = ['Text']        
# df['Embeddings'] = df['Text'].apply(self.make_embeddings)
df.head()

KeyboardInterrupt: 

In [None]:
df.Text[0]

'Transformers without pain \uf8ffü§ó\n\nReport this article\n\nIbrahim Sobh - PhD\n\nIbrahim Sobh - PhD\n\n\uf8ffüéì Senior Expert of Artificial Intelligence, Valeo Group | Machine Learning | Deep Learning | Data Science | Computer Vision | NLP | Lecturer | Developer | Researcher \uf8ffüìù\n\nPublished Jan 4, 2021\n\n+ Follow\n\nContents:\n\nWhat is wrong with RNNs and CNNs\n\nA High-Level Look\n\nMachine Translation Task\n\nArchitecture main components\n\nWhat is attention? Where is attention?\n\nHow to represent the order of words without RNNs?\n\nGenerating words\n\nThe big picture\n\nThe future is here\n\nHow to start?\n\n1) What is wrong with RNNs and CNNs\n\nLearning Representations of Variable Length Data is a basic building block of sequence-to-sequence learning for Neural machine translation, summarization, etc\n\nRecurrent Neural Networks are natural fit variable-length sentences and sequences of pixels. But sequential computation inhibits parallelization. No explicit modelin

In [None]:
def make_embeddings(text):
    return palm.generate_embeddings(model=emb_model,text=text)['embedding']


In [None]:
models = [m for m in palm.list_models() if 'embedText' in m.supported_generation_methods]
emb_model = models[0]

In [None]:
# df['Embeddings'] = df['Text'].apply(make_embeddings)

InvalidArgument: 400 Request payload size exceeds the limit: 10000 bytes.

In [40]:
models = [m for m in palm.list_models() if 'embedText' in m.supported_generation_methods]
emb_model = models[0]

In [15]:
from langchain.text_splitter import CharacterTextSplitter #text splitter

loader = UnstructuredURLLoader(urls=urls)
texts = loader.load()
text_list = []
for text in texts:
    text_list.append(text.page_content)
splitter = CharacterTextSplitter(chunk_size=1000)
pargraphs = splitter.create_documents(text_list)
pargraphs_text=[]
for pra in pargraphs:
    pargraphs_text.append(pra.page_content)
text = {'Text':pargraphs_text}
df = pd.DataFrame(text)
# df['Embeddings'] = df['Text'].apply(make_embeddings)
df.head()


Unnamed: 0,Text
0,Transformers without pain ü§ó\n\nReport this ...
1,Convolutional Neural Networks are trivial to p...
2,4) Architecture main components\n\nEncoder com...
3,An example fo using attention for machine tran...
4,Each word is projected into three other vector...


In [16]:
df['Embeddings'] = df['Text'].apply(make_embeddings)


In [1]:
urls_text_list = ['https://www.linkedin.com/pulse/transformers-without-pain-ibrahim-sobh-phd/','https://www.linkedin.com/pulse/transformers-without-pain-ibrahim-sobh-phd/']


In [2]:
from model import TextGenerator
text_generator = TextGenerator()
df = text_generator.make_urls_df(urls_text_list)
# ans = text_generator.get_genrate_url_answer(query, df)

In [3]:
df

Unnamed: 0,Text,Embeddings
0,Transformers without pain ü§ó\n\nReport this ...,"[0.0010315055, -0.028580409, -0.0076014455, 0...."
1,Convolutional Neural Networks are trivial to p...,"[-0.0009383782, -0.0289818, -0.023477007, 0.04..."
2,4) Architecture main components\n\nEncoder com...,"[-0.020651396, -0.01635578, -0.024015633, 0.05..."
3,An example fo using attention for machine tran...,"[-0.0097579835, -0.008790414, -0.018799463, 0...."
4,Each word is projected into three other vector...,"[-0.01479975, -0.025010662, -0.04422533, 0.020..."
5,"For example, a word w may attend to word x in ...","[-0.009083958, -0.02020066, -0.030411098, 0.00..."
6,"As shown, we have 3 connected arrows in the fi...","[-0.00903019, -0.008585256, -0.031742916, 0.02..."
7,Think of the positional encoders as a simple l...,"[-0.015467625, -0.03785381, -0.04217757, 0.004..."
8,8) The big picture\n\nAttention is all you nee...,"[-0.018798167, -0.013468233, -0.01657415, 0.04..."
9,"For Example, ""Pre-training of Deep Bidirection...","[-0.002645978, -0.012628271, 0.0039097792, 0.0..."


In [8]:
import ast

query = 'What is Machine Translation?'
# change the Embeding column type to object 
# df['Embeddings'] = df['Embeddings'].astype(object)
ans = text_generator.get_genrate_url_answer(query)

Machine translation (MT) is the process of converting a text from one language to another. It is a subfield of computational linguistics, and has applications in many areas, such as translation of documents, websites, and software.

There are two main types of machine translation systems: statistical and rule-based. Statistical MT systems use statistical techniques to learn the relationship between words and phrases in two languages, and then use this information to generate translations. Rule-based MT systems use a set of rules to translate text from one language to another.

MT systems have improved significantly in recent years, but they still make mistakes. Some of the most common errors include:

* Mistranslation of idioms and other figurative language
* Mistranslation of proper nouns
* Mistranslation of words with multiple meanings
* Inaccurate translation of complex sentences

Despite these limitations, MT systems are still very useful tools for translating text between language