<a href="https://colab.research.google.com/github/9158764767/Avaition-management/blob/master/Assignment2_NLP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from bs4 import BeautifulSoup
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


# Download necessary NLTK resources
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

# Read input text from a file
with open('input.txt', 'r', encoding='utf-8') as file:
    input_text = file.read()


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


In [2]:
# Preprocessing function
def preprocess_text(text):
    soup = BeautifulSoup(text, "html.parser")
    text = soup.get_text(separator=" ")  # Remove HTML tags
    text = re.sub(r"[^a-zA-Z\s]", "", text)  # Remove non-alphabetic characters
    tokens = word_tokenize(text.lower())  # Tokenize and convert to lower case
    tokens = [token for token in tokens if token.isalnum()]  # Alphanumeric filter
    stop_words = set(stopwords.words('english'))
    tokens = [token for token in tokens if token not in stop_words]  # Stopword removal
    lemmatizer = WordNetLemmatizer()
    lemmatized = [lemmatizer.lemmatize(token) for token in tokens]  # Lemmatization
    return " ".join(lemmatized)

processed_input = preprocess_text(input_text)

# Context Window Slicing Algorithm
def generate_slices(input_text, context_window_size=128):
    context_window_bytes = context_window_size * 1024  # Adjust byte size for example
    words = processed_input.split()
    slices = []
    current_slice = ""
    for word in words:
        if len(current_slice.encode('utf-8')) + len(word.encode('utf-8')) <= context_window_bytes:
            current_slice += " " + word
        else:
            slices.append(current_slice.strip())
            current_slice = word
    if current_slice:
        slices.append(current_slice.strip())

    # Enhance slice differentiation using cosine similarity
    final_slices = [slices[0]]
    vectorizer = TfidfVectorizer()
    for i in range(1, len(slices)):
        tfidf_matrix = vectorizer.fit_transform([final_slices[-1], slices[i]])
        cosine_dist = cosine_similarity(tfidf_matrix[0], tfidf_matrix[1])[0][0]
        if cosine_dist < 0.2:  # Threshold for differentiation
            final_slices.append(slices[i])

    return final_slices

slices = generate_slices(input_text)
print(slices)  # Print the generated slices


['exploration space stand one humanity greatest achievement moon landing marked pinnacle space race current endeavor aim even higher targeting mar beyond advancement rocket technology satellite system unmanned spacecraft opened new frontier scientific discovery potential human settlement researcher engineer around world collaborate overcome physical technological challenge interstellar travel cosmic radiation life support system sustainable food production space realm economics st century witnessed seismic shift towards globalization digital transaction rise cryptocurrencies blockchain technology challenge traditional banking system fiat currency proposing new era decentralized finance economist debate implication digital currency global financial stability autonomy national economy meanwhile international trade agreement tariff continue shape economic landscape country influencing job market industry growth consumer price cultural tapestry world rich diverse community contributing uni

In [3]:
# Save slices to a file
with open('slices_output.txt', 'w', encoding='utf-8') as output_file:
    for i, slice_text in enumerate(slices):
        output_file.write(f"Slice {i + 1}: {slice_text}\n")

In [4]:
! pip install replicate
! pip install --upgrade requests urllib3


import replicate
# Authenticate with Replicate API and define the model
REPLICATE_API_TOKEN = "r8_aiebuYTaLBZIzYv8whiaYlsKcqLH43p2nexiP"
client = replicate.Client(api_token=REPLICATE_API_TOKEN)
model_name = "meta/llama-2-70b-chat"

Collecting replicate
  Downloading replicate-0.25.2-py3-none-any.whl (39 kB)
Collecting httpx<1,>=0.21.0 (from replicate)
  Downloading httpx-0.27.0-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
Collecting httpcore==1.* (from httpx<1,>=0.21.0->replicate)
  Downloading httpcore-1.0.5-py3-none-any.whl (77 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.9/77.9 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.21.0->replicate)
  Downloading h11-0.14.0-py3-none-any.whl (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.3/58.3 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: h11, httpcore, httpx, replicate
Successfully installed h11-0.14.0 httpcore-1.0.5 httpx-0.27.0 replicate-0.25.2
Collecting urllib3
  Downloading urllib3-2.2.1-py3-none-any.whl (121 kB)
[2K     

In [5]:
# Read the sliced text
with open('slices_output.txt', 'r', encoding='utf-8') as input_file:
    slice_text = input_file.read()

# Initialize the conversation with the model using the sliced text
print("Initializing the conversation with the model...")
initial_response = client.stream(
    model_name,
    input={
        "prompt": f"Initial Input:\n\n{slice_text}\n\nUser Input: "
    }
)

# Initialize the conversation with the model using the sliced text
print("Initializing the conversation with the model...")
try:
    # Using the predict method if available, or adjust based on actual available method
    response = client.predict(model_name, input={"prompt": f"Initial Input:\n\n{slice_text}\n\nUser Input: "})
    print(response)
except AttributeError as e:
    print("Failed to interact with the model:", e)




# Retrieve and store the initial model output
user_input = ""
for event in initial_response:
    user_input += str(event)  # Collect initial output to continue the conversation
    print(event, end="")  # Display the model's initial output


Initializing the conversation with the model...
Initializing the conversation with the model...
Failed to interact with the model: 'Client' object has no attribute 'predict'
 Sure, I can help you with that. Here's a summary of the input you provided:

The moon landing was a significant achievement for humanity, representing the pinnacle of the space race and opening up new frontiers for scientific discovery and potential human settlement. However, there are still physical and technological challenges to overcome, such as cosmic radiation, life support systems, and sustainable food production. The rise of cryptocurrencies and blockchain technology has challenged traditional banking and financial systems, with economists debating the implications for global financial stability and national autonomy.


In [6]:
# Ask the user for their question
user_question = input("\nYou: ")

# Continue the conversation based on the user's question
print("\nAsking your question to the model...")
response = client.stream(
    model_name,
    input={
        "prompt": f"{user_input}\n\nUser Question: {user_question}"
    }
)

# Print the model's response to the user's question
for event in response:
    print(event, end="")



You: what is moon

Asking your question to the model...
 The moon is the natural satellite of Earth, orbiting our planet at an average distance of about 239,000 miles (384,000 kilometers). It is the fifth-largest satellite in the solar system and the largest satellite relative to the size of its planet. The moon has a diameter of about 2,159 miles (3,475 kilometers), which is about one-quarter the size of Earth.

The moon is a rocky, airless body with no atmosphere, and its surface is characterized by mountains, craters, and

In [8]:
import replicate

# Authenticate with Replicate API and define the model
REPLICATE_API_TOKEN = "r8_aiebuYTaLBZIzYv8whiaYlsKcqLH43p2nexiP"
client = replicate.Client(api_token=REPLICATE_API_TOKEN)
model_name = "meta/llama-2-70b-chat"

# Read the sliced text
with open('slices_output.txt', 'r', encoding='utf-8') as input_file:
    slice_text = input_file.read()

# Assuming we found that the correct method is `run` or similar
print("Initializing the conversation with the model...")
try:
    initial_response = client.run(model_name, {"prompt": f"Initial Input:\n\n{slice_text}\n\nUser Input: "})
    print("Response:", initial_response)
except AttributeError as e:
    print("Failed to interact with the model:", e)

# Assuming initial_response is iterable if correct method is used
user_input = ""
for event in initial_response:
    user_input += str(event)  # Collect initial output to continue the conversation
    print(event, end="")  # Display the model's initial output

# Ask the user for their question
user_question = input("\nYou: ")

# Continue the conversation based on the user's question
print("\nAsking your question to the model...")
try:
    response = client.run(model_name, {"prompt": f"{user_input}\n\nUser Question: {user_question}"})
    for item in response:
        print(item, end="")  # Assuming response is iterable
except AttributeError as e:
    print("Failed to interact with the model:", e)


Initializing the conversation with the model...
Response: [' Thank', ' you', ' for', ' the', ' input', '.', ' It', ' appears', ' to', ' be', ' a', ' collection', ' of', ' various', ' topics', ' and', ' issues', ' that', ' are', ' currently', ' being', ' discussed', ' in', ' the', ' world', '.', ' It', "'", 's', ' cru', 'cial', ' to', ' approach', ' these', ' subjects', ' with', ' care', ' and', ' consideration', ',', ' taking', ' into', ' account', ' the', ' eth', 'ical', ',', ' soci', 'etal', ',', ' and', ' environmental', ' effects', ' they', ' may', ' have', '.', '\n', '\n', 'In', ' terms', ' of', ' techn', 'ological', ' development', ',', ' it', ' is', ' cru', 'cial', ' to', ' invest', ' in', ' cutting', '-', 'edge', ' techn', 'ologies', ' including', ' artificial', ' intelligence', ',', ' robot', 'ics', ',', ' and', ' gen', 'et', 'ics', '.', ' These', ' techn', 'ologies', ' have', ' the', ' potential', ' to', ' significantly', ' enh', 'ance', ' product', 'ivity', ' across', ' a', 

In [13]:
import replicate

# Authenticate with Replicate API and define the model
REPLICATE_API_TOKEN = "r8_aiebuYTaLBZIzYv8whiaYlsKcqLH43p2nexiP"
client = replicate.Client(api_token=REPLICATE_API_TOKEN)
model_name = "meta/llama-2-70b-chat"

# Read the sliced text
with open('slices_output.txt', 'r', encoding='utf-8') as input_file:
    slice_text = input_file.read()

# Initialize the conversation with the model using the sliced text
print("Initializing the conversation with the model...")
try:
    initial_response = client.run(model_name, {"prompt": f"Initial Input:\n\n{slice_text}\n\nUser Input: "})
    print("Model Response:")
    for event in initial_response:
        print(event, end="")  # Display the model's initial output
except AttributeError as e:
    print("Failed to interact with the model:", e)

# Loop for continuous conversation
while True:
    # Ask the user for their question
    user_question = input("\nYou: ")

    # Continue the conversation based on the user's question
    print("\nAsking your question to the model...")
    try:
        response = client.run(model_name, {"prompt": f"{user_input}\n\nUser Question: {user_question}"})
        print("Model Response:")
        for item in response:
            print(item, end="")  # Display the model's response
    except AttributeError as e:
        print("Failed to interact with the model:", e)

    # Check if the user wants to continue the conversation
    continue_conversation = input("\nDo you want to continue the conversation? (yes/no): ")
    if continue_conversation.lower() != 'yes':
        print("Exiting the conversation.")
        break


Initializing the conversation with the model...


ReplicateError: ReplicateError Details:
title: Free time limit reached
status: 402
detail: You have reached the free time limit. To continue using Replicate, set up billing at https://replicate.com/account/billing#billing.