In [4]:
import spacy
from spacy.training import Example
from spacy.matcher import Matcher
from spacy.tokens import Span
from spacy.util import filter_spans
import random
from pathlib import Path
from itertools import chain
from spacy.util import minibatch, compounding
from pathlib import Path
from train_data import TRAIN_DATA

In [None]:
def train_ner(model_dir="./ner_model", new_data=TRAIN_DATA, n_iter=10):
    # Load the pre-trained "en_core_web_sm" model
    if Path(model_dir).exists():
        print(f"Loading existing model from: {model_dir}")
        nlp = spacy.load(model_dir)  # Load the existing model
    else:
        print("Loading the pre-trained 'en_core_web_sm' model")
        nlp = spacy.load("en_core_web_sm")  # Load the pre-trained model if none exists
        if "ner" not in nlp.pipe_names:
            ner = nlp.add_pipe("ner", last=True)
        else:
            ner = nlp.get_pipe("ner")
        # Add new entity labels to the NER model
        for _, annotations in new_data:
            for ent in annotations.get("entities"):
                ner.add_label(ent[2])

    # Continue training the model
    if "ner" in nlp.pipe_names:
        ner = nlp.get_pipe("ner")
    
    # Disable other pipeline components during training to focus training on the NER component
    with nlp.disable_pipes(*[pipe for pipe in nlp.pipe_names if pipe != "ner"]):
        optimizer = nlp.resume_training()
        for itn in range(n_iter):
            losses = {}
            batches = minibatch(new_data, size=compounding(4., 32., 1.001))
            for batch in batches:
                for text, annotations in batch:
                    doc = nlp.make_doc(text)
                    example = Example.from_dict(doc, annotations)
                    nlp.update([example], drop=0.5, losses=losses, sgd=optimizer)
            print(f"Losses at iteration {itn}: {losses}")

    # Save the updated model
    nlp.to_disk(model_dir)
    print(f"Saved model to: {model_dir}")

if __name__ == "__main__":
    train_ner()


Loading existing model from: ./ner_model


In [3]:
import spacy
import json
def predict(text):
    nlp = spacy.load("./ner_model")  # Ensure this path is correct
    doc = nlp(text)
    for token in doc:
        print(token.text, token.ent_type_)
    for ent in doc.ents:
        print("Entity:", ent.text, ent.label_)

if __name__ == "__main__":
    predict_text = "I like Fornite"
    predict_to_json(predict_text, "prediction_output.json")
    predict(predict_text)


NameError: name 'predict_to_json' is not defined

In [43]:
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

Note: you may need to restart the kernel to use updated packages.Looking in indexes: https://download.pytorch.org/whl/cu118

Collecting torch
  Downloading https://download.pytorch.org/whl/cu118/torch-2.2.1%2Bcu118-cp39-cp39-win_amd64.whl (2704.2 MB)
Collecting torchvision
  Downloading https://download.pytorch.org/whl/cu118/torchvision-0.17.1%2Bcu118-cp39-cp39-win_amd64.whl (4.9 MB)
Collecting torchaudio
  Downloading https://download.pytorch.org/whl/cu118/torchaudio-2.2.1%2Bcu118-cp39-cp39-win_amd64.whl (4.0 MB)
Installing collected packages: torch, torchvision, torchaudio
Successfully installed torch-2.2.1+cu118 torchaudio-2.2.1+cu118 torchvision-0.17.1+cu118


In [15]:
# Import necessary libraries
from transformers import AutoModel, AutoTokenizer
import torch
from scipy.spatial.distance import cosine
from sentence_transformers import SentenceTransformer, InputExample, losses, evaluation
from torch.utils.data import DataLoader
import pandas as pd
from sentence_transformers import evaluation

# Simulated dataset: Normally, you'd load a dataset from a file
data = [
    {"sentence1": "move the ball", "sentence2": "pass", "similarity": 0.9},
    {"sentence1": "move the ball", "sentence2": "walk", "similarity": 0.1},
    {"sentence1": "move the ball", "sentence2": "punt", "similarity": 0.8},
    {"sentence1": "move the ball", "sentence2": "run", "similarity": 0.2},
]

# Convert the simulated dataset to a pandas DataFrame
df = pd.DataFrame(data)

# Convert the DataFrame to a list of InputExample objects
examples = [InputExample(texts=[row['sentence1'], row['sentence2']], label=row['similarity']) for index, row in df.iterrows()]

# Initialize DataLoader
train_dataloader = DataLoader(examples, shuffle=True, batch_size=2)

# Initialize the model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Define the training method using cosine similarity loss
train_loss = losses.CosineSimilarityLoss(model=model)

# Assuming a very small dataset, let's skip validation for this example.
# In a real scenario, you should split your data and use a validation set.

# Fine-tune the model
model.fit(train_objectives=[(train_dataloader, train_loss)],
          epochs=4,  # Adjust epochs based on your dataset size and complexity
          warmup_steps=100,
          output_path="./fine-tuned-model"  # Change this to where you want to save your model
         )

# Load the fine-tuned model (optional if you continue using the same model object)
model = SentenceTransformer("./fine-tuned-model")

# Now you can use the model as before to generate embeddings and calculate similarities
# The target phrase and phrases to compare
target_phrase = "kick the ball"
phrases = ["walk", "pass", "punt", "run"]
phrases_button = ["A","X","Y","B"]

# Generate embeddings for each phrase
target_embedding = model.encode(target_phrase)
phrase_embeddings = model.encode(phrases)

# Calculate and print the cosine similarity between the target and each phrase
similarities = {}
for phrase, embedding in zip(phrases, phrase_embeddings):
    # Compute cosine similarity (note: 1 - cosine distance to get similarity)
    similarity = 1 - cosine(target_embedding, embedding)
    similarities[phrase] = similarity

# Find the most similar phrase
most_similar_phrase = max(similarities, key=similarities.get)
print(f"The phrase most similar to '{target_phrase}' is: '{most_similar_phrase}' with a similarity score of {similarities[most_similar_phrase]:.4f}")

# Optional: print all similarities for comparison
for phrase, similarity in similarities.items():
    print(f"Similarity to '{phrase}': {similarity:.4f}")


Epoch:   0%|          | 0/4 [00:00<?, ?it/s]

Iteration:   0%|          | 0/2 [00:00<?, ?it/s]

Iteration:   0%|          | 0/2 [00:00<?, ?it/s]

Iteration:   0%|          | 0/2 [00:00<?, ?it/s]

Iteration:   0%|          | 0/2 [00:00<?, ?it/s]

The phrase most similar to 'move the ball' is: 'punt' with a similarity score of 0.4584
Similarity to 'walk': 0.3673
Similarity to 'pass': 0.3767
Similarity to 'punt': 0.4584
Similarity to 'run': 0.3102
Similarity to 'kick': 0.4556


In [24]:
import spacy

nlp = spacy.load("en_core_web_sm")  # Loading the model

def get_similar_words(word, topn=5):
    # Process the word to get its vector
    queried_token = nlp(word)[0]
    
    if queried_token.has_vector:
        similarities = []
        for token in nlp.vocab:
            # Check if the token has a vector and matches the casing
            if token.has_vector and token.is_lower == queried_token.is_lower:
                similarity = queried_token.similarity(token)
                if similarity > 0.2:  # Threshold for similarity
                    similarities.append((token.text, similarity))
        similarities = sorted(similarities, key=lambda item: item[1], reverse=True)
        return [word for word, similarity in similarities[:topn]]
    else:
        return []

# Example usage
similar_words = get_similar_words("hello")
print(similar_words)


[]


In [33]:
import time
start = time.time()
from deepmultilingualpunctuation import PunctuationModel
model = PunctuationModel()
text = "I want to play FIFA with my body I want my right hand to control movement of the player I want to put my fist up to pass the ball I want to sprint when I show three finger I also want to kick the ball when I Kick In Real Life"
result = model.restore_punctuation(text)
print(result)
end = time.time()
print(end - start)

I want to play FIFA with my body. I want my right hand to control movement of the player. I want to put my fist up to pass the ball. I want to sprint when I show three finger. I also want to kick the ball when I Kick In Real Life.
7.02576208114624


In [1]:
python -V

NameError: name 'python' is not defined

In [2]:
conda

usage: conda-script.py [-h] [-V] command ...

conda is a tool for managing and deploying applications, environments and packages.

Options:

positional arguments:
  command
    clean        Remove unused packages and caches.
    compare      Compare packages between conda environments.
    config       Modify configuration values in .condarc. This is modeled
                 after the git config command. Writes to the user .condarc
                 file (C:\Users\ASUS\.condarc) by default.
    create       Create a new conda environment from a list of specified
                 packages.
    help         Displays a list of available conda commands and their help
                 strings.
    info         Display information about current conda install.
    init         Initialize conda for shell interaction. [Experimental]
    install      Installs a list of packages into a specified conda
                 environment.
    list         List linked packages in a conda environment.
    p