In [1]:
!pip install datasets evaluate transformers[sentencepiece]
!pip install faiss-cpu
!pip install -U sentence-transformers

zsh:1: no matches found: transformers[sentencepiece]


In [None]:
import pandas as pd
from datasets import load_dataset
from datasets import Dataset
import numpy as np
import pickle

In [None]:


ds = load_dataset("sentence-transformers/squad")
ds

DatasetDict({
    train: Dataset({
        features: ['question', 'answer'],
        num_rows: 87599
    })
})

In [66]:
# Convert the dataset to a pandas DataFrame
df = ds["train"].to_pandas()

# Drop duplicate rows based on the 'answer' column
df = df.drop_duplicates(subset='answer')

# Convert the DataFrame back to a Dataset
ds_cleaned = Dataset.from_pandas(df)

# Update the dataset dictionary
ds["train"] = ds_cleaned

ds_cleaned

Dataset({
    features: ['question', 'answer', '__index_level_0__'],
    num_rows: 18891
})

In [128]:
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("multi-qa-mpnet-base-dot-v1")

In [69]:
questions = ds_cleaned["question"]
answers = ds_cleaned["answer"]

In [70]:
# Get the last question and its corresponding answer
last_question = questions[-1]
last_answer = answers[-1]

print("Last Question:", last_question)
print("Ground Truth Answer:", last_answer)

Last Question: In what US state did Kathmandu first establish an international relationship?
Ground Truth Answer: Kathmandu Metropolitan City (KMC), in order to promote international relations has established an International Relations Secretariat (IRC). KMC's first international relationship was established in 1975 with the city of Eugene, Oregon, United States. This activity has been further enhanced by establishing formal relationships with 8 other cities: Motsumoto City of Japan, Rochester of the USA, Yangon (formerly Rangoon) of Myanmar, Xi'an of the People's Republic of China, Minsk of Belarus, and Pyongyang of the Democratic Republic of Korea. KMC's constant endeavor is to enhance its interaction with SAARC countries, other International agencies and many other major cities of the world to achieve better urban management and developmental programs for Kathmandu.


In [71]:
answer_embeddings = model.encode(answers, convert_to_tensor=True)

# Save the answer embeddings to a pickle file
with open("answer_embeddings.pkl", "wb") as f:
    pickle.dump(answer_embeddings, f)


In [77]:
import pickle

with open("answer_embeddings.pkl", "rb") as f:
    answer_embeddings = pickle.load(f)

In [78]:
answer_embeddings

tensor([[ 0.0920, -0.1151, -0.0925,  ..., -0.1539,  0.0199, -0.3485],
        [ 0.0312,  0.0178,  0.1020,  ..., -0.2624, -0.2836,  0.0984],
        [-0.0151,  0.3671, -0.0054,  ..., -0.1638,  0.1597, -0.0239],
        ...,
        [ 0.0623, -0.3622, -0.1559,  ..., -0.2562, -0.5400, -0.5379],
        [ 0.5040, -0.7157, -0.1107,  ..., -0.2558, -0.3753, -0.2565],
        [ 0.1656, -0.1617, -0.0214,  ..., -0.1522, -0.4290, -0.3922]],
       device='mps:0')

In [79]:
import torch

def search_query_top_k(query, corpus_embedding, corpus, top_k=5):
    query_embedding = model.encode(query, convert_to_tensor=True)
    cosine_scores = model.similarity(query_embedding, corpus_embedding)[0]
    scores, indices = torch.topk(cosine_scores, k=top_k)
    print("\nQuery:", query)
    print("Top 5 most similar sentences in corpus: \n")


    for i, (score, idx) in enumerate(zip(scores, indices), 1):
        print(f"{i}. (Score: {score:.4f}) index: {idx} \n", corpus[idx])
        print()

In [80]:
first_question = questions[0]
print("First Question:", first_question)

First Question: To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?


In [82]:
search_query_top_k(first_question, answer_embeddings, answers, top_k=5)


Query: To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?
Top 5 most similar sentences in corpus: 

1. (Score: 21.6458) index: 0 
 Architecturally, the school has a Catholic character. Atop the Main Building's gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend "Venite Ad Me Omnes". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary.

2. (Score: 19.7955) index: 6907 
 The Gospel of Luke begins its account of Mary's life with the Annunciation, when the angel Gabriel appeared to her a

In [87]:
len(answers) == len(answer_embeddings)

True

In [88]:
#get random question
random = questions[-1]

In [89]:
search_query_top_k(random, answer_embeddings, answers, top_k=5)


Query: In what US state did Kathmandu first establish an international relationship?
Top 5 most similar sentences in corpus: 

1. (Score: 24.8097) index: 18890 
 Kathmandu Metropolitan City (KMC), in order to promote international relations has established an International Relations Secretariat (IRC). KMC's first international relationship was established in 1975 with the city of Eugene, Oregon, United States. This activity has been further enhanced by establishing formal relationships with 8 other cities: Motsumoto City of Japan, Rochester of the USA, Yangon (formerly Rangoon) of Myanmar, Xi'an of the People's Republic of China, Minsk of Belarus, and Pyongyang of the Democratic Republic of Korea. KMC's constant endeavor is to enhance its interaction with SAARC countries, other International agencies and many other major cities of the world to achieve better urban management and developmental programs for Kathmandu.

2. (Score: 21.8178) index: 18843 
 The Gorkha Kingdom ended the Mall

## C FAISS INDEX

In [96]:
# Create a dataset from the answers
answers_dataset = Dataset.from_dict({"answers": ds_cleaned["answer"]})

In [95]:
# Convert embeddings to a list of lists
answer_embeddings_list = [embedding.tolist() for embedding in answer_embeddings]


In [97]:

# Add embeddings to the dataset
answers_dataset = answers_dataset.add_column("embeddings", answer_embeddings_list)


In [98]:
answers_dataset

Dataset({
    features: ['answers', 'embeddings'],
    num_rows: 18891
})

In [101]:
# Add FAISS index to the dataset
answers_dataset.add_faiss_index(column="embeddings")

  0%|          | 0/19 [00:00<?, ?it/s]

Dataset({
    features: ['answers', 'embeddings'],
    num_rows: 18891
})

In [102]:
answers_dataset

Dataset({
    features: ['answers', 'embeddings'],
    num_rows: 18891
})

In [None]:
# questions_dataset = Dataset.from_dict({"questions": ds["train"]["question"]})   

# question_embeddings_list = [embedding.tolist() for embedding in question_embeddings]

# questions_dataset = questions_dataset.add_column("embeddings", question_embeddings_list)

# questions_dataset.add_faiss_index(column="embeddings")

  0%|          | 0/88 [00:00<?, ?it/s]

Dataset({
    features: ['questions', 'embeddings'],
    num_rows: 87599
})

In [37]:
import pickle

# Save the datasets to a pickle file
with open('datasets.pkl', 'wb') as f:
    pickle.dump({'answers_dataset': answers_dataset, 'questions_dataset': questions_dataset}, f)

In [38]:
import pickle

# Load the datasets from the pickle file
with open('datasets.pkl', 'rb') as f:
    datasets = pickle.load(f)

answers_dataset = datasets['answers_dataset']
questions_dataset = datasets['questions_dataset']

In [115]:
def search_query_top_k_faiss(query, dataset, corpus, top_k=5):

    query_embedding = model.encode(query, convert_to_tensor=False)
    scores, samples = dataset.get_nearest_examples(
        "embeddings", query_embedding, k=top_k
    )
    samples_df = pd.DataFrame.from_dict(samples)
    samples_df["scores"] = scores
    samples_df.sort_values("scores", ascending=False, inplace=True)

    print("\nQuery:", query)
    print(f"Top {top_k} most similar sentences in corpus: \n")

    for i, (idx, row) in enumerate(samples_df.iterrows(), 1):
        print(
            f"{i}. (Score: {row['scores']:.4f}) index: {corpus.index(row["answers"])} \n",
            row["answers"],
        )
        print()

In [116]:
search_query_top_k_faiss(first_question, answers_dataset, answers, top_k=5)


Query: To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?
Top 5 most similar sentences in corpus: 

1. (Score: 41.2264) index: 17583 
 It seems to have been St Bernard of Clairvaux who, in the 12th century, explicitly raised the question of the Immaculate Conception. A feast of the Conception of the Blessed Virgin had already begun to be celebrated in some churches of the West. St Bernard blames the canons of the metropolitan church of Lyon for instituting such a festival without the permission of the Holy See. In doing so, he takes occasion to repudiate altogether the view that the conception of Mary was sinless. It is doubtful, however, whether he was using the term "conception" in the same sense in which it is used in the definition of Pope Pius IX. Bernard would seem to have been speaking of conception in the active sense of the mother's cooperation, for in his argument he says: "How can there be absence of sin where there is concupiscence (libido)?" and stron

In [118]:
search_query_top_k_faiss(random, answers_dataset, answers, top_k=5)


Query: In what US state did Kathmandu first establish an international relationship?
Top 5 most similar sentences in corpus: 

1. (Score: 41.4668) index: 2467 
 In April 1954 Adenauer made his first visit to the USA meeting Nixon, Eisenhower and Dulles. Ratification of EDC was delaying but the US representatives made it clear to Adenauer that EDC would have to become a part of NATO.

2. (Score: 39.3312) index: 18833 
 Kathmandu(/ˌkɑːtmɑːnˈduː/; Nepali pronunciation: [kɑʈʰmɑɳɖu]) is the capital and largest municipality of Nepal. It also hosts the headquarters of the South Asian Association for Regional Cooperation (SAARC). It is the only city of Nepal with the administrative status of Mahanagar (Metropolitan City), as compared to Upa-Mahanagar (Sub-Metropolitan City) or Nagar (City). Kathmandu is the core of Nepal's largest urban agglomeration located in the Kathmandu Valley consisting of Lalitpur, Kirtipur, Madhyapur Thimi, Bhaktapur and a number of smaller communities. Kathmandu is a

## D zaimplementować wybraną metrykę oceny skuteczności wyszukiwania

In [121]:
def calculate_recall(query, questions, answer_embeddings, top_k=5):
    # Generate Embeddings for the query
    query_embedding = model.encode(query, convert_to_tensor=True)

    # Perform Search using semantic search
    cosine_scores = model.similarity(query_embedding, answer_embeddings)[0]
    _, indices = torch.topk(cosine_scores, k=top_k)

    # Calculate Recall
    relevant_index = questions.index(query)
    retrieved_indices = indices.tolist()
    recall = 1 if relevant_index in retrieved_indices else 0

    return recall

def calculate_recall_faiss(query, questions, answers_dataset, top_k=5):
    # Generate Embeddings for the query
    query_embedding = model.encode(query, convert_to_tensor=False)

    # Perform Search using FAISS index
    scores, samples = answers_dataset.get_nearest_examples("embeddings", query_embedding, k=top_k)
    samples_df = pd.DataFrame.from_dict(samples)
    samples_df["scores"] = scores
    samples_df.sort_values("scores", ascending=False, inplace=True)

    retrieved_indices = samples_df.index.tolist()

    # Calculate Recall
    relevant_index = questions.index(query)
    recall = 1 if relevant_index in retrieved_indices else 0

    return recall

# Example usage
subset_questions = questions[:100]  # Subset of questions
recall_semantic_search = []
recall_faiss_search = []

for query in subset_questions:
    recall_semantic_search.append(calculate_recall(query, questions, answer_embeddings, top_k=5))
    recall_faiss_search.append(calculate_recall_faiss(query, questions, answers_dataset, top_k=5))

average_recall_semantic_search = sum(recall_semantic_search) / len(recall_semantic_search)
average_recall_faiss_search = sum(recall_faiss_search) / len(recall_faiss_search)

print("Average Recall (Semantic Search):", average_recall_semantic_search)
print("Average Recall (FAISS Search):", average_recall_faiss_search)

Average Recall (Semantic Search): 0.96
Average Recall (FAISS Search): 0.05


In [124]:
def calculate_mrr(queries, questions, answer_embeddings, top_k=5):
    mrr_total = 0.0

    for query in queries:
        query_embedding = model.encode(query, convert_to_tensor=True)

        cosine_scores = model.similarity(query_embedding, answer_embeddings)[0]
        _, indices = torch.topk(cosine_scores, k=top_k)

        relevant_index = questions.index(query)
        retrieved_indices = indices.tolist()
        if relevant_index in retrieved_indices:
            rank = retrieved_indices.index(relevant_index) + 1
            reciprocal_rank = 1 / rank
        else:
            reciprocal_rank = 0

        mrr_total += reciprocal_rank

    mrr = mrr_total / len(queries)
    return mrr

def calculate_mrr_faiss(queries, questions, answers_dataset, top_k=5):
    mrr_total = 0.0

    for query in queries:
        query_embedding = model.encode(query, convert_to_tensor=False)

        scores, samples = answers_dataset.get_nearest_examples("embeddings", query_embedding, k=top_k)
        samples_df = pd.DataFrame.from_dict(samples)
        samples_df["scores"] = scores
        samples_df.sort_values("scores", ascending=False, inplace=True)

        retrieved_indices = samples_df.index.tolist()

        relevant_index = questions.index(query)
        if relevant_index in retrieved_indices:
            rank = retrieved_indices.index(relevant_index) + 1
            reciprocal_rank = 1 / rank
        else:
            reciprocal_rank = 0

        mrr_total += reciprocal_rank

    mrr = mrr_total / len(queries)
    return mrr

subset_questions = questions[:100]

mrr = calculate_mrr(subset_questions, questions, answer_embeddings, top_k=5)
print("Mean Reciprocal Rank (MRR):", mrr)

mrr_faiss = calculate_mrr_faiss(subset_questions, questions, answers_dataset, top_k=5)
print("Mean Reciprocal Rank (MRR) using FAISS:", mrr_faiss)

Mean Reciprocal Rank (MRR): 0.7955
Mean Reciprocal Rank (MRR) using FAISS: 0.02283333333333333


# 4

In [130]:
from sentence_transformers import CrossEncoder

re_ranker_model = CrossEncoder("cross-encoder/ms-marco-TinyBERT-L-2-v2", max_length=512)


In [138]:
import re


def rerank_similarity(query, corpus_embedding, corpus, top_k=5):
    query_embedding = model.encode(query, convert_to_tensor=True)
    cosine_scores = model.similarity(query_embedding, corpus_embedding)[0]
    scores, indices = torch.topk(cosine_scores, k=top_k)
    print("\nQuery:", query)
    print("Top 5 most similar sentences in corpus: \n")


    for i, (score, idx) in enumerate(zip(scores, indices), 1):
        print(f"{i}. (Score: {score:.4f}) index: {idx} \n", corpus[idx])
        print()

    retrieved_answers = [corpus[idx] for idx in indices]

    # Prepare pairs for re-ranking
    pairs = [(query, answer) for answer in retrieved_answers]

    # Re-rank the retrieved answers
    reranked_scores = re_ranker_model.predict(pairs)
    reranked_indices = np.argsort(reranked_scores)[::-1]
    reranked_answers = [retrieved_answers[i] for i in reranked_indices]

    print("\nRe-ranked Answers:")
    for i, (score, answer) in enumerate(zip(reranked_scores, reranked_answers), 1):
        print(
            f"{i}. (Score: {score:.4f}) index: {corpus.index(answer)} \n",
            answer,
        )
        print()



def rerank_faiss(query, answers_dataset, corpus, top_k=5):
    # Retrieve top k answers using FAISS
    scores, samples = answers_dataset.get_nearest_examples("embeddings", model.encode(query, convert_to_tensor=False), k=top_k)

    samples_df = pd.DataFrame.from_dict(samples)
    samples_df["scores"] = scores
    samples_df.sort_values("scores", ascending=False, inplace=True)

    print("\nQuery:", query)
    print(f"Top {top_k} most similar sentences in corpus: \n")

    for i, (idx, row) in enumerate(samples_df.iterrows(), 1):
        print(
            f"{i}. (Score: {row['scores']:.4f}) index: {corpus.index(row["answers"])} \n",
            row["answers"],
        )
        print()

    retrieved_answers = samples["answers"]

    # Prepare pairs for re-ranking
    pairs = [(query, answer) for answer in retrieved_answers]

    # Re-rank the retrieved answers
    reranked_scores = re_ranker_model.predict(pairs)
    reranked_indices = np.argsort(reranked_scores)[::-1]
    reranked_answers = [retrieved_answers[i] for i in reranked_indices]

    print("\nRe-ranked Answers:")
    for i, (score, answer) in enumerate(zip(reranked_scores, reranked_answers), 1):
        print(
            f"{i}. (Score: {score:.4f}) index: {corpus.index(answer)} \n",
            answer,
        )
        print()



In [139]:
rerank_similarity(first_question, answer_embeddings, answers, top_k=5)



Query: To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?
Top 5 most similar sentences in corpus: 

1. (Score: 21.6458) index: 0 
 Architecturally, the school has a Catholic character. Atop the Main Building's gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend "Venite Ad Me Omnes". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary.

2. (Score: 19.7955) index: 6907 
 The Gospel of Luke begins its account of Mary's life with the Annunciation, when the angel Gabriel appeared to her a

In [140]:

rerank_faiss(first_question, answers_dataset, answers, top_k=5)


Query: To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?
Top 5 most similar sentences in corpus: 

1. (Score: 41.2264) index: 17583 
 It seems to have been St Bernard of Clairvaux who, in the 12th century, explicitly raised the question of the Immaculate Conception. A feast of the Conception of the Blessed Virgin had already begun to be celebrated in some churches of the West. St Bernard blames the canons of the metropolitan church of Lyon for instituting such a festival without the permission of the Holy See. In doing so, he takes occasion to repudiate altogether the view that the conception of Mary was sinless. It is doubtful, however, whether he was using the term "conception" in the same sense in which it is used in the definition of Pope Pius IX. Bernard would seem to have been speaking of conception in the active sense of the mother's cooperation, for in his argument he says: "How can there be absence of sin where there is concupiscence (libido)?" and stron

In [143]:
def calculate_recall_rerank(query, questions, answer_embeddings, top_k=5):
    # Generate Embeddings for the query
    query_embedding = model.encode(query, convert_to_tensor=True)

    # Perform Search using semantic search
    cosine_scores = model.similarity(query_embedding, answer_embeddings)[0]
    scores, indices = torch.topk(cosine_scores, k=top_k)

    retrieved_answers = [answers[idx] for idx in indices]

    # Prepare pairs for re-ranking
    pairs = [(query, answer) for answer in retrieved_answers]

    # Re-rank the retrieved answers
    reranked_scores = re_ranker_model.predict(pairs)
    reranked_indices = np.argsort(reranked_scores)[::-1].copy()
    reranked_answers = [retrieved_answers[i] for i in reranked_indices]

    # Calculate Recall
    relevant_index = questions.index(query)
    recall = 1 if relevant_index in indices[reranked_indices] else 0

    return recall

def calculate_recall_faiss_rerank(query, questions, answers_dataset, top_k=5):
    # Generate Embeddings for the query
    query_embedding = model.encode(query, convert_to_tensor=False)

    # Perform Search using FAISS index
    scores, samples = answers_dataset.get_nearest_examples("embeddings", query_embedding, k=top_k)
    samples_df = pd.DataFrame.from_dict(samples)
    samples_df["scores"] = scores
    samples_df.sort_values("scores", ascending=False, inplace=True)

    retrieved_answers = samples["answers"]

    # Prepare pairs for re-ranking
    pairs = [(query, answer) for answer in retrieved_answers]

    # Re-rank the retrieved answers
    reranked_scores = re_ranker_model.predict(pairs)
    reranked_indices = np.argsort(reranked_scores)[::-1]
    reranked_answers = [retrieved_answers[i] for i in reranked_indices]

    # Calculate Recall
    relevant_index = questions.index(query)
    recall = 1 if relevant_index in samples_df.index[reranked_indices] else 0

    return recall

subset_questions = questions[:100]
recall_rerank_semantic_search = []
recall_rerank_faiss_search = []

for query in subset_questions:
    recall_rerank_semantic_search.append(calculate_recall_rerank(query, questions, answer_embeddings, top_k=5))
    recall_rerank_faiss_search.append(calculate_recall_faiss_rerank(query, questions, answers_dataset, top_k=5))

average_recall_rerank_semantic_search = sum(recall_rerank_semantic_search) / len(recall_rerank_semantic_search)
average_recall_rerank_faiss_search = sum(recall_rerank_faiss_search) / len(recall_rerank_faiss_search)

print("Average Recall with Reranking (Semantic Search):", average_recall_rerank_semantic_search)
print("Average Recall with Reranking (FAISS Search):", average_recall_rerank_faiss_search)

Average Recall with Reranking (Semantic Search): 0.96
Average Recall with Reranking (FAISS Search): 0.05


In [147]:
def calculate_mrr_rerank(queries, questions, answer_embeddings, top_k=5):
    mrr_total = 0.0

    for query in queries:
        query_embedding = model.encode(query, convert_to_tensor=True)

        cosine_scores = model.similarity(query_embedding, answer_embeddings)[0]
        scores, indices = torch.topk(cosine_scores, k=top_k)

        retrieved_answers = [answers[idx] for idx in indices]

        # Prepare pairs for re-ranking
        pairs = [(query, answer) for answer in retrieved_answers]

        # Re-rank the retrieved answers
        reranked_scores = re_ranker_model.predict(pairs)
        reranked_indices = np.argsort(reranked_scores)[::-1]
        reranked_answers = [retrieved_answers[i] for i in reranked_indices]

        relevant_index = questions.index(query)
        if relevant_index in list(indices[reranked_indices].copy()):
            rank = list(indices[reranked_indices]).index(relevant_index) + 1
            reciprocal_rank = 1 / rank
        else:
            reciprocal_rank = 0

        mrr_total += reciprocal_rank

    mrr = mrr_total / len(queries)
    return mrr

def calculate_mrr_faiss_rerank(queries, questions, answers_dataset, top_k=5):
    mrr_total = 0.0

    for query in queries:
        query_embedding = model.encode(query, convert_to_tensor=False)

        scores, samples = answers_dataset.get_nearest_examples("embeddings", query_embedding, k=top_k)
        samples_df = pd.DataFrame.from_dict(samples)
        samples_df["scores"] = scores
        samples_df.sort_values("scores", ascending=False, inplace=True)

        retrieved_answers = samples["answers"]

        # Prepare pairs for re-ranking
        pairs = [(query, answer) for answer in retrieved_answers]

        # Re-rank the retrieved answers
        reranked_scores = re_ranker_model.predict(pairs)
        reranked_indices = np.argsort(reranked_scores)[::-1]
        reranked_answers = [retrieved_answers[i] for i in reranked_indices]

        relevant_index = questions.index(query)
        if relevant_index in samples_df.index[reranked_indices]:
            rank = list(samples_df.index[reranked_indices]).index(relevant_index) + 1
            reciprocal_rank = 1 / rank
        else:
            reciprocal_rank = 0

        mrr_total += reciprocal_rank

    mrr = mrr_total / len(queries)
    return mrr

# Example usage
subset_questions = questions[:100]

mrr_rerank_semantic_search = calculate_mrr_rerank(subset_questions, questions, answer_embeddings, top_k=5)
print("Mean Reciprocal Rank (MRR) with Reranking (Semantic Search):", mrr_rerank_semantic_search)

mrr_rerank_faiss = calculate_mrr_faiss_rerank(subset_questions, questions, answers_dataset, top_k=5)
print("Mean Reciprocal Rank (MRR) with Reranking (FAISS):", mrr_rerank_faiss)

ValueError: At least one stride in the given numpy array is negative, and tensors with negative strides are not currently supported. (You can probably work around this by making a copy of your array  with array.copy().) 