<a href="https://colab.research.google.com/github/TarunNagdeve/Cats-Vs-Dogs/blob/master/Untitled1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install Django into the *kernel's* environment (%pip, not !pip) and pin the
# version so a fresh runtime reproduces the same setup.
%pip install django==4.2.4

Collecting django
  Downloading Django-4.2.4-py3-none-any.whl (8.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.0/8.0 MB[0m [31m47.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting asgiref<4,>=3.6.0 (from django)
  Downloading asgiref-3.7.2-py3-none-any.whl (24 kB)
Installing collected packages: asgiref, django
Successfully installed asgiref-3.7.2 django-4.2.4


In [None]:
import numpy as np
import faiss
from sklearn.feature_extraction.text import TfidfVectorizer

# Load your data frames (adjust this part).
# NOTE: df1, df2 and df3 are not defined anywhere in this notebook; create them
# before running this cell. Each one must expose a 'text' column.
data_frames = [df1, df2, df3]  # List of your data frames

# Flatten every text entry into a single corpus and remember its origin.
# metadata[i] describes row i of the FAISS index built below.
all_texts = []
metadata = []  # Store metadata like document index and text index
for doc_index, df in enumerate(data_frames):
    text_data = df['text'].tolist()
    all_texts.extend(text_data)
    metadata.extend(
        {'doc_index': doc_index, 'text_index': text_index}
        for text_index in range(len(text_data))
    )

# Convert the list of metadata into a numpy array (1-D object array of dicts)
metadata_array = np.array(metadata)

# Fit ONE vectorizer on the whole corpus. Fitting a fresh vocabulary per data
# frame yields matrices with different column counts (and different column
# meanings), so their rows can neither be stacked nor compared.
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(all_texts)

# FAISS works on dense float32 arrays.
all_embeddings = np.ascontiguousarray(tfidf_matrix.toarray(), dtype=np.float32)

# Normalize the embeddings so inner product behaves like cosine similarity;
# the epsilon protects all-zero rows from division by zero.
all_embeddings /= (np.linalg.norm(all_embeddings, axis=1, keepdims=True) + 1e-6)

# Create a FAISS index
d = all_embeddings.shape[1]  # Dimension of embeddings
index = faiss.IndexFlatIP(d)  # Use Inner Product (IP) similarity measure

# A flat index needs no training; vectors are simply added.
index.add(all_embeddings)

# Example search: the query must be embedded with the SAME fitted vectorizer
# and normalized the same way as the indexed vectors.
query_text = "your query text"  # Replace with the text to search for
query_embedding = vectorizer.transform([query_text]).toarray().astype(np.float32)
query_embedding /= (np.linalg.norm(query_embedding, axis=1, keepdims=True) + 1e-6)
k = 5  # Number of nearest neighbors to retrieve
D, I = index.search(query_embedding, k)

# Now you can use the metadata_array to map back to specific text entries.
# The loop variable is deliberately NOT called `index`, which would overwrite
# the FAISS index object created above.
for distances, indices in zip(D, I):
    for distance, neighbor_id in zip(distances, indices):
        if neighbor_id < 0:
            # FAISS pads with -1 when fewer than k vectors are available;
            # -1 would otherwise silently select the last metadata entry.
            continue
        text_metadata = metadata_array[neighbor_id]
        doc_index = text_metadata['doc_index']
        text_index = text_metadata['text_index']
        print(f"Document {doc_index}, Text Entry {text_index}, Distance: {distance}")


In [None]:
from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework import status
from .models import SentimentAnalysisResult
from .serializers import SentimentAnalysisResultSerializer
import your_ml_library  # Import your ML library and model here

class SentimentAnalysisAPI(APIView):
    """API view that runs sentiment analysis on posted text and stores the result."""

    def post(self, request, format=None):
        """Analyse ``input_text`` from the request body and persist the outcome.

        Returns:
            201 with the serialized ``SentimentAnalysisResult`` on success, or
            400 when ``input_text`` is missing, not a string, or blank.
        """
        input_text = request.data.get('input_text', '')

        # Reject unusable input up front instead of running the model on it and
        # storing a meaningless row.
        if not isinstance(input_text, str) or not input_text.strip():
            return Response(
                {'input_text': ['This field is required and must be a non-empty string.']},
                status=status.HTTP_400_BAD_REQUEST,
            )

        # Perform sentiment analysis using your ML model
        sentiment_output = your_ml_library.perform_sentiment_analysis(input_text)

        # Save the result to the database
        sentiment_result = SentimentAnalysisResult(input_text=input_text, sentiment_output=sentiment_output)
        sentiment_result.save()

        serializer = SentimentAnalysisResultSerializer(sentiment_result)
        return Response(serializer.data, status=status.HTTP_201_CREATED)


In [None]:
import re

# Sample document containing the two section headings we want to slice between.
example_text = """
Some text here.
6.0 PROCEDURE
This is the procedure section.
It can have multiple lines.
7.0 LIST
This is the list section.
End of the text.
"""

# Lazily capture everything between the "6.0 PROCEDURE" and "7.0 LIST"
# headings. DOTALL lets '.' cross line breaks; IGNORECASE tolerates any casing
# of the headings.
pattern = re.compile(
    r'6\.0 PROCEDURE(.*?)7\.0 LIST',
    flags=re.IGNORECASE | re.DOTALL,
)

# Look for the first occurrence anywhere in the text.
match = pattern.search(example_text)

# Only when both headings were found: trim surrounding whitespace and show it.
if match is not None:
    extracted_text = match.group(1).strip()
    print(extracted_text)


In [None]:
import milvus
from sklearn.feature_extraction.text import TfidfVectorizer

# Connect to the Milvus server.
# NOTE(review): the calls below follow the pymilvus 1.x client
# (`milvus.Milvus(host, port)`, dict-style `create_collection`) — confirm the
# installed client version.
milvus_client = milvus.Milvus(host='localhost', port='19530')

# Create a Milvus collection for each document
document_collection_names = ['document_1', 'document_2', 'document_3']  # Adjust as needed

# These collections store vectors only, so the text and section index of every
# vector are kept here: {collection_name: {vector_id: {'text', 'section_index'}}}
document_metadata = {}

try:
    # Iterate over each document
    for doc_index, collection_name in enumerate(document_collection_names):
        # Load section indexes and text entries for the current document.
        # Replace these samples with real data; do not leave a literal `...`
        # in the lists, TfidfVectorizer cannot process an Ellipsis entry.
        section_indexes = [1, 2, 3]  # Load your section indexes
        text_entries = ["text entry 1", "text entry 2", "text entry 3"]  # Load your text entries

        # Convert text to embeddings using TF-IDF
        vectorizer = TfidfVectorizer()
        X = vectorizer.fit_transform(text_entries)
        embeddings = X.toarray()

        # Create a collection for the current document. The dimension must be
        # exactly the embedding width: text and section index are not part of
        # the vector, so no extra slots are reserved for them.
        status = milvus_client.create_collection({
            'collection_name': collection_name,
            'dimension': embeddings.shape[1],
            'index_file_size': 1024,
            'metric_type': milvus.MetricType.IP  # Choose appropriate metric type
        })
        if not status.OK():
            raise RuntimeError(f"Could not create collection {collection_name!r}: {status}")

        # Bundle embeddings, text, and section indexes per entry
        entities = [{'embedding': emb.tolist(), 'text': text, 'section_index': section}
                    for emb, text, section in zip(embeddings, text_entries, section_indexes)]

        # Only the raw vectors are sent to Milvus; insert() returns a status
        # together with the ids assigned to the vectors.
        records = [entity['embedding'] for entity in entities]
        status, ids = milvus_client.insert(collection_name=collection_name, records=records)
        if not status.OK():
            raise RuntimeError(f"Insert into {collection_name!r} failed: {status}")

        # Remember which text/section each returned id belongs to.
        document_metadata[collection_name] = {
            vector_id: {'text': entity['text'], 'section_index': entity['section_index']}
            for vector_id, entity in zip(ids, entities)
        }
finally:
    # Close the Milvus connection even if a step above failed
    milvus_client.close()


In [None]:
# Scaffold the Django project only when it does not exist yet, so re-running
# the notebook does not print a "'MyAPI' already exists" CommandError.
![ -d MyAPI ] || django-admin startproject MyAPI


In [None]:
# `!cd` runs in a throwaway subshell and leaves the kernel's working directory
# untouched; the %cd magic changes it for all following cells.
%cd MyAPI/

In [None]:
from google.colab.output import eval_js
# Evaluate the Colab proxyPort call for local port 8000 in the browser and
# print whatever it returns.
proxy_url = eval_js("google.colab.kernel.proxyPort(8000)")
print(proxy_url)

https://ui1ofxe6uh-496ff2e9c6d22116-8000-colab.googleusercontent.com/


In [None]:
import re
from pathlib import Path

# Hosts Django should accept. A leading dot is Django's subdomain wildcard;
# the Colab proxy URL printed earlier ends in "-colab.googleusercontent.com",
# i.e. it is a subdomain of googleusercontent.com.
# NOTE(review): confirm which Host header the Colab proxy actually forwards.
ALLOWED_HOSTS = ['colab.research.google.com', '.googleusercontent.com']

# Assigning ALLOWED_HOSTS in the notebook namespace has no effect on the
# server: Django reads it from the project's settings module, so the value is
# written into that file.
SETTINGS_PATH = Path('/content/MyAPI/MyAPI/settings.py')


def set_allowed_hosts_in_source(settings_source, hosts):
    """Return ``settings_source`` with its ALLOWED_HOSTS assignment set to ``hosts``.

    The first single-line ``ALLOWED_HOSTS = ...`` assignment is replaced (the
    form ``startproject`` generates); if none exists, one is appended.
    """
    new_line = f'ALLOWED_HOSTS = {list(hosts)!r}'
    # A callable replacement keeps backslashes in new_line from being treated
    # as regex escapes.
    updated, replaced = re.subn(
        r'^ALLOWED_HOSTS\s*=.*$',
        lambda _match: new_line,
        settings_source,
        count=1,
        flags=re.MULTILINE,
    )
    if replaced == 0:
        updated = settings_source.rstrip('\n') + '\n\n' + new_line + '\n'
    return updated


if SETTINGS_PATH.exists():
    SETTINGS_PATH.write_text(
        set_allowed_hosts_in_source(SETTINGS_PATH.read_text(), ALLOWED_HOSTS)
    )
else:
    print(f"{SETTINGS_PATH} not found - run the startproject cell first.")

In [None]:
# Apply Django's built-in migrations first; otherwise the server starts with
# the "unapplied migration(s)" warning and admin/auth/sessions may not work.
!python /content/MyAPI/manage.py migrate
# Start the development server on port 8000 (the port used for the Colab proxy URL).
!python /content/MyAPI/manage.py runserver 8000

Watching for file changes with StatReloader
Performing system checks...

System check identified no issues (0 silenced).
[31m
You have 18 unapplied migration(s). Your project may not work properly until you apply the migrations for app(s): admin, auth, contenttypes, sessions.[0m
[31mRun 'python manage.py migrate' to apply them.[0m
August 26, 2023 - 07:04:07
Django version 4.2.4, using settings 'MyAPI.settings'
Starting development server at http://127.0.0.1:8000/
Quit the server with CONTROL-C.

