In [1]:
!git clone https://github.com/A7medM0sta/Tasks_infotraff.git

Cloning into 'Tasks_infotraff'...
remote: Enumerating objects: 79, done.[K
remote: Counting objects: 100% (12/12), done.[K
remote: Compressing objects: 100% (11/11), done.[K
remote: Total 79 (delta 3), reused 1 (delta 1), pack-reused 67[K
Receiving objects: 100% (79/79), 291.25 KiB | 4.94 MiB/s, done.
Resolving deltas: 100% (22/22), done.


In [2]:
!pip install transformers pandas



In [3]:
import pandas as pd

class DataLoader:
    def __init__(self, file_path):
        self.file_path = file_path
        self.data = None

    def load_data(self):
        self.data = pd.read_excel(self.file_path)
        # print("Data loaded successfully.")

    def get_data(self):
        return self.data

In [4]:
import json
from transformers import pipeline
import pandas as pd

class QueryProcessor:
    def __init__(self, intents_file):
        self.data = None
        self.model = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
        self.intents = self.load_intents(intents_file)

    def load_intents(self, intents_file):
        with open(intents_file, 'r') as file:
            data = json.load(file)
        return {intent['name']: intent for intent in data['intents']}

    def set_data(self, data):
        self.data = data

    def process_query(self, query):
        # Identify intent
        result = self.model(query, list(self.intents.keys()))
        intent_name = result['labels'][0]
        intent = self.intents[intent_name]

        if intent_name == "most common visitor type":
            return self.most_common_visitor_type()

        elif intent_name == "most visitors in a minute":
            return self.most_visitors_in_minute()

        elif intent_name == "visitor count with single attribute":
            return self.visitor_count_with_single_attribute(query)

        elif "peak time" in query.lower():
            return self.visitors_during_peak_time(query)

        else:
            return "Sorry, I didn't understand your query."

    def most_common_visitor_type(self):
        visitor_types = ['Is Male', 'Is Female', 'Is Hijab', 'Is Child', 'Is Niqab', 'Has Bag']
        most_common_type = self.data[visitor_types].sum().idxmax()
        occurrences = self.data[most_common_type].sum()
        return f"The most common visitor type is '{most_common_type}' with {occurrences} occurrences."

    def most_visitors_in_minute(self):
        self.data['minute'] = self.data['Time'].dt.floor('T')
        peak_time = self.data['minute'].value_counts().idxmax()
        visitors_at_peak = self.data['minute'].value_counts().max()
        return f"The minute with the most visitors is {peak_time} with {visitors_at_peak} visitors."

    def visitor_count_with_single_attribute(self, query):
        attributes = {
            "hijab": "Is Hijab",
            "female": "Is Female",
            "male": "Is Male",
            "child": "Is Child",
            "niqab": "Is Niqab",
            "bag": "Has Bag",
            "woman": "Is Female",
        }
        for key, value in attributes.items():
            if key in query.lower() or key + 's' in query.lower():
                count = self.data[self.data[value] == 1].shape[0]
                return f"The number of '{key}' visitors is {count}."
        return "Sorry, I couldn't identify the specific attribute."

    def visitors_during_peak_time(self, query):
        self.data['minute'] = self.data['Time'].dt.floor('T')
        peak_time = self.data['minute'].value_counts().idxmax()
        attribute = None
        if "female" in query.lower() or "woman" in query.lower():
            attribute = "Is Female"
        elif "male" in query.lower():
            attribute = "Is Male"
        elif "hijab" in query.lower():
            attribute = "Is Hijab"
        elif "child" in query.lower():
            attribute = "Is Child"
        elif "niqab" in query.lower():
            attribute = "Is Niqab"
        elif "bag" in query.lower():
            attribute = "Has Bag"

        if attribute:
            count = self.data[(self.data['minute'] == peak_time) & (self.data[attribute] == 1)].shape[0]
            return f"The number of visitors with '{attribute}' during peak time ({peak_time}) is {count}."
        else:
            return f"The minute with the most visitors is {peak_time} with {self.data['minute'].value_counts().max()} visitors."

In [5]:
from transformers import pipeline

class LLM:
    def __init__(self):
        self.generator = pipeline('text2text-generation', model='facebook/bart-large')
    def ask_question(self, query):
        return self.generator(query)[0]['generated_text']

In [6]:
class MainApplication:
    def __init__(self, data_loader, query_processor, llm):
        self.data_loader = data_loader
        self.query_processor = query_processor
        self.llm = llm

    def run_query(self, query):
        # Load data once when running the query
        self.data_loader.load_data()
        data = self.data_loader.get_data()

        # Ensure the data is loaded correctly
        if data is None:
            raise ValueError("Data could not be loaded. Please check the file path and format.")

        self.query_processor.set_data(data)  # Pass the loaded data to the query processor
        response = self.query_processor.process_query(query)
        return response

In [9]:
# Load and process data
data_loader = DataLoader(file_path='/content/Tasks_infotraff/NLP/Intern NLP Dataset.xlsx')
query_processor = QueryProcessor('/content/Tasks_infotraff/NLP/intents.json')

llm = LLM()

# Instantiate the main application
app = MainApplication(data_loader, query_processor, llm)
"""Cifar10 dataset preprocessing and specifications."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import tarfile
import numpy as np
from six.moves import cPickle
from six.moves import urllib
import tensorflow as tf

REMOTE_URL = "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz"
LOCAL_DIR = os.path.join("data/cifar10/")
ARCHIVE_NAME = "cifar-10-python.tar.gz"
DATA_DIR = "cifar-10-batches-py/"
TRAIN_BATCHES = ["data_batch_%d" % (i + 1) for i in range(5)]
TEST_BATCHES = ["test_batch"]

IMAGE_SIZE = 32
NUM_CLASSES = 10

def get_params():
  """Return dataset parameters."""
  return {
    "image_size": IMAGE_SIZE,
    "num_classes": NUM_CLASSES,
  }

def prepare():
  """Download the cifar dataset."""
  if not os.path.exists(LOCAL_DIR):
    os.makedirs(LOCAL_DIR)
  if not os.path.exists(LOCAL_DIR + ARCHIVE_NAME):
    print("Downloading...")
    urllib.request.urlretrieve(REMOTE_URL, LOCAL_DIR + ARCHIVE_NAME)
  if not os.path.exists(LOCAL_DIR + DATA_DIR):
    print("Extracting files...")
    tar = tarfile.open(LOCAL_DIR + ARCHIVE_NAME)
    tar.extractall(LOCAL_DIR)
    tar.close()

def read(split):
  """Create an instance of the dataset object."""
  """An iterator that reads and returns images and labels from cifar."""
  batches = {
    tf.estimator.ModeKeys.TRAIN: TRAIN_BATCHES,
    tf.estimator.ModeKeys.EVAL: TEST_BATCHES
  }[split]

  all_images = []
  all_labels = []

  for batch in batches:
    with open("%s%s%s" % (LOCAL_DIR, DATA_DIR, batch), "rb") as fo:
      dict = cPickle.load(fo)
      images = np.array(dict["data"])
      labels = np.array(dict["labels"])

      num = images.shape[0]
      images = np.reshape(images, [num, 3, IMAGE_SIZE, IMAGE_SIZE])
      images = np.transpose(images, [0, 2, 3, 1])
      print("Loaded %d examples." % num)

      all_images.append(images)
      all_labels.append(labels)

  all_images = np.concatenate(all_images)
  all_labels = np.concatenate(all_labels)

  return tf.contrib.data.Dataset.from_tensor_slices((all_images, all_labels))

def parse(image, label):
  """Parse input record to features and labels."""
  image = tf.to_float(image) / 255.0
  image = tf.reshape(image, [IMAGE_SIZE, IMAGE_SIZE, 3])
  return {"image": image}, {"label": label}

# Example queries
queries = [
  #   "Who is my most common visitor ?",
  #   "What time did I get most visits ?",
  #   "How many hijab visited me ?",
  #  'How many females visited me ?',
    "How many Famele visited me in my peak time ?",
     "How many Male visited me in my peak time ?",
  "How many Childern visited me in my peak time ?"
]

# Process and print the responses
for query in queries:
    response = app.run_query(query)
    print(response)

The minute with the most visitors is 2024-07-17 15:12:00 with 5 visitors.
The number of visitors with 'Is Male' during peak time (2024-07-17 15:12:00) is 0.
The number of visitors with 'Is Child' during peak time (2024-07-17 15:12:00) is 0.


In [None]:
# Please enter your query in natural language:
# Who is my most common visitor ?
# Intent: Most Common Visitor Type
# The most common visitor type is 'Is Female' with 52 occurrences.

# Please enter your query in natural language:
# What time did I get most visits ?
# Intent: Most Visitors in a Minute
# The minute with the most visitors is 2024-07-17 15:12:00 with 5 visitors.

# Please enter your query in natural language:
# How many hijab visited me ?
# Intent: Visitor Count with Single Attribute
# The number of the specific visitors is 32.

# Please enter your query in natural language:
# How many females visited me ?
# Intent:Visitor Count with Single Attribute
# The number of the specific visitors is 52.

# Please enter your query in natural language:
# How many woman visited me in my peak time ?
# Intent: Most Visitors in a Minute
# The minute with the most visitors is 2024-07-17 15:12:00 with 5 visitors.

# Please enter your query in natural language:
# Break Most Visitors in a Minute
# The minute with the most visitors is 2024-07-17 15:12:00 with 5 visitors.