## Boilerplate

In [None]:
!pip install -r requirements.txt

In [None]:
!pip install --upgrade openai

In [None]:
!pip install --upgrade openai-whisper

In [None]:
!sudo apt update && sudo apt install ffmpeg

In [None]:
import whisper
import cv2
import os
import urllib.request
from PIL import Image
from ultralytics import YOLO
import torch
import matplotlib.pyplot as plt
from tqdm import tqdm
from transformers import pipeline
import moviepy.editor as mp
import json
import re

In [None]:
# Path to the input video. The same path is later handed to `audio_to_text` for
# transcription and to `cv2.VideoCapture` for baton detection.
# For a quick test, use this 4-minute long video clip: https://drive.google.com/file/d/1ewLP2R6_41w_17hSjoP9TNwwVZBEkZ1_/view?usp=sharing

video_path = input("Enter the path to the video: ")

## Video Transcription

In [None]:
# Load the Whisper 'large-v3' checkpoint. `audio_to_text` reads this global `model`.
# NOTE: the Baton Detection section later rebinds `model` to a YOLO object, so
# this cell must be re-run before transcribing again.
model = whisper.load_model('large-v3')

In [None]:
def audio_to_text(path):
  """Transcribe the media file at `path` using the global `model`.

  Forwards `verbose=True` to `model.transcribe` and returns whatever that
  call returns, unchanged.
  """
  return model.transcribe(path, verbose=True)

def transcript_to_string(transcript):
    """Flatten a transcript's segments into a single string.

    Every entry of ``transcript['segments']`` contributes one line made of its
    ``id`` immediately followed by its ``text`` and a trailing newline.
    Returns an empty string when there are no segments.
    """
    lines = [
        str(segment['id']) + str(segment['text'] + '\n')
        for segment in transcript['segments']
    ]
    return ''.join(lines)

In [None]:
# Transcribe the video and keep the full result; later cells read its
# 'segments' list (id, text, start, end).
transcript = audio_to_text(video_path)

In [None]:
# Name the JSON file after the video: same base name with a ".json" extension.
# basename/splitext handle extensions of any length (".webm", ".mov", ...) and
# the platform's path separators, unlike slicing off the last four characters.
video_stem = os.path.splitext(os.path.basename(video_path))[0]
json_file_path = video_stem + ".json"

# Save the transcript to the file (written to the current working directory)
with open(json_file_path, 'w', encoding='utf-8') as json_file:
    json.dump(transcript, json_file, indent=2)

print(f"JSON data saved to: {json_file_path}")

## Action Detection

Actions:

1. Officers complain about the lack of a plan
2. Officers fail to give people directions
3. Officers direct aggressive comments at protestors

In [None]:
# Read the OpenAI API key without echoing it into the notebook's saved output.
# If the OPENAI_API_KEY environment variable is set it is used and no prompt is shown.
# (The previous unused import from a private keras module was dropped: nothing in
# the notebook referenced it and it fails where that module path does not exist.)
import getpass

OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY') or getpass.getpass('Enter your API key here:')

In [None]:
def read_transcript_from_file(file_path):
    """Return the entire contents of the UTF-8 text file at `file_path`."""
    with open(file_path, mode='r', encoding='utf-8') as handle:
        return handle.read()


# Ask for the saved transcript JSON, load it, and rebuild the plain-text
# transcript (one "<id><text>" line per segment) that is sent to the LLM.
json_transcript_path = input('Enter json path: ')

with open(json_transcript_path, 'r') as json_handle:
  transcript = json.load(json_handle)

transcript_string = transcript_to_string(transcript)


Enter json path:/content/test_video.json


In [None]:
from openai import OpenAI

client = OpenAI(api_key=OPENAI_API_KEY)

# System message: the full transcript followed by the analyst role and the
# kinds of statements to flag. Text is unchanged; only split across lines.
system_prompt = (
    f"Given this {transcript_string} You are an AI system specialized in detecting planning issues, critiquing plans, and analyzing conversations between people regarding how to disperse. "
    "Additionally, identify any instances suggesting 1st Amendment violations or officers expressing the belief that this protest was anti-police. "
    "Finally, flag any aggressive comments found in the audio transcript."
)

# User message: one worked example of the "Sentence: '...' Explanation: ..."
# shape that the parsing cell below looks for. Typo/grammar fixed
# ("responce like this following examples").
user_prompt = (
    "Give a response like the following example: "
    "Sentence: '18: What do you got?' "
    "Explanation: This sentence may indicate confusion or a need for clarification, as the speaker is asking for information. "
    "It could potentially be a planning issue if the speaker is seeking information to execute a specific task."
)

completion = client.chat.completions.create(
    model="gpt-3.5-turbo-1106",
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ],
)

In [None]:
# Text content of the first choice returned by the chat completion
first_choice = completion.choices[0]
output = first_choice.message.content

In [None]:
def extract_flagged_sentences(text):
    """Pull (sentence, explanation) pairs out of the model's reply.

    The reply is split into paragraphs on blank lines. A paragraph yields a pair
    only if it contains both "Explanation: ..." and, before it, a quoted
    "Sentence: '...'". A leading transcript segment id such as "18: " is removed
    from the sentence.

    Args:
        text: the raw reply string.

    Returns:
        A list of (sentence, explanation) tuples in order of appearance.
    """
    pairs = []
    # Allow whitespace-only separator lines (e.g. "\r\n\r\n") between paragraphs.
    for paragraph in re.split(r'\n\s*\n', text):
        explanation_match = re.search(r"Explanation: (.+)", paragraph)
        if not explanation_match:
            continue

        # Search only the text before "Explanation:". Otherwise, when both parts
        # share a line, the greedy quote match would run up to an apostrophe
        # inside the explanation (e.g. "speaker's").
        sentence_match = re.search(r"Sentence: '(.+)'", paragraph[:explanation_match.start()])
        if not sentence_match:
            continue

        # Strip only a leading numeric id ("18: "); splitting on every ': '
        # would truncate sentences that themselves contain a colon.
        sentence = re.sub(r"^\s*\d+\s*:\s*", "", sentence_match.group(1), count=1)
        pairs.append((sentence, explanation_match.group(1)))
    return pairs


flagged = extract_flagged_sentences(output)

# Parallel lists; `sentences` is used by the timestamp lookup cell below.
sentences = [sentence for sentence, _ in flagged]
explanations = [explanation for _, explanation in flagged]

# Print the results
for sentence, explanation in flagged:
    print(f"Sentence: '{sentence}'")
    print(f"Explanation: {explanation}\n")


Sentence: 'GIMME 54.'
Explanation: This sentence may indicate an urgent need for assistance, specifically a request for items or support. It could potentially be a planning issue if the speaker is trying to coordinate logistics or resources for a specific task.

Sentence: 'What the fuck is this?'
Explanation: This sentence may indicate confusion, frustration, or agitation, suggesting that the speaker is questioning the situation or circumstances. It could potentially be a planning issue if the speaker is uncertain about the plan of action or decision-making process.

Sentence: 'Back up.'
Explanation: This sentence may indicate a need to create physical space or establish order, possibly to facilitate movement or address overcrowding. It could potentially be a planning issue if the speaker is trying to implement a structural plan for organizing a physical space.

Sentence: 'I'm armed civilian.'
Explanation: This sentence may indicate the speaker's intent to communicate their armed statu

In [None]:
# For each flagged sentence, print the start/end time of every transcript
# segment whose text contains it (literal, case-insensitive match).
for sentence_to_search in sentences:
    needle = re.compile(re.escape(sentence_to_search), re.IGNORECASE)

    for segment in transcript['segments']:
        if needle.search(segment['text']) is None:
            continue
        print(sentence_to_search, 'time start:', segment['start'],'time stop:', segment['end'])

GIMME 54. time start: 737.96 time stop: 740.96
What the fuck is this? time start: 544.0 time stop: 553.0
Back up. time start: 614.0 time stop: 617.0
Back up. time start: 619.0 time stop: 620.0
Back up. time start: 625.0 time stop: 626.0
Back up. time start: 1310.6 time stop: 1311.6
Back up. time start: 1316.6 time stop: 1318.6
Back up. time start: 1319.6 time stop: 1321.6
Back up. time start: 7384.42 time stop: 7385.42
I'm armed civilian. time start: 637.0 time stop: 639.0


## Baton Detection

In [None]:
# YOLOv8 Model Fine-Tuned for detection of Police, Protestors, and Batons
# Fine-Tuned Model Weights can be downloaded from here: https://drive.google.com/file/d/1IlKpRO27gYErr4NZvhfuTwyliRgKvc8m/view?usp=sharing
# NOTE: this rebinds the global `model`, which until now held the Whisper model
# that `audio_to_text` relies on. Re-running the transcription cell after this
# one would call the YOLO object instead; reload the Whisper model first.

model_path = input("Enter the path to the model weights: ")
model = YOLO(model_path)

In [None]:
# This code cell detects batons in the video and prints the time intervals
# (HH:MM:SS) during which a baton was seen, sampling one frame per second.

# Class index compared against column 5 of each detection row to identify a baton.
BATON_CLASS_ID = 1


def format_timestamp(total_seconds):
    """Format a non-negative number of seconds as a zero-padded HH:MM:SS string."""
    hours, remainder = divmod(int(total_seconds), 3600)
    minutes, seconds = divmod(remainder, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}"


cap = cv2.VideoCapture(video_path)
try:
    if not cap.isOpened():
        raise IOError(f"Could not open video: {video_path}")

    # Keep the frame rate as a float: truncating e.g. 29.97 to 29 makes the
    # reported timestamps drift, and a rate below 1 would truncate to 0.
    fps = cap.get(cv2.CAP_PROP_FPS)
    if not fps > 0:
        raise ValueError(f"Video reports an invalid frame rate: {fps}")

    current_frame = 0
    next_sample_sec = 0   # next whole second at which a frame is analysed
    conseq_frames = 0     # consecutive sampled frames containing a baton
    start_time = ""

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        current_sec = current_frame / fps

        # Detecting baton on one frame per second: the first frame at or after
        # each whole-second mark.
        if current_sec >= next_sample_sec:
            next_sample_sec = int(current_sec) + 1

            # Model prediction on current frame
            results = model(frame, verbose=False)
            baton_found = any(
                float(detection[5]) == BATON_CLASS_ID
                for detection in results[0].boxes.data
            )

            if baton_found:
                # Marking the start_time if this is the first consecutive frame a baton is detected in
                conseq_frames += 1
                if conseq_frames == 1:
                    start_time = format_timestamp(current_sec)
            elif conseq_frames > 0:
                # A run of detections just ended: report the interval
                conseq_frames = 0
                end_time = format_timestamp(current_sec)
                print(f"Baton found from {start_time} to {end_time}")
                start_time = ""

        current_frame += 1

    # A baton still visible when the video ends would otherwise never be
    # reported; close that interval at the end of the video.
    if conseq_frames > 0:
        end_time = format_timestamp(current_frame / fps)
        print(f"Baton found from {start_time} to {end_time}")
finally:
    # Release the capture even if opening, decoding or inference fails
    cap.release()