In [1]:
import openai
import os
import numpy as np
import time
import re

In [2]:
# Path to the folder containing the preprocessed .txt files
folder_path = "../../../videos/preprocessed_videos/squats/strength_training"
# When True, the first entry of every file's comment list is dropped while parsing
skip_first_comment = True

# Preprocessing plików tekstowych

In [3]:
def extract_sections(file_path, skip_first_line=False):
    """Parse one post file into its tagged sections.

    The file is expected to contain ``Title:``, ``URL:``, ``Selftext:`` and
    ``Media:`` fields followed by a ``Comments:`` section whose entries are
    introduced by ``<number>.`` at the start of a line.

    Args:
        file_path: Path of the UTF-8 text file to parse.
        skip_first_line: When true, the first parsed comment is dropped.

    Returns:
        dict with string values for ``Title``, ``URL``, ``Selftext`` and
        ``Media`` (empty string when a field is missing or empty) and a list
        of comment strings under ``Comments`` (empty list when absent).
    """
    sections = {"Title": "", "URL": "", "Selftext": "", "Media": "", "Comments": []}

    with open(file_path, "r", encoding="utf-8") as file:
        content = file.read()

    # Everything after the first line-leading "Comments:" tag is comment text.
    comments_match = re.search(r"^Comments:\s*(.*)", content, re.DOTALL | re.MULTILINE)

    # Header fields are parsed only from the text before the comments, so
    # tag-like text inside a comment can never overwrite a header field.
    header = content[:comments_match.start()] if comments_match else content

    # - Only horizontal whitespace is skipped after the colon; otherwise an
    #   empty field would swallow the following field's line.
    # - A field ends at the next "<Tag>:" at the start of a line (the colon is
    #   required, so body lines like "Media coverage ..." do not cut it short)
    #   or at the end of the header, with or without a trailing newline.
    field_pattern = re.compile(
        r"(Title|URL|Selftext|Media):[^\S\n]*(.*?)(?=\n(?:Title|URL|Selftext|Media):|\Z)",
        re.DOTALL,
    )
    for tag, text in field_pattern.findall(header):
        sections[tag] = text.strip()

    if comments_match:
        comments_text = comments_match.group(1).strip()

        # Split on the "<number>." markers. The marker of the very first
        # comment sits at the start of the text (no preceding newline), so the
        # start-of-string position is accepted as well; this keeps the first
        # comment free of its "1. " prefix like all the others.
        comment_lines = [
            chunk.strip()
            for chunk in re.split(r"(?:^|\n)\s*\d+\.\s*", comments_text)
            if chunk.strip()
        ]

        sections["Comments"] = comment_lines[1:] if skip_first_line else comment_lines

    return sections

def process_all_files(folder_path, skip_first_line=None):
    """Parse every ``.txt`` file in a folder with ``extract_sections``.

    Args:
        folder_path: Directory to scan (non-recursive).
        skip_first_line: Forwarded to ``extract_sections``. When ``None``
            (the default) the notebook-level ``skip_first_comment`` setting
            is used, which matches the previous behaviour.

    Returns:
        dict mapping each file name to the sections dict of that file,
        inserted in sorted file-name order.
    """
    if skip_first_line is None:
        # Backward-compatible default: fall back to the notebook configuration.
        skip_first_line = skip_first_comment

    all_data = {}

    # os.listdir returns names in arbitrary order; sorting makes the result
    # (and any later "first N files" processing) reproducible across runs.
    for filename in sorted(os.listdir(folder_path)):
        if filename.endswith(".txt"):
            file_path = os.path.join(folder_path, filename)
            all_data[filename] = extract_sections(file_path, skip_first_line)

    return all_data


In [4]:
# Parse every file in the configured folder
all_files_data = process_all_files(folder_path)

# Print a per-file report: header fields first, then the numbered comments
for filename, sections in all_files_data.items():
    print(f"File: {filename}")
    for field in ("Title", "URL", "Selftext", "Media"):
        print(f"{field}:", sections[field])
    print("Comments:")
    idx = 0
    for comment in sections["Comments"]:
        idx += 1
        print(f" Komentarz numer: {idx}. {comment}")
    print("-" * 40)

File: 0-0001-90-5-1-3-0-0.mp4.txt
Title: Squat form check
URL: https://v.redd.it/0v1md04wy7rb1
Selftext: I’ve always had way more trouble with squats than any other lift, so I  usually give up on them. I’d like to add them back to my regular leg day and slowly get stronger with them, but I want to make sure my form is solid before I try to go heavier.

I’m mainly worried about buttwink and my hips rising. I don’t think it’s quite a good morning or anything but it does look like I lean forward a bit going up. This is my last set so it should be a decent showcase of slightly broken down form towards the end.
Media: ['https://v.redd.it/0v1md04wy7rb1/DASH_1080.mp4?source=fallback']
Comments:
 Komentarz numer: 1. Keep your chest up and pack your chin. With high bar, you don’t want your head in that position.
 Komentarz numer: 2. Head up, maybe lower the bar a tad, and you don’t need to walk back that much. I usually just take one step back with each foot
 Komentarz numer: 3. Put clipper on 

# GPT API

In [6]:
# Initialise the OpenAI client.
# The API key is read from ../api.txt, from a line of the form "gpt: <key>".

# Start from None so a value left over from an earlier kernel run is never reused.
key_api = None

with open('../api.txt', 'r', encoding='utf-8') as file:
    for line in file:
        if 'gpt:' in line:
            # Keep everything after the "gpt:" label (split once only).
            key_api = line.split('gpt:', 1)[1].strip()

# Fail here with a clear message instead of a NameError or an
# authentication failure further down the notebook.
if not key_api:
    raise ValueError("No 'gpt:' entry with an API key found in ../api.txt")

# Module-level key, kept for code that still relies on it.
openai.api_key = key_api

from openai import OpenAI

client = OpenAI(
    api_key = key_api,
)

# Rekompilacja komentarzy

In [13]:
def query_gpt(prompt, model="gpt-3.5-turbo", max_tokens=50, temperature=0.7, api_client=None):
    """Send ``prompt`` to a chat-completion model and return the reply text.

    Args:
        prompt: User message sent to the model.
        model: Model name passed to the API.
        max_tokens: Upper bound on the length of the generated reply. The
            default of 50 is short; raise it when a detailed answer is needed.
        temperature: Sampling temperature passed to the API.
        api_client: Object exposing ``chat.completions.create``. When ``None``
            the notebook-level ``client`` is used.

    Returns:
        The stripped reply text (empty string when the reply has no content).
        If the request raises, ``"error"`` is printed and the exception
        message is returned instead, so callers always receive a string.
    """
    try:
        # Resolved inside the try block so a missing notebook-level client is
        # reported through the same error path as a failed request.
        active_client = client if api_client is None else api_client

        response = active_client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt}
            ],
            max_tokens=max_tokens,
            temperature=temperature
        )

        # The message content may be None; treat that as an empty reply
        # rather than failing on .strip().
        content = response.choices[0].message.content
        return (content or "").strip()

    except Exception as e:
        print("error")
        return str(e)

In [18]:
# Results keyed by file name: post metadata, the comments sent to the model
# and the generated analysis.
analysis_comments_prompt = {}

# Maximum number of recordings analysed in one run (each one is an API call).
max_files = 10

# Exercise name inserted into the prompt; identical for every file.
exercise = "squat"

for i, (filename, data) in enumerate(all_files_data.items()):
    # Check the limit before doing anything for this file, so no header is
    # printed for a recording that is not processed.
    if i >= max_files:
        break

    start_time = time.perf_counter()

    print(f"Wyniki dla nagrania: {filename}, numer {i}")

    title = data['Title']
    url = data['URL']
    selftext = data['Selftext']
    media = data['Media']
    # Comments exactly as parsed; any skipping of the first comment was
    # already applied when the files were read.
    comments = data['Comments']

    # Build the prompt: fixed instruction followed by one comment per line
    prompt = f"Please analyze the following comments and provide a detailed analysis of {exercise} technique improvement: Get information from all comments below. Let your answer be coherent. Just write summary nothing more. Don't mention that you used comments.\n\n"
    prompt += "\n".join(comments)

    # Only call the model when there is something to analyse
    if comments:
        analysis = query_gpt(prompt)
    else:
        analysis = "No comments available for analysis."

    # Store the result for this file
    analysis_comments_prompt[filename] = {
        'Title': title,
        'URL': url,
        'Selftext': selftext,
        'Media': media,
        'Comments' : comments,
        'Analysis': analysis
    }

    print('*'*100)
    print(f"Title: {title}")
    print(f"URL: {url}")
    print(f"Selftext: {selftext}")
    print(f"Media: {media}")
    print('+'*100)
    print(f"Komentarze: \n {comments}\n")
    print('+'*100)
    print('-'*100)
    print(f"Analysis: \n {analysis}\n")
    print('-'*100)
    print('*'*100)

    end_time = time.perf_counter()
    print(f"Przetwarzanie trwało {end_time - start_time:.4f} sekundy")

    print("-" * 40)

Wyniki dla nagrania: 0-0001-90-5-1-3-0-0.mp4.txt, numer 0
****************************************************************************************************
Title: Squat form check
URL: https://v.redd.it/0v1md04wy7rb1
Selftext: I’ve always had way more trouble with squats than any other lift, so I  usually give up on them. I’d like to add them back to my regular leg day and slowly get stronger with them, but I want to make sure my form is solid before I try to go heavier.

I’m mainly worried about buttwink and my hips rising. I don’t think it’s quite a good morning or anything but it does look like I lean forward a bit going up. This is my last set so it should be a decent showcase of slightly broken down form towards the end.
Media: ['https://v.redd.it/0v1md04wy7rb1/DASH_1080.mp4?source=fallback']
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Komentarze: 
 ['Keep your chest up and pack your chin. With high bar, you don’t want yo