# Extract all conversations

In [None]:
from conversation_extractor import ConversationExtractor
import os

extractor = ConversationExtractor()

# Walk the raw-data tree. Files the extractor identifies as CSV are skipped;
# every other file has its conversations extracted and then saved.
for folder, _subfolders, filenames in os.walk("../data/raw"):
    for filename in filenames:
        if not extractor.is_csv_file(filename):
            raw_path = os.path.join(folder, filename)
            print(f"Extracting conversations from {raw_path}...")
            extracted = extractor.extract_conversations_by_url(
                raw_path, print_process=False)
            extractor.save_conversations(raw_path, extracted)

# Filter conversations

In [None]:
from conversation_filter import ConversationFilter
import os

conversation_filter = ConversationFilter()
print_process = False

# One accumulator per category. Results from every file are merged in with
# dict.update, so a key seen again in a later file replaces the earlier entry.
conversations_with_code = {}
python_conversation = {}
js_conversation = {}
ts_conversation = {}
java_conversation = {}

accumulators = (
    conversations_with_code,
    python_conversation,
    js_conversation,
    ts_conversation,
    java_conversation,
)

# Visit every file under the data/interim/conversations folder.
for folder, _subfolders, filenames in os.walk("../data/interim/conversations"):
    for filename in filenames:
        interim_path = os.path.join(folder, filename)
        print(f"Filtering conversations from {interim_path}...")
        loaded = conversation_filter.load_conversations(interim_path)

        # Run all filters on the freshly loaded conversations first, in the
        # same order as the accumulators above, and only then merge.
        filtered = (
            conversation_filter.get_conversations_with_code(
                loaded, print_process),
            conversation_filter.get_python_conversations(
                loaded, print_process),
            conversation_filter.get_js_conversations(
                loaded, print_process),
            conversation_filter.get_ts_conversations(
                loaded, print_process),
            conversation_filter.get_java_conversations(
                loaded, print_process),
        )

        for accumulator, matches in zip(accumulators, filtered):
            accumulator.update(matches)

# Persist each accumulated category under its label.
labels = ('with-code', 'python', 'javascript', 'typescript', 'java')
for accumulator, label in zip(accumulators, labels):
    conversation_filter.save_conversations(accumulator, label)

# Extract code snippets as source code files

In [1]:
from conversation_io import ConversationIO
from source_code_extractor import SourceCodeExtractor

conversation_io = ConversationIO()
source_code_extractor = SourceCodeExtractor()

# Directory holding the filtered conversation files.
url = "../data/interim/filtered-conversations"
# Selects which "conversations-<language>.json" file is loaded and is passed
# on to the exporter. Deliberately not named `type`: binding that name at
# notebook level would shadow the builtin for every cell run afterwards.
language = "python"
# Not read anywhere in this cell; kept so the notebook-level name stays defined.
print_process = False

# Load the filtered conversations for the chosen language.
conversations_by_url = conversation_io.load_conversations(
    f"{url}/conversations-{language}.json")

# Extract the source codes from the conversations and export them.
source_codes = source_code_extractor.extract(conversations_by_url)
source_code_extractor.export_source_code(source_codes, language)
