In [1]:
import json
import os

import pandas as pd

In [2]:
# Load the exported conversations from the JSON file.
# The encoding is passed explicitly: without it, open() decodes with the
# platform's locale encoding, which can corrupt or reject non-ASCII text.
# JSON interchange is UTF-8 by specification (RFC 8259).
with open("./00_Downloaded/20230816.json", "r", encoding="utf-8") as file:
    data = json.load(file)

In [3]:
# Accumulator for the flattened message records gathered from every
# conversation in the loaded list (one dict per message).
all_messages = list()

In [4]:
# Flatten every node of every conversation's `mapping` into one record per
# message. The list is rebuilt from scratch here so that re-running this cell
# does not append duplicate rows to an already-populated list.
all_messages = []
for conversation in data:
    # `or {}` covers both a missing key and an explicit null value;
    # `.get(key, {})` alone would return None for the latter.
    mapping = conversation.get('mapping') or {}
    for node in mapping.values():
        message_data = node.get('message') or {}
        if not message_data:
            # Nodes without a message payload are skipped.
            continue
        author = message_data.get('author') or {}
        content = message_data.get('content') or {}
        # Guard against a missing, null, or empty `parts` list; an empty or
        # null value would otherwise make the `[0]` lookup raise.
        parts = content.get('parts') or [None]
        all_messages.append({
            'id': message_data.get('id', None),
            'author_role': author.get('role', None),
            'create_time': message_data.get('create_time', None),
            'content': parts[0],
            'status': message_data.get('status', None),
            'parent': node.get('parent', None)
        })

In [5]:
# Build a DataFrame with one row per collected message record
df_messages = pd.DataFrame(data=all_messages)

In [6]:
# Preview the first five rows of the flattened messages
df_messages.head(n=5)

Unnamed: 0,id,author_role,create_time,content,status,parent
0,b0655646-af01-4f78-81d0-7f0b799be5ca,system,,,finished_successfully,1F583269-E02E-49B6-9BDB-AF72858F4330
1,f51bf072-2a25-4f6d-afa4-a779d49f6de0,user,1692110000.0,I want to grant access to user in ms sql that ...,finished_successfully,b0655646-af01-4f78-81d0-7f0b799be5ca
2,8ac9ee2a-ddf7-44d2-8f64-71c3653c6f54,assistant,1692110000.0,"In Microsoft SQL Server, if you want a user to...",finished_successfully,f51bf072-2a25-4f6d-afa4-a779d49f6de0
3,1422d334-d1df-419b-a1be-2ed251f0485d,user,1692110000.0,But i want to grant the access to that user to...,finished_successfully,8ac9ee2a-ddf7-44d2-8f64-71c3653c6f54
4,d9cbf9ec-d71a-4391-b8e9-5fe21a1d6e99,assistant,1692110000.0,If you want to grant the `VIEW DEFINITION` per...,finished_successfully,1422d334-d1df-419b-a1be-2ed251f0485d


In [7]:
# Save the DataFrame to a CSV file (optional).
# The output directory is created first: to_csv() does not create missing
# parent directories and would fail on a fresh checkout.
os.makedirs("./01_To_CSV", exist_ok=True)
df_messages.to_csv("./01_To_CSV/20230816.csv", index=False)

In [8]:
# Write all messages into a single README.md as a sequence of markdown
# tables, `chunk_size` rows per table.
chunk_size = 500  # Number of rows per chunk

# Ceiling division: `len // chunk_size + 1` would produce one extra, empty
# chunk whenever the row count is an exact multiple of chunk_size.
num_chunks = (len(df_messages) + chunk_size - 1) // chunk_size

# Explicit UTF-8 so non-ASCII message text does not depend on the platform's
# default encoding when written out.
with open("README.md", "w", encoding="utf-8") as file:
    file.write("# Extracted Messages\n\n")

    # Write each chunk to the file
    for i in range(num_chunks):
        chunk = df_messages.iloc[i * chunk_size : (i + 1) * chunk_size]
        file.write(chunk.to_markdown())
        file.write("\n\n")

"README.md file has been created successfully with chunked data."


'README.md file has been created successfully with chunked data.'

In [9]:
# Split the messages across several README_<n>.md files in the current
# working directory, `rows_per_file` rows each, and collect the file names.

# Number of rows per file
rows_per_file = 100

# Calculate the number of files needed. Ceiling division is used because
# `len // rows_per_file + 1` would create one extra file containing an empty
# table whenever the row count is an exact multiple of rows_per_file.
num_files = (len(df_messages) + rows_per_file - 1) // rows_per_file

file_names = []

# Create multiple README files
for i in range(num_files):
    start_idx = i * rows_per_file
    end_idx = (i + 1) * rows_per_file
    chunk = df_messages.iloc[start_idx:end_idx]
    file_name = f"README_{i+1}.md"
    file_names.append(file_name)
    # Explicit UTF-8 so non-ASCII message text does not depend on the
    # platform's default encoding when written out.
    with open(file_name, "w", encoding="utf-8") as file:
        file.write(f"# Extracted Messages - Part {i+1}\n\n")
        file.write(chunk.to_markdown())
        file.write("\n\n")

file_names


['README_1.md',
 'README_2.md',
 'README_3.md',
 'README_4.md',
 'README_5.md',
 'README_6.md',
 'README_7.md',
 'README_8.md',
 'README_9.md',
 'README_10.md',
 'README_11.md',
 'README_12.md',
 'README_13.md',
 'README_14.md',
 'README_15.md',
 'README_16.md',
 'README_17.md',
 'README_18.md',
 'README_19.md',
 'README_20.md',
 'README_21.md',
 'README_22.md',
 'README_23.md',
 'README_24.md',
 'README_25.md',
 'README_26.md',
 'README_27.md',
 'README_28.md',
 'README_29.md',
 'README_30.md',
 'README_31.md',
 'README_32.md',
 'README_33.md',
 'README_34.md',
 'README_35.md',
 'README_36.md',
 'README_37.md',
 'README_38.md',
 'README_39.md',
 'README_40.md',
 'README_41.md',
 'README_42.md',
 'README_43.md']

In [10]:
import shutil
import os

# Ensure the target directory exists. `exist_ok=True` replaces the separate
# existence check, so there is no gap between checking and creating.
target_directory = "./02_ReadME_Files/"
os.makedirs(target_directory, exist_ok=True)

# Move every generated README_X.md file (names collected in `file_names`)
# into the target directory, keeping its file name.
for file_name in file_names:
    shutil.move(file_name, os.path.join(target_directory, file_name))

f"All README files have been moved to {target_directory}"


'All README files have been moved to ./02_ReadME_Files/'