In [2]:
import pandas as pd

# Read the raw Reddit export into a DataFrame.
df = pd.read_csv(filepath_or_buffer="reddit_posts.csv")

# Prepend a sequential, 1-based "id" column (first row -> 1, second -> 2, ...).
row_total = len(df)
df.insert(loc=0, column="id", value=range(1, row_total + 1))

# Write to a separate file so the raw export stays untouched.
df.to_csv(path_or_buf="reddit_posts_with_id.csv", index=False)

print("✅ Added 'id' column and saved to reddit_posts_with_id.csv")


✅ Added 'id' column and saved to reddit_posts_with_id.csv


In [3]:
import pandas as pd

# Start from the file that already carries the "id" column.
df = pd.read_csv("reddit_posts_with_id.csv")

# Build the question shown in Potato, one string per (character, series) pair.
# Iterating the two columns in lockstep formats each value exactly as the
# row-wise f-string would, without materialising a Series per row.
df["context"] = [
    f'What is the sentiment towards **{character}** from *{series}* in this post?'
    for character, series in zip(df["character_name"], df["series_name"])
]

# Persist the augmented table under a new name.
df.to_csv("reddit_posts_with_context.csv", index=False)

print("✅ Added 'context' column for Potato display.")


✅ Added 'context' column for Potato display.


In [7]:
import pandas as pd

df = pd.read_csv("reddit_posts_with_context.csv")

# "Character: <name> | Series: <series>", then four newlines, then the post body.
# pandas propagates missing values through string concatenation, so a row with
# a missing character, series or post ends up with a missing full_text.
header_line = "Character: " + df["character_name"] + " | Series: " + df["series_name"]
blank_gap = "\n\n" + "\n\n"
df["full_text"] = header_line + blank_gap + df["post_text"]

# Overwrites the input file in place, now including the "full_text" column.
df.to_csv("reddit_posts_with_context.csv", index=False)


In [6]:
import pandas as pd

# ---- Config ----
csv_file = "reddit_posts_with_context.csv"  # your CSV file
character_col = "character_name"
series_col = "series_name"
output_tsv = "keywords.tsv"

# Read CSV
df = pd.read_csv(csv_file)

# Keep one row per (character, series) pair; the first occurrence wins.
df = df.drop_duplicates(subset=[character_col, series_col])

# One label per series, numbered in order of first appearance:
# color_1, color_2, ...
series_to_label = {
    series: f"color_{position}"
    for position, series in enumerate(df[series_col].unique(), start=1)
}

# Build TSV rows: (character name, series colour label, schema name).
schema = "highlight"  # You can name this the same as your Potato schema if needed
rows = [
    (word, series_to_label[series], schema)
    for word, series in zip(df[character_col], df[series_col])
]

# Create DataFrame
tsv_df = pd.DataFrame(rows, columns=["Word", "Label", "Schema"])

# Save TSV; the message reports the configured path rather than a hard-coded
# file name, so it stays correct when output_tsv is changed.
tsv_df.to_csv(output_tsv, sep="\t", index=False)
print(f"{output_tsv} saved with {len(tsv_df)} entries")


keywords.tsv saved with 66 entries


In [8]:
import csv
import json

def csv_to_json(input_csv, output_json):
    """Convert a posts CSV into a JSON Lines file (one JSON object per line).

    Every CSV row becomes
    ``{"id": <id>, "text": [<series html>, <character html>, <post html>]}``
    where each ``text`` entry is a ``<strong>`` label followed by the raw value.

    Args:
        input_csv: Path to a UTF-8 CSV (a leading byte-order mark is tolerated)
            with the columns ``id``, ``series_name``, ``character_name`` and
            ``post_text``.
        output_json: Path of the file to write; it is overwritten if present.

    Raises:
        KeyError: If one of the required columns is absent from the CSV header.

    NOTE(review): values are interpolated into the HTML snippets unescaped;
    confirm the upstream data is safe to render as HTML.
    NOTE(review): this notebook defines ``csv_to_json`` a second time further
    down with a different column schema; whichever definition was executed
    last is the one in scope.
    """
    records = []

    # newline='' is what the csv module requires so that line breaks inside
    # quoted fields are kept verbatim; utf-8-sig drops a BOM (as written by
    # spreadsheet exports) that would otherwise rename the first column.
    with open(input_csv, mode='r', encoding='utf-8-sig', newline='') as csv_file:
        for row in csv.DictReader(csv_file):
            # DictReader fills the cells of a short row with None; treat those
            # as empty text instead of crashing on .strip() or printing "None".
            series_name = row['series_name'] or ''
            character_name = row['character_name'] or ''
            post_text = row['post_text'] or ''

            records.append({
                "id": row['id'],
                "text": [
                    f"<strong>Series: </strong>{series_name}",
                    f"<strong>Character: </strong>{character_name}",
                    f"<strong>Post: </strong>{post_text.strip()}",
                ],
            })

    # Write only after the whole input parsed, so a malformed CSV never leaves
    # a half-written output file behind.
    with open(output_json, mode='w', encoding='utf-8') as json_file:
        for entry in records:
            json_file.write(json.dumps(entry, ensure_ascii=False) + "\n")

# Example usage: write one JSON object per CSV row to output.json.
csv_to_json(
    input_csv="reddit_posts_with_context.csv",
    output_json="output.json",
)


In [9]:
import pandas as pd
import os

def split_and_expand_csv(input_csv, output_dir):
    """Write one CSV per series with one row per (post, character) pair.

    The comma-separated ``character_names_4o_p3`` cell of each input row is
    split, and the row is repeated once per name with that name stored in a
    ``character_name`` column (added at the end, or overwritten if it already
    exists). Each series is saved to
    ``<output_dir>/<series_name>_potato_first_round.csv`` with spaces in the
    file name replaced by underscores, and the saved path is printed.

    Rows whose ``character_names_4o_p3`` cell is missing, and names that are
    blank after trimming (e.g. from a trailing comma), are skipped rather than
    emitted as a character called "nan" or "". A series left with no rows gets
    no file.

    Args:
        input_csv: Path to the CSV holding at least the columns
            ``series_name`` and ``character_names_4o_p3``.
        output_dir: Directory for the per-series files; created if needed.

    Raises:
        KeyError: If a required column is absent from the input.

    NOTE(review): ``groupby`` drops rows with a missing ``series_name`` by
    default, so such rows are not written anywhere - confirm that is intended.
    """
    df = pd.read_csv(input_csv)

    # Ensure output directory exists
    os.makedirs(output_dir, exist_ok=True)

    # Lists of raw names per row; rows without any names are left out here so
    # that str(NaN) never turns into a literal "nan" character.
    name_lists = df['character_names_4o_p3'].dropna().astype(str).str.split(',')

    expanded = (
        df.loc[name_lists.index]
        .assign(character_name=name_lists)
        # One output row per list element; a fresh index keeps later
        # assignments free of duplicate-label alignment.
        .explode('character_name', ignore_index=True)
    )
    expanded['character_name'] = expanded['character_name'].str.strip()
    expanded = expanded[expanded['character_name'] != '']

    for series, group in expanded.groupby('series_name'):
        # Save file as "<series_name>_potato_first_round.csv"
        file_name = f"{series}_potato_first_round.csv".replace(" ", "_")
        output_path = os.path.join(output_dir, file_name)
        group.to_csv(output_path, index=False, encoding='utf-8')

        print(f"Saved: {output_path}")

# Example usage: one expanded CSV per series under output_series_files/.
split_and_expand_csv(
    input_csv="AllRelevantPostsFromAllSeries.csv",
    output_dir="output_series_files",
)


Saved: output_series_files/BoJackHorseman_potato_first_round.csv
Saved: output_series_files/BridgertonNetflix_potato_first_round.csv
Saved: output_series_files/Daredevil_potato_first_round.csv
Saved: output_series_files/EmilyInParis_potato_first_round.csv
Saved: output_series_files/FromSeries_potato_first_round.csv
Saved: output_series_files/HouseOfTheDragon_potato_first_round.csv
Saved: output_series_files/Invincible_potato_first_round.csv
Saved: output_series_files/LOTR_on_Prime_potato_first_round.csv
Saved: output_series_files/MrRobot_potato_first_round.csv
Saved: output_series_files/PeakyBlinders_potato_first_round.csv
Saved: output_series_files/SeveranceAppleTVPlus_potato_first_round.csv
Saved: output_series_files/StrangerThings_potato_first_round.csv
Saved: output_series_files/SuccessionTV_potato_first_round.csv
Saved: output_series_files/TheBoys_potato_first_round.csv
Saved: output_series_files/Yellowjackets_potato_first_round.csv
Saved: output_series_files/YouOnLifetime_potato_first_round.csv

In [4]:
import csv
import json

def csv_to_json(input_csv, output_json):
    """Convert an annotation CSV into a JSON Lines file (one object per line).

    Every CSV row becomes
    ``{"id": <id>, "text": [<series html>, <character html>, <post html>]}``.
    The post text is the title and the self-text joined by a blank line, then
    stripped. When the row carries a non-blank ``Link to character`` value,
    the character name is wrapped in an ``<a>`` tag that opens in a new tab.

    Args:
        input_csv: Path to a UTF-8 CSV (a leading byte-order mark is tolerated)
            with the columns ``id``, ``series``, ``character_to_annotate``,
            ``title`` and ``selftext``; ``Link to character`` is optional.
        output_json: Path of the file to write; it is overwritten if present.

    Raises:
        KeyError: If one of the required columns is absent from the CSV header.

    NOTE(review): values (including the link placed in ``href``) are
    interpolated into the HTML snippets unescaped; confirm the upstream data
    is safe to render as HTML.
    """
    records = []

    # newline='' is what the csv module requires so that line breaks inside
    # quoted fields are kept verbatim; utf-8-sig drops a BOM (as written by
    # spreadsheet exports) that would otherwise rename the first column.
    with open(input_csv, mode='r', encoding='utf-8-sig', newline='') as csv_file:
        for row in csv.DictReader(csv_file):
            # DictReader fills the cells of a short row with None; treat those
            # as empty text so concatenation and .strip() cannot fail.
            series_name = row['series'] or ''
            character_name = row['character_to_annotate'] or ''
            post_text = (row['title'] or '') + "\n\n" + (row['selftext'] or '')
            # `or ''` covers both a missing column and a None cell.
            character_link = (row.get('Link to character') or '').strip()

            series_text = f"<strong>Series: </strong>{series_name}"

            # Add link to character name if available
            if character_link:
                character_text = (
                    f'<strong>Character: </strong>'
                    f'<a href="{character_link}" target="_blank">{character_name}</a>'
                )
            else:
                character_text = f"<strong>Character: </strong>{character_name}"

            post_text_formatted = f"<strong>Post: </strong>{post_text.strip()}"

            records.append({
                "id": row['id'],
                "text": [series_text, character_text, post_text_formatted],
            })

    # Write only after the whole input parsed, so a malformed CSV never leaves
    # a half-written output file behind.
    with open(output_json, mode='w', encoding='utf-8') as json_file:
        for entry in records:
            json_file.write(json.dumps(entry, ensure_ascii=False) + "\n")



In [None]:

# Example usage: export the CSV that carries per-character links.
csv_to_json(
    input_csv="reddit_posts_with_context_with_single_links.csv",
    output_json="testingfandomlinks.json",
)

In [5]:
# Export the ProlificBoJackHorseman CSV to a JSON Lines file of the same name.
csv_to_json(
    input_csv="ProlificBoJackHorseman.csv",
    output_json="ProlificBoJackHorseman.json",
)