In [5]:
import pandas as pd
from collections import defaultdict

# Function to get unique pairs from a list of characters
def get_unique_pairs(characters):
    pairs = set()
    for i in range(len(characters)):
        for j in range(i + 1, len(characters)):
            pairs.add(tuple(sorted([characters[i], characters[j]])))
    return pairs

# Load the CSV file into a DataFrame
file_path = '../files/Game_of_Thrones_Script.csv'
got_df = pd.read_csv(file_path)

# Remove rows where 'Name' is NaN
got_df_cleaned = got_df.dropna(subset=['Name'])

# Preparing a dictionary to hold interactions per season
interactions_per_season = defaultdict(lambda: defaultdict(int))

for i in range(len(got_df_cleaned)):
    current_season = got_df_cleaned.iloc[i]['Season']
    current_speaker = got_df_cleaned.iloc[i]['Name']

    # Next 5 lines or to the end of the dataset
    next_lines = got_df_cleaned.iloc[i+1:i+6]

    # Characters in the current conversation excluding the current speaker
    characters_in_conversation = list(next_lines['Name'].unique())
    if current_speaker in characters_in_conversation:
        characters_in_conversation.remove(current_speaker)

    # Get all unique pairs of characters in this conversation
    pairs = get_unique_pairs(characters_in_conversation)

    # Update the interaction count for each pair
    for pair in pairs:
        interactions_per_season[current_season][pair] += 1

# Convert to DataFrame for easier analysis and visualization
interactions_df = pd.DataFrame([
    {'Season': season, 'Pair': pair, 'Interactions': count}
    for season, pairs in interactions_per_season.items()
    for pair, count in pairs.items()
])

# Displaying a sample of the final DataFrame
interactions_df.sample(10)


Unnamed: 0,Season,Pair,Interactions
6283,Season 8,"(grey worm, sansa stark)",11
6218,Season 8,"(daenerys targaryen, sam)",2
2627,Season 3,"(margaery tyrell, ygritte)",2
100,Season 1,"(catelyn stark, daenerys targaryen)",7
4796,Season 6,"(bran stark, young ned)",12
1873,Season 2,"(dagmer, osha)",2
121,Season 1,"(khal drogo, viserys)",4
3594,Season 4,"(alliser, sam)",13
3615,Season 4,"(arya stark, daenerys targaryen)",1
2842,Season 3,"(catelyn stark, sandor clegane)",5


In [9]:
# Filtering the dataframe for Season 1 and sorting by the number of interactions
season_1_interactions = interactions_df[interactions_df['Season'] == 'Season 8']
top_15_interactions_season_1 = season_1_interactions.sort_values(by='Interactions', ascending=False).head(15)

top_15_interactions_season_1


Unnamed: 0,Season,Pair,Interactions
6011,Season 8,"(davos, tyrion lannister)",44
5930,Season 8,"(jon snow, tyrion lannister)",42
5932,Season 8,"(bran stark, tyrion lannister)",36
5937,Season 8,"(daenerys targaryen, jon snow)",34
6040,Season 8,"(jaime lannister, tyrion lannister)",32
6054,Season 8,"(brienne, tyrion lannister)",29
6134,Season 8,"(man, soldier)",27
5936,Season 8,"(jon snow, sansa stark)",26
5968,Season 8,"(arya stark, jon snow)",25
5956,Season 8,"(daenerys targaryen, tyrion lannister)",25
