# In [5]:
import pandas as pd
import json
from datetime import datetime

# Load the per-movie table; the code below reads its 'Movie Name',
# 'Release Date' and 'Characters' columns.
movies_df = pd.read_csv('marvel_movies_processed.csv')

# Load the network graph whose nodes are enriched further down.
# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's locale encoding, which is not UTF-8 everywhere.
with open('marvel_network_with_metrics.json', 'r', encoding='utf-8') as f:
    network_data = json.load(f)

# Map each character to the list of movies they appear in, and collect the
# set of every character name found in the table.
character_movies = {}
characters_set = set()
# Titles already recorded for each character. Keeping this next to
# character_movies turns the duplicate check into a set lookup instead of
# rebuilding a set from the character's whole movie list on every row.
recorded_titles = {}

for _, row in movies_df.iterrows():
    # Replace '-' with ' ' and convert the name to title case.
    movie_name = row['Movie Name'].replace('-', ' ').title()
    release_date = row['Release Date']
    characters = row['Characters'].split(', ')
    characters_set.update(characters)
    for character in characters:
        appearances = character_movies.setdefault(character, [])
        titles = recorded_titles.setdefault(character, set())
        # A character gets at most one entry per normalised movie title;
        # the first release date seen for that title is the one kept.
        if movie_name in titles:
            continue
        titles.add(movie_name)
        appearances.append({
            'movie_name': movie_name,
            'release_date': release_date,
        })

# Order each character's movies by release date, most recent first.
# Dates are parsed with the '%B %d, %Y' format (a string such as
# 'May 2, 2008').
for appearances in character_movies.values():
    appearances.sort(
        key=lambda entry: datetime.strptime(entry['release_date'], '%B %d, %Y'),
        reverse=True,
    )

# Build a square character-by-character table of co-appearance counts.
# The labels are sorted because iterating a set of strings has no stable
# order between interpreter runs; a fixed order keeps the matrix layout (and
# any order-dependent tie-breaking performed on it later) reproducible.
characters_list = sorted(characters_set)
correlation_matrix = pd.DataFrame(0, index=characters_list, columns=characters_list)

# For every movie, count each unordered pair of listed characters once and
# record it symmetrically.
for _, row in movies_df.iterrows():
    characters = row['Characters'].split(', ')
    for position, first in enumerate(characters):
        for second in characters[position + 1:]:
            correlation_matrix.at[first, second] += 1
            correlation_matrix.at[second, first] += 1

# Replace the counts with the pairwise correlation (pandas' default, Pearson)
# between the characters' co-appearance columns. Columns with no variation
# produce NaN, which is stored as 0.
correlation_matrix = correlation_matrix.corr().fillna(0)

# Attach to every network node its movie list and up to three of the other
# characters it is most strongly correlated with. Nodes whose id is not a
# label of the correlation matrix get no 'top_correlations' key.
for node in network_data['nodes']:
    character = node['id']
    node['movies'] = character_movies.get(character, [])
    if character in correlation_matrix.index:
        # Remove the character's own entry before ranking. Assuming it is
        # always ranked first is unsafe: its self-correlation is stored as 0
        # when its column is constant, and another character can tie with it.
        scores = correlation_matrix[character].drop(character)
        node['top_correlations'] = {
            # Plain floats keep the values straightforward to serialise.
            other: float(score)
            for other, score in scores.nlargest(3).items()
        }

# Write the enriched network to a new JSON file, pretty-printed with a
# 4-space indent.
with open('marvel_network_with_metrics_correlation.json', mode='w') as out_file:
    json.dump(network_data, fp=out_file, indent=4)
