# Character Connection Analysis

### First Representation as a dataframe using pandas!

In [4]:
import pandas as pd

# Create a DataFrame to represent character connections
data = {
    'Bruce Wayne': ['Alfred', 'Batman', 'Lucius Fox', 'Rachel Dawes', 'James Gordon'],
    'Batman': ['Alfred', 'Bruce Wayne', 'Scare Crow', 'Ras al Ghul', 'James Gordon'],
    'Alfred': ['Bruce Wayne', 'Batman', 'N/A', 'N/A', 'N/A'],
    'Lucius Fox': ['Bruce Wayne', 'Batman', 'Alfred', 'N/A', 'N/A'],
    'Rachel Dawes': ['Bruce Wayne', 'N/A', 'N/A', 'N/A', 'N/A'],
    'Scare Crow': ['Batman', 'Falcone', 'Ras al Ghul', 'N/A', 'N/A'],
    'James Gordon': ['Batman', 'Bruce Wayne', 'Officers', 'N/A' ,'N/A'],
    'Officers': ['Batman', 'Bruce Wayne', 'James Gordon', 'N/A', 'N/A'],
    'Ras al Ghul': ['Bruce Wayne', 'Batman', 'Scarecrow', 'N/A', 'N/A'],
    'Falcone': ['Scare Crow', 'Batman', 'Ras al Ghul', 'N/A', 'N/A']  
}

df = pd.DataFrame(data)

print(df)




    Bruce Wayne        Batman       Alfred   Lucius Fox Rachel Dawes  \
0        Alfred        Alfred  Bruce Wayne  Bruce Wayne  Bruce Wayne   
1        Batman   Bruce Wayne       Batman       Batman          N/A   
2    Lucius Fox    Scare Crow          N/A       Alfred          N/A   
3  Rachel Dawes   Ras al Ghul          N/A          N/A          N/A   
4  James Gordon  James Gordon          N/A          N/A          N/A   

    Scare Crow James Gordon      Officers  Ras al Ghul      Falcone  
0       Batman       Batman        Batman  Bruce Wayne   Scare Crow  
1      Falcone  Bruce Wayne   Bruce Wayne       Batman       Batman  
2  Ras al Ghul     Officers  James Gordon    Scarecrow  Ras al Ghul  
3          N/A          N/A           N/A          N/A          N/A  
4          N/A          N/A           N/A          N/A          N/A  


### Let's clean this up a bit using tables! 

In [5]:
import pandas as pd

# Create a DataFrame to represent character connections
data = {
    'Character': ['Bruce Wayne', 'Batman', 'Alfred', 'Lucius Fox', 'Rachel Dawes', 'Scare Crow', 'James Gordon', 'Officers', 'Ras al Ghul', 'Falcone'],
    'Alfred': [0, 1, 1, 0, 0, 0, 0, 0, 0, 0],
    'Batman': [1, 0, 1, 1, 0, 1, 1, 1, 1, 1],
    'Lucius Fox': [1, 1, 0, 1, 0, 0, 0, 0, 0, 0],
    'Rachel Dawes': [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    'Scare Crow': [0, 1, 0, 0, 0, 1, 0, 0, 1, 1],
    'James Gordon': [1, 1, 0, 0, 0, 0, 0, 1, 0, 0],
    'Officers': [1, 1, 1, 0, 0, 0, 1, 0, 0, 0],
    'Ras al Ghul': [1, 1, 0, 0, 0, 1, 0, 0, 0, 1],
    'Falcone': [0, 1, 0, 0, 0, 1, 0, 0, 1, 0]
}

df = pd.DataFrame(data)
df.set_index('Character', inplace=True)

print(df)



              Alfred  Batman  Lucius Fox  Rachel Dawes  Scare Crow  \
Character                                                            
Bruce Wayne        0       1           1             1           0   
Batman             1       0           1             0           1   
Alfred             1       1           0             0           0   
Lucius Fox         0       1           1             0           0   
Rachel Dawes       0       0           0             0           0   
Scare Crow         0       1           0             0           1   
James Gordon       0       1           0             0           0   
Officers           0       1           0             0           0   
Ras al Ghul        0       1           0             0           1   
Falcone            0       1           0             0           1   

              James Gordon  Officers  Ras al Ghul  Falcone  
Character                                                   
Bruce Wayne              1         1 

### Who are the most importnant characters then? 
#### Let's use centrality to find the most important characters. 

In [8]:
import pandas as pd
import networkx as nx

# Create a DataFrame to represent character connections (as shown in the previous response)
data = {
    'Character': ['Bruce Wayne', 'Batman', 'Alfred', 'Lucius Fox', 'Rachel Dawes', 'Scare Crow', 'James Gordon', 'Officers', 'Ras al Ghul', 'Falcone'],
    'Alfred': [0, 1, 1, 0, 0, 0, 0, 0, 0, 0],
    'Batman': [1, 0, 1, 1, 0, 1, 1, 1, 1, 1],
    'Lucius Fox': [1, 1, 0, 1, 0, 0, 0, 0, 0, 0],
    'Rachel Dawes': [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    'Scare Crow': [0, 1, 0, 0, 0, 1, 0, 0, 1, 1],
    'James Gordon': [1, 1, 0, 0, 0, 0, 0, 1, 0, 0],
    'Officers': [1, 1, 1, 0, 0, 0, 1, 0, 0, 0],
    'Ras al Ghul': [1, 1, 0, 0, 0, 1, 0, 0, 0, 1],
    'Falcone': [0, 1, 0, 0, 0, 1, 0, 0, 1, 0]
}

df = pd.DataFrame(data)
df.set_index('Character', inplace=True)

# Transpose the DataFrame to have character names as columns
df = df.T

# Convert the DataFrame to a NetworkX graph for network analysis
G = nx.from_pandas_adjacency(df)

# Calculate degree centrality for each character
degree_centrality = nx.degree_centrality(G)

# Sort characters by degree centrality in descending order
sorted_characters = sorted(degree_centrality.items(), key=lambda x: x[1], reverse=True)

# Get the top 3 most important characters
top_3_important_characters = sorted_characters[:3]

# Display the top 3 most important characters
print("Top 3 Most Important Characters:")
for character, centrality in top_3_important_characters:
    print(f"{character}: Degree Centrality = {centrality}")


Top 3 Most Important Characters:
Batman: Degree Centrality = 0.875
Scare Crow: Degree Centrality = 0.625
Alfred: Degree Centrality = 0.5
