In [None]:
import numpy as np 
import pandas as pd 
import random
from collections import Counter

# for visualizations
import matplotlib.pyplot as plt
import seaborn as sns
from pandas import plotting
from pandas.plotting import parallel_coordinates

# for interactive visualizations
import plotly.offline as py
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
from plotly import tools
init_notebook_mode(connected = True)
import plotly.figure_factory as ff

In [None]:
# Read the DC character table (path is relative to the working directory)
# and preview its first rows.
dc = pd.read_csv(filepath_or_buffer = 'dc-wikia-data.csv')
dc.head(n = 5)

In [None]:
# Read the Marvel character table (path is relative to the working directory)
# and preview its first rows.
marvel = pd.read_csv(filepath_or_buffer = 'marvel-wikia-data.csv')
marvel.head(n = 5)

In [None]:
# Number of missing entries in each DC column (isna is the same check as isnull).
dc.isna().sum(axis = 0)

In [None]:
def fill_missing_with_mode(frame: pd.DataFrame, columns) -> None:
    """Replace missing values in each listed column with that column's mode.

    The frame is modified in place. Every column is written back with
    ``frame[col] = frame[col].fillna(...)`` rather than calling
    ``fillna(..., inplace=True)`` on the column selection: the latter is a
    chained in-place update, which recent pandas versions warn about and which
    does not modify the frame once copy-on-write is active.

    Parameters
    ----------
    frame : pd.DataFrame
        Table to clean; must contain every name in ``columns``.
    columns : iterable of str
        Column labels to impute.

    Returns
    -------
    None
        Nothing is returned, so calling this as the last statement of a cell
        does not echo the frame.
    """
    for column in columns:
        modes = frame[column].mode()
        if modes.empty:
            # A column with no non-missing values has no mode; leave it untouched
            # instead of failing on modes[0].
            continue
        # When several values tie, mode() returns them sorted; take the first.
        frame[column] = frame[column].fillna(modes[0])


# Columns imputed identically in both tables. The year column is handled
# separately because it is spelled 'YEAR' in dc and 'Year' in marvel.
SHARED_FILL_COLUMNS = ['ID', 'ALIGN', 'EYE', 'HAIR', 'SEX', 'ALIVE',
                       'APPEARANCES', 'FIRST APPEARANCE']

fill_missing_with_mode(dc, SHARED_FILL_COLUMNS + ['YEAR'])
fill_missing_with_mode(marvel, SHARED_FILL_COLUMNS + ['Year'])

In [None]:
# Violin plots of the year column by ID, split by ALIGN:
# DC on the top panel, Marvel on the bottom panel.
plt.style.use('fivethirtyeight')
plt.rcParams['figure.figsize'] = (20, 10)

# Explicit axes instead of the pyplot state machine, so each plot call names
# the panel it draws on.
violin_fig, (dc_ax, marvel_ax) = plt.subplots(2, 1)

# x / y / hue are passed by keyword: seaborn 0.12+ accepts only `data`
# positionally, so the former positional x, y call raises a TypeError there.
sns.violinplot(data = dc, x = 'ID', y = 'YEAR', hue = 'ALIGN',
               palette = 'PuRd', ax = dc_ax)
dc_ax.set_xlabel(' ')  # blank label so it does not collide with the panel below
dc_ax.set_title('DC', fontsize = 30)

# The Marvel table spells the year column 'Year'.
sns.violinplot(data = marvel, x = 'ID', y = 'Year', hue = 'ALIGN',
               palette = 'copper', ax = marvel_ax)
marvel_ax.set_title('MARVEL', fontsize = 30)

plt.show()

In [None]:
# Side-by-side kernel density estimates of APPEARANCES for DC and Marvel.
import warnings

plt.rcParams['figure.figsize'] = (20, 8)
plt.style.use('fivethirtyeight')

# Guard against remaining gaps in APPEARANCES. The result is assigned back to
# the column: fillna(..., inplace=True) on a column selection is a chained
# in-place update that newer pandas versions warn about and that does not
# modify the frame under copy-on-write.
dc['APPEARANCES'] = dc['APPEARANCES'].fillna(0)
marvel['APPEARANCES'] = marvel['APPEARANCES'].fillna(0)

# NOTE: this silences every warning from here on, not only for this cell.
warnings.filterwarnings('ignore')

# Explicit axes so each density is drawn on a named panel.
kde_fig, (dc_ax, marvel_ax) = plt.subplots(1, 2)

sns.kdeplot(dc['APPEARANCES'], color = 'green', ax = dc_ax)
dc_ax.set_title('DC')

sns.kdeplot(marvel['APPEARANCES'], color = 'skyblue', ax = marvel_ax)
marvel_ax.set_title('Marvel')

plt.suptitle('Appearances comparison vs DC and Marvel', fontsize = 20)
plt.show()

In [None]:
# Grouped histogram of the ID column for both publishers.
# The colors carry a fourth (alpha) component, so they are written as rgba():
# a four-value string under the three-channel rgb() prefix is not reliably
# interpreted, and the alpha may be dropped when the figure is rendered.
trace1 = go.Histogram(
    x = dc['ID'],
    name = 'DC',
    opacity = 0.75,
    marker = dict(color = 'rgba(52, 85, 159, 0.6)'),
)
trace2 = go.Histogram(
    x = marvel['ID'],
    name = 'Marvel',
    opacity = 0.75,
    marker = dict(color = 'rgba(84, 52, 15, 0.6)'),
)
data = [trace1, trace2]

# 'group' places the DC and Marvel bars next to each other per category.
layout = go.Layout(
    barmode = 'group',
    title = 'Comparison of Identities',
)

fig = go.Figure(data = data, layout = layout)
py.iplot(fig)

In [None]:
# Box plots of APPEARANCES for each ALIGN value, one box series per publisher.
trace = go.Box(
    name = 'DC',
    x = dc['ALIGN'],
    y = dc['APPEARANCES'],
    marker = {'color': 'rgb(145, 65, 75)'},
)

trace2 = go.Box(
    name = 'Marvel',
    x = marvel['ALIGN'],
    y = marvel['APPEARANCES'],
    marker = {'color': 'rgb(2, 15, 85)'},
)

data = [trace, trace2]

# 'group' draws the two publishers' boxes side by side within each x category.
layout = go.Layout(title = 'Character vs Appearances', boxmode = 'group')

fig = go.Figure(layout = layout, data = data)
py.iplot(fig)

In [None]:
# Frequency of each SEX value per publisher, shown as two pies on a 1x2 grid
# (DC in column 0, Marvel in column 1).
dcdist = dc['SEX'].value_counts()
marveldist = marvel['SEX'].value_counts()

dctrace = go.Pie(
    name = 'DC',
    labels = dcdist.index,
    values = dcdist.values,
    domain = dict(column = 0),
)
marveltrace = go.Pie(
    name = 'Marvel',
    labels = marveldist.index,
    values = marveldist.values,
    domain = dict(column = 1),
)

layout = go.Layout(
    title = 'Gender Distributions of DC and Marvel Respectively',
    width = 750,
    grid = dict(rows = 1, columns = 2),
)
py.iplot(go.Figure(data = [dctrace, marveltrace], layout = layout))

In [None]:
# Frequency of each ALIVE value per publisher, drawn as stacked bars.
dcdist = dc['ALIVE'].value_counts()
marveldist = marvel['ALIVE'].value_counts()

dctrace = go.Bar(
    name = 'DC',
    x = dcdist.index,
    y = dcdist.values,
    marker = {'color': 'rgb(20,61,89)'},
)
marveltrace = go.Bar(
    name = 'Marvel',
    x = marveldist.index,
    y = marveldist.values,
    marker = {'color': 'rgb(244,180,26)'},
)

py.iplot(
    go.Figure(
        data = [dctrace, marveltrace],
        layout = go.Layout(
            title = 'Mortality Distribution',
            width = 600,
            barmode = 'stack',
        ),
    )
)