# Wizualizacja danych

Projekt stworzony w Google Colab. Końcowy efekt pracy to dashboard znajdujący się w ostatnich komórkach pliku.

In [3]:
# Load the data
import pandas as pd
# Google Drive file ID of the CSV; it is substituted into the download URL below.
file_id = '1iN9t9zZzDC7PRFjQFBhRN9YFU60D1eGH'
url = f'https://drive.google.com/uc?id={file_id}'
# read_csv fetches the file straight from the URL, so this cell needs network access.
# NOTE(review): the rendered frame shows an "Unnamed: 0" column, which suggests the
# CSV was saved together with its index; consider index_col=0 if it is not wanted.
df = pd.read_csv(url, sep=",")

In [4]:
# Display the loaded frame as the cell output (the rendered table below is truncated in the middle).
df

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [34]:
from bokeh.io import curdoc, output_notebook, show
from bokeh.models import ColumnDataSource, Grid, LinearAxis, Plot, Quad, LabelSet
from bokeh.models import Tabs
from bokeh.plotting import figure
from bokeh.layouts import gridplot
from bokeh.transform import cumsum
from bokeh.layouts import column

# Bokeh 3.0 renamed `Panel` to `TabPanel`. Importing both names unconditionally
# raises ImportError on every Bokeh version (one of the two never exists), so
# import whichever is available and bind the other name as an alias.
try:
  from bokeh.models import TabPanel
except ImportError:  # Bokeh < 3.0 only ships `Panel`
  from bokeh.models import Panel as TabPanel
Panel = TabPanel

# Make subsequent show() calls render inside the notebook.
output_notebook()

In [7]:
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from math import pi
from numpy import histogram, linspace

In [8]:
# Ages of all female passengers, and of the female passengers who survived.
female_mask = df['Sex'] == "female"
data1 = df.loc[female_mask, "Age"]
data2 = df.loc[female_mask & (df['Survived'] == 1), "Age"]

# Count both groups over the same 16 five-year bins spanning ages 0-80.
arr, edges = np.histogram(data1, bins=80 // 5, range=(0, 80))
arr2, edges2 = np.histogram(data2, bins=80 // 5, range=(0, 80))

# One row per bin: the count ('Age') plus the bin's left and right edge,
# which is the shape the quad glyphs of the histogram plot consume.
FSA1 = pd.DataFrame(dict(Age=arr, left=edges[:-1], right=edges[1:]))
FSA2 = pd.DataFrame(dict(Age=arr2, left=edges2[:-1], right=edges2[1:]))

In [10]:
# Empty 600x600 canvas for the female age histogram.
plotFSA = figure(title='Number of female passengers and survivors by age',
                 x_axis_label='Age',
                 y_axis_label='Female Passengers',
                 width=600, height=600)
plotFSA.title.align = "center"

# Draw two overlaid histograms as quad glyphs: all female passengers first,
# then the survivors on top in a darker shade.
for hist_frame, shade, caption in ((FSA1, '#ff5a5f', "Total Number"),
                                   (FSA2, '#780000', "Survivors")):
  plotFSA.quad(bottom=0, top=hist_frame['Age'],
               left=hist_frame['left'], right=hist_frame['right'],
               fill_color=shade, line_color=shade,
               line_alpha=0.1, alpha=0.6, legend_label=caption)

In [None]:
# Render the female passengers/survivors age histogram.
show(plotFSA)

In [11]:
# Ages of all male passengers, and of the male passengers who survived.
male_rows = df.loc[df['Sex'] == "male"]
histData = male_rows["Age"]
histData2 = male_rows.loc[male_rows['Survived'] == 1, "Age"]

# Both groups share the same binning: 16 five-year bins over ages 0-80.
age_binning = dict(bins=int(80/5), range=[0, 80])
arr, edges = np.histogram(histData, **age_binning)
arr2, edges2 = np.histogram(histData2, **age_binning)

# Per-bin count ('Age') with the bin's left/right edges, for all male passengers...
MSA1 = pd.DataFrame({
    'Age': arr,
    'left': edges[:-1],
    'right': edges[1:],
})

# ...and for the male survivors.
MSA2 = pd.DataFrame({
    'Age': arr2,
    'left': edges2[:-1],
    'right': edges2[1:],
})

In [12]:
# Empty 600x600 canvas for the male age histogram.
plotMSA = figure(
    height=600,
    width=600,
    title='Number of male passengers and survivors by age',
    x_axis_label='Age',
    y_axis_label='Male Passengers',
)

# Settings shared by both histogram layers.
quad_style = dict(bottom=0, line_alpha=0.1, alpha=0.6)

# Layer 1: every male passenger, binned by age.
plotMSA.quad(top=MSA1['Age'], left=MSA1['left'], right=MSA1['right'],
             fill_color='#8ecae6', line_color='#8ecae6',
             legend_label="Total Number", **quad_style)

# Layer 2: the male survivors, drawn over layer 1 in a darker colour.
plotMSA.quad(top=MSA2['Age'], left=MSA2['left'], right=MSA2['right'],
             fill_color='#1d3557', line_color='#1d3557',
             legend_label="Survivors", **quad_style)

plotMSA.title.align = "center"

In [13]:
# Render the male passengers/survivors age histogram.
show(plotMSA)

In [14]:
# Pie chart of the four sex/outcome groups over the whole data set.
labels = ["female survivors", "female victims", "male victims", "male survivors"]

# (Sex, Survived) pair behind each label, listed in the same order as `labels`.
pieVal = [len(df[(df["Sex"] == sex) & (df["Survived"] == outcome)])
          for sex, outcome in (("female", 1), ("female", 0), ("male", 0), ("male", 1))]

pieDict = {name: count for name, count in zip(labels, pieVal)}

# One row per group with columns 'label' and 'value'; the derived columns follow.
pieData = pd.Series(pieDict, name='value').rename_axis('label').reset_index()
pieData['angle'] = pieData['value']/pieData['value'].sum() * 2*pi  # wedge size in radians
pieData['color'] = ["red", "pink", "#ADD8E6", "#1E90FF"]
pieData['percent'] = pieData['value'] / sum(pieDict.values()) * 100

plotMF = figure(title="Survivors and victims by sex", height=350,
                x_range=(-0.5, 1.0), toolbar_location=None,
                tools="hover", tooltips="@label: @value")

# cumsum turns the per-row angles into consecutive start/end angles around the circle.
plotMF.wedge(x=0, y=1, radius=0.4, source=pieData,
             start_angle=cumsum('angle', include_zero=True),
             end_angle=cumsum('angle'),
             line_color="white", fill_color='color', legend_field='label')

# Hide axes and grid, which carry no meaning for a pie chart.
plotMF.title.align = "center"
plotMF.axis.axis_label = None
plotMF.axis.visible = False
plotMF.grid.grid_line_color = None

In [15]:
# Render the survivors/victims-by-sex pie chart.
show(plotMF)

In [37]:
def plotClass(x):
  """Build a pie chart of survivors and victims, split by sex, for one passenger class.

  Parameters
  ----------
  x : value compared against the ``Pclass`` column of the global ``df``.

  Returns
  -------
  The Bokeh figure containing one wedge per sex/outcome group.
  """
  in_class = df["Pclass"] == x
  female = df["Sex"] == "female"
  male = df["Sex"] == "male"
  survived = df["Survived"] == 1
  died = df["Survived"] == 0

  # Number of rows in each group; the key order fixes the wedge and colour order.
  counts = {
      "female survivors": len(df[female & survived & in_class]),
      "female victims": len(df[female & died & in_class]),
      "male victims": len(df[male & died & in_class]),
      "male survivors": len(df[male & survived & in_class]),
  }

  # One row per group with columns 'label' and 'value'; the derived columns follow.
  wedges = pd.Series(counts, name='value').rename_axis('label').reset_index()
  wedges['angle'] = wedges['value']/wedges['value'].sum() * 2*pi  # wedge size in radians
  wedges['color'] = ["red", "pink", "#ADD8E6", "#1E90FF"]
  wedges['percent'] = wedges['value'] / sum(counts.values()) * 100

  chart = figure(title='Survivors and victims by class', height=350,
                 x_range=(-0.5, 1.0), toolbar_location=None,
                 tools="hover", tooltips="@label: @value")

  # cumsum turns the per-row angles into consecutive start/end angles around the circle.
  chart.wedge(x=0, y=1, radius=0.4, source=wedges,
              start_angle=cumsum('angle', include_zero=True),
              end_angle=cumsum('angle'),
              line_color="white", fill_color='color', legend_field='label')

  # Hide axes and grid, which carry no meaning for a pie chart.
  chart.title.align = "center"
  chart.axis.axis_label = None
  chart.axis.visible = False
  chart.grid.grid_line_color = None

  return chart

# One tab per passenger class, each holding that class's pie chart.
class_tabs = [TabPanel(child=plotClass(pclass), title=caption)
              for pclass, caption in ((1, "Class 1"), (2, "Class 2"), (3, "Class 3"))]
tab1, tab2, tab3 = class_tabs
tabs = Tabs(tabs=[tab1, tab2, tab3])

In [None]:
# Render the tabbed per-class pie charts.
show(tabs)

In [38]:
# Stacked bar chart: survivors and victims per passenger class.
labels = ["First class", "Second class", "Third class"]
labels2 = ["survivors", "victims"]
colors = ["#fca311", "#14213d"]

# Row counts per class (1, 2, 3), in the same order as `labels`.
barVal = [len(df[(df["Pclass"] == pclass) & (df["Survived"] == 1)])
          for pclass in (1, 2, 3)]
barVal2 = [len(df[(df["Pclass"] == pclass) & (df["Survived"] == 0)])
           for pclass in (1, 2, 3)]

# Column-oriented data: the keys must match x='class' and the stackers in `labels2`.
barData = {
    'class': labels,
    'survivors': barVal,
    'victims': barVal2,
}

plotBar = figure(title="Survivors and victims by class", x_range=labels,
                 height=250, toolbar_location=None)

# One stacked segment per entry of `labels2`, coloured and labelled pairwise.
plotBar.vbar_stack(labels2, x='class', source=barData, width=0.9,
                   color=colors, legend_label=labels2)

# Cosmetics: bars start at zero, no vertical grid, no outline, legend in one row at top-left.
plotBar.title.align = "center"
plotBar.y_range.start = 0
plotBar.x_range.range_padding = 0.1
plotBar.xgrid.grid_line_color = None
plotBar.axis.minor_tick_line_color = None
plotBar.outline_line_color = None
plotBar.legend.location = "top_left"
plotBar.legend.orientation = "horizontal"

In [39]:
# Render the stacked survivors/victims-by-class bar chart.
show(plotBar)

In [41]:
def plotFare(x, max_fare=300, bin_width=5):
  """Build overlaid fare histograms, one layer per passenger class, for one port.

  Parameters
  ----------
  x : value matched against the ``Embarked`` column of the global ``df``.
  max_fare : upper edge of the histogram range (default 300). np.histogram
      ignores values outside the range, so fares above ``max_fare`` are
      silently left out of the plot; raise it to include them.
  bin_width : width of each fare bin (default 5).

  Returns
  -------
  The Bokeh figure with three quad layers (first, second, third class).

  Raises
  ------
  ValueError : if ``max_fare`` or ``bin_width`` is not positive, or if
      ``bin_width`` exceeds ``max_fare`` (no bin would fit).
  """
  if max_fare <= 0 or bin_width <= 0:
    raise ValueError("max_fare and bin_width must be positive")
  n_bins = int(max_fare / bin_width)
  if n_bins < 1:
    raise ValueError("bin_width must not exceed max_fare")

  # (Pclass value, colour, legend text) for each layer, in drawing order.
  layers = ((1, '#000000', "First Class"),
            (2, '#fca311', "Second Class"),
            (3, '#94d2bd', "Third Class"))

  # Create the blank plot
  plotF = figure(height = 400, width = 1200,
            title = 'Fare by class',
            x_axis_label = 'Fare',
            y_axis_label = 'Number of Passengers')

  for pclass, shade, caption in layers:
    fares = df.loc[(df['Pclass'] == pclass) & (df['Embarked'] == x), "Fare"]
    counts, edges = np.histogram(fares, bins = n_bins, range = [0, max_fare])

    # One quad per bin: height is the passenger count, width is the bin span.
    plotF.quad(bottom=0, top=counts,
          left=edges[:-1], right=edges[1:],
          fill_color=shade, line_color=shade, line_alpha=0.1, alpha=0.6, legend_label=caption)

  plotF.title.align = "center"

  return plotF

# One tab per embarkation port code, each holding that port's fare histogram.
port_tabs = [TabPanel(child=plotFare(code), title=port)
             for code, port in (('S', "Southampton"), ('Q', "Queenstown"), ('C', "Cherbourg"))]
tabF1, tabF2, tabF3 = port_tabs
tabsF = Tabs(tabs=[tabF1, tabF2, tabF3])

In [42]:
# Render the tabbed fare histograms (one tab per embarkation port).
show(tabsF)

In [43]:
from IPython.display import Javascript

In [44]:
# Colab-specific front-end call with maxHeight 5000 - presumably it lets this
# cell's output frame grow so the whole dashboard shows without an inner scrollbar.
resize_output = Javascript('''google.colab.output.setIframeHeight(0, true, {maxHeight: 5000})''')
display(resize_output)

# Dashboard layout: the two age histograms on the first row; the per-class
# tabs, the overall pie chart and the stacked bar chart on the second row.
dashboard_rows = [
    [plotFSA, plotMSA],
    [tabs, plotMF, plotBar],
]
p = gridplot(dashboard_rows)
show(p)

<IPython.core.display.Javascript object>

In [45]:
# The fare histograms are not part of the gridplot in the previous cell; render them separately.
show(tabsF)