Tasks:
    
Import necessary libraries and load the dataset.

Create a bar plot to visualize the number of passengers who survived and did not survive, broken down by gender.

Create a histogram to display the distribution of passengers' ages. Use different colors for passengers who survived and did not survive.

Create a violin plot to visualize the distribution of fare prices paid by passengers, broken down by their class (1st, 2nd, and 3rd class).

Generate a scatter plot to explore the relationship between passengers' ages and fare prices, using different colors and symbols for the different classes.

Use faceting to create a scatter plot matrix displaying the relationship between age, fare, and class for passengers who survived and did not survive.

Export your final visualizations as HTML files to share with others.

In [13]:
import plotly.subplots as sp
import plotly.graph_objs as go
import plotly.express as px
import pandas as pd
import numpy as np

In [5]:
# Read the Titanic passenger table from the CSV file and preview its first five rows.
data = pd.read_csv(filepath_or_buffer='titanic.csv')
data.head(n=5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [6]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
data.describe()

Unnamed: 0,PassengerId,Survived,Pclass,Age,SibSp,Parch,Fare
count,891.0,891.0,891.0,714.0,891.0,891.0,891.0
mean,446.0,0.383838,2.308642,29.699118,0.523008,0.381594,32.204208
std,257.353842,0.486592,0.836071,14.526497,1.102743,0.806057,49.693429
min,1.0,0.0,1.0,0.42,0.0,0.0,0.0
25%,223.5,0.0,2.0,20.125,0.0,0.0,7.9104
50%,446.0,0.0,3.0,28.0,0.0,0.0,14.4542
75%,668.5,1.0,3.0,38.0,1.0,0.0,31.0
max,891.0,1.0,3.0,80.0,8.0,6.0,512.3292


In [49]:
# Store the survival flag and the ticket class as strings.
# NOTE(review): presumably done so Plotly Express colours them as discrete
# categories rather than on a continuous scale — confirm against the plots below.
for label_column in ('Survived', 'Pclass'):
    data[label_column] = data[label_column].map(str)

# Print each column's dtype and non-null count.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Survived     891 non-null    object 
 2   Pclass       891 non-null    object 
 3   Name         891 non-null    object 
 4   Sex          891 non-null    object 
 5   Age          891 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Cabin        891 non-null    object 
 11  Embarked     891 non-null    object 
dtypes: float64(2), int64(3), object(7)
memory usage: 83.7+ KB


In [9]:
# Number of missing (NaN) entries in each column.
data.isna().sum()

PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
dtype: int64

In [12]:
# Give passengers with no recorded cabin the placeholder category 'U'.
cabin_missing = data['Cabin'].isna()
data.loc[cabin_missing, 'Cabin'] = 'U'

# Re-check the per-column missing-value counts.
data.isnull().sum()

PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin            0
Embarked         2
dtype: int64

In [15]:
# Replace missing ages with the mean of the recorded ages.
# fillna is used instead of replace(np.NaN, ...) because the np.NaN alias was
# removed in NumPy 2.0 and raises AttributeError there; fillna targets NaN directly.
mean_age = data['Age'].mean()  # mean() skips NaN, so only recorded ages contribute
data['Age'] = data['Age'].fillna(mean_age)

# Re-check the per-column missing-value counts.
data.isnull().sum()

PassengerId    0
Survived       0
Pclass         0
Name           0
Sex            0
Age            0
SibSp          0
Parch          0
Ticket         0
Fare           0
Cabin          0
Embarked       2
dtype: int64

In [18]:
# Replace missing embarkation ports with the most frequent port.
# The mode is computed from the column instead of hard-coding 'S', so the code
# keeps matching its stated intent if the input data changes.
# mode() ignores NaN and returns values sorted, so .iloc[0] is deterministic on ties.
most_common_port = data['Embarked'].mode().iloc[0]
data['Embarked'] = data['Embarked'].fillna(most_common_port)

# Re-check the per-column missing-value counts.
data.isnull().sum()

PassengerId    0
Survived       0
Pclass         0
Name           0
Sex            0
Age            0
SibSp          0
Parch          0
Ticket         0
Fare           0
Cabin          0
Embarked       0
dtype: int64

In [19]:
# Preview the first five rows again after the missing values have been filled.
data.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,U,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,U,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,U,S


In [42]:
# Bar plot of the number of passengers who survived and did not survive, broken down by gender.
# The rows are aggregated first: plotting y='PassengerId' directly would draw bars from
# the ID values themselves, not from how many passengers fall in each group.
survival_counts = (
    data.groupby(['Sex', 'Survived'])
    .size()                              # number of rows (passengers) per group
    .reset_index(name='Passengers')
    .astype({'Survived': str})           # string labels give one discrete colour per outcome
)
fig = px.bar(
    survival_counts,
    x='Sex',
    y='Passengers',
    color='Survived',
    barmode="group",
    labels={'Passengers': 'Count of passengers'},
    title='Passenger survival counts by gender',
)
fig.show()

In [44]:
# Histogram of passenger ages in 10 bins, coloured by survival outcome.
# Missing ages were replaced with the mean earlier in this notebook, so the bin
# containing the mean age includes those imputed passengers.
fig = px.histogram(
    data,
    x="Age",
    color='Survived',
    nbins=10,
    title='Histogram of Passengers Age Distribution',
)
fig.show()

In [50]:
# Violin plot of the fare distribution, one violin (and colour) per passenger class.
fig = px.violin(
    data,
    x='Pclass',
    y='Fare',
    color='Pclass',
    title='Distribution of fare prices paid by passengers',
)
fig.show()

In [65]:
# Scatter plot of age against fare; passenger class sets both the marker colour
# and the marker symbol so the classes remain distinguishable without colour.
fig = px.scatter(
    data,
    x='Age',
    y='Fare',
    color='Pclass',
    symbol='Pclass',
    # Title text corrected: the original read "relationshipbetween passengers ages".
    title="Scatter plot showing relationship between passengers' ages and fare prices",
)
fig.show()

In [97]:
# Scatter plot matrix of age, passenger class and fare; survival outcome sets both
# the marker colour and the marker symbol.
# To hide the diagonal panels, call fig.update_traces(diagonal_visible=False) before showing.
fig = px.scatter_matrix(
    data,
    dimensions=["Age", "Pclass", "Fare"],
    color="Survived",
    symbol="Survived",
    title="Scatter matrix of titanic data set",
)
fig.show()

In [98]:

 import plotly.io as pio

In [99]:
# Write the current figure to a standalone HTML file.
# `fig` holds only the most recently created figure (the scatter matrix), so that
# is the one visualisation exported here.
pio.write_html(fig, file='scatter_plot.html')