In [218]:
import pandas as pd
import plotly.express as px



# Assigment based on plotly express


## Table of contents

#### Title: Exploratory Data Analysis of the Titanic Dataset using Plotly Express.
 
### Objective:
In this assignment, you will perform an exploratory data analysis on the Titanic dataset using Plotly Express. You will create visualizations to gain insights into the passengers' demographics and their chances of survival.

#### Dataset:
The Titanic dataset can be found here: https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv

#### Tasks:
- Import necessary libraries and load the dataset.

- Create a bar plot to visualize the number of passengers who survived and did not survive, broken down by gender.

- Create a histogram to display the distribution of passengers' ages. Use different colors for passengers who survived and did not survive.

- Create a violin plot to visualize the distribution of fare prices paid by passengers, broken down by their class (1st, 2nd, and 3rd class).

- Generate a scatter plot to explore the relationship between passengers' ages and fare prices, using different colors and symbols for the different classes.

- Use faceting to create a scatter plot matrix displaying the relationship between age, fare, and class for passengers who survived and did not survive.

- Export your final visualizations as HTML files to share with others.

##### Hints:

- To load the dataset from the provided URL, use the pandas.read_csv() function.

- For each task, create a separate code cell in your Jupyter Notebook and use comments to describe the purpose of the code.

- To create different types of plots, use the appropriate Plotly Express functions, such as px.bar(), px.histogram(), px.violin(), and px.scatter().

Use the color, facet_col, facet_row, and other parameters to break down the data by categories.

- Refer to the Plotly Express documentation for more information on customizing your plots: https://plotly.com/python/plotly-express/

- To export your visualizations as HTML files, use the plotly.io.write_html() function.

- Once you have completed the assignment, submit your Jupyter Notebook with the code, visualizations, and a brief interpretation of the insights gained from each plot.

### Solution

#### Answer 1 : Library importing and loading data

In [219]:
import pandas as pd
import plotly.express as px

# creating a data source link

url = 'https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv'

#reading the data in the url above
data = pd.read_csv(url)

#display the nature of the data
data.head()




Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [220]:
data.describe()

Unnamed: 0,PassengerId,Survived,Pclass,Age,SibSp,Parch,Fare
count,891.0,891.0,891.0,714.0,891.0,891.0,891.0
mean,446.0,0.383838,2.308642,29.699118,0.523008,0.381594,32.204208
std,257.353842,0.486592,0.836071,14.526497,1.102743,0.806057,49.693429
min,1.0,0.0,1.0,0.42,0.0,0.0,0.0
25%,223.5,0.0,2.0,20.125,0.0,0.0,7.9104
50%,446.0,0.0,3.0,28.0,0.0,0.0,14.4542
75%,668.5,1.0,3.0,38.0,1.0,0.0,31.0
max,891.0,1.0,3.0,80.0,8.0,6.0,512.3292


In [221]:
data[data.isnull()]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,,,,,,,,,,,,
1,,,,,,,,,,,,
2,,,,,,,,,,,,
3,,,,,,,,,,,,
4,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
886,,,,,,,,,,,,
887,,,,,,,,,,,,
888,,,,,,,,,,,,
889,,,,,,,,,,,,


In [222]:
data[data.isnull()].drop

data

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [223]:
import numpy as np

[[data.PassengerId == np.nan]] 

[[0      False
  1      False
  2      False
  3      False
  4      False
         ...  
  886    False
  887    False
  888    False
  889    False
  890    False
  Name: PassengerId, Length: 891, dtype: bool]]

In [224]:
data.describe()

Unnamed: 0,PassengerId,Survived,Pclass,Age,SibSp,Parch,Fare
count,891.0,891.0,891.0,714.0,891.0,891.0,891.0
mean,446.0,0.383838,2.308642,29.699118,0.523008,0.381594,32.204208
std,257.353842,0.486592,0.836071,14.526497,1.102743,0.806057,49.693429
min,1.0,0.0,1.0,0.42,0.0,0.0,0.0
25%,223.5,0.0,2.0,20.125,0.0,0.0,7.9104
50%,446.0,0.0,3.0,28.0,0.0,0.0,14.4542
75%,668.5,1.0,3.0,38.0,1.0,0.0,31.0
max,891.0,1.0,3.0,80.0,8.0,6.0,512.3292


#### Answer 2: Create a bar plot to visualize the number of passengers who survived and did not survive, broken down by gender.

In [225]:
#Creating the bar plot
fig = px.bar(data,
             y = 'Survived',
             x = 'Sex',
             color= 'Survived',
             title='Bar plot to visualize the number of passengers who survived and did not survive'
             )

#showing the plotting
fig.show()



#### Answer 3 : Create a histogram to display the distribution of passengers' ages. Use different colors for passengers who survived and did not survive.

In [226]:
#Creating the bar plot
fig = px.histogram(data,
             y = 'Survived',
             x = 'Age',
             nbins=10,
             color= 'Survived',
             title='Histogram plot displaying the distribution of passengers age'
             )

#showing the plotting
fig.show()



#### Answer 4 : Violin plot to visualize the distribution of fare prices paid by passengers, broken down by their class (1st, 2nd, and 3rd class).

In [227]:
#Creating the violin plot
fig = px.violin(data,
             y = 'Fare',
             x = 'Pclass',
             color= 'Pclass',
             title='Violin plot visualizing the distribution of fare prices paid by passengers, broken down by their class (1st, 2nd, and 3rd class)'
             )

#showing the plotting
fig.show()

#### Answer 5: Generate a scatter plot to explore the relationship between passengers' ages and fare prices, using different colors and symbols for the different classes.

In [228]:
#Creating the scatter plot
fig = px.scatter(data,
             y = 'Fare',
             x = 'Age',
             color= 'Pclass',
             title='scatter plot to explore the relationship between passengers ages and fare prices'
             )

#showing the plotting
fig.show()

#### Answer 6 : Use faceting to create a scatter plot matrix displaying the relationship between age, fare, and class for passengers who survived and did not survive.