In [18]:
import pandas as pd
import plotly.express as px

file_folder = ('train.csv')
titanic_df = pd.read_csv(file_folder)


In [19]:
# Drop rows with any missing values to ensure every point is a complete record

interactive_data = titanic_df.dropna(subset=['Age', 'Fare',
                                             'Pclass', 'Survived'])

In [20]:
# We'll create a new column with descriptive names for survival status

interactive_data['Survival'] = interactive_data['Survived'].\
                               map({0: 'Perished', 1: 'Survived'})



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [21]:
# Let's inspect the data we'll use for the hover-tooltip

print("--- Data for Interactive Plot ---")
print(interactive_data[['Age', 'Fare', 'Name', 'Pclass', 'Survival']].head())

--- Data for Interactive Plot ---
    Age     Fare                                               Name  Pclass  \
0  22.0   7.2500                            Braund, Mr. Owen Harris       3   
1  38.0  71.2833  Cumings, Mrs. John Bradley (Florence Briggs Th...       1   
2  26.0   7.9250                             Heikkinen, Miss. Laina       3   
3  35.0  53.1000       Futrelle, Mrs. Jacques Heath (Lily May Peel)       1   
4  35.0   8.0500                           Allen, Mr. William Henry       3   

   Survival  
0  Perished  
1  Survived  
2  Survived  
3  Survived  
4  Perished  


In [22]:
# Create the interactive scatter plot
fig = px.scatter(interactive_data, 
                 x='Age', 
                 y='Fare', 
                 color='Survival', 
                 color_discrete_map={'Survived':'forestgreen', 
                                     'Perished':'tomato'},
                 hover_data=['Name', 'Pclass', 'Survival'],
                 title='Age vs. Fare on the Titanic (Interactive)')

In [23]:
# Add some customization to the plot

fig.update_layout(
    xaxis_title="Age (Years)",
    yaxis_title="Fare ($)",
    legend_title="Survival Status"
)
fig.show()

In [24]:
# Create a FamilySize column as our "animation frame"

interactive_data['FamilySize'] = \
interactive_data['SibSp'] + interactive_data['Parch'] + 1



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [25]:
# We need a count of passengers for each Pclass at each FamilySize

animated_data = interactive_data.groupby(['FamilySize', 'Pclass']).\
                                  size().reset_index(name='Count')
print("--- Data for Animated Plot ---")
print(animated_data.head())

--- Data for Animated Plot ---
   FamilySize  Pclass  Count
0           1       1     86
1           1       2     93
2           1       3    225
3           2       1     63
4           2       2     34


In [26]:
# Create the animated bar chart
fig = px.bar(animated_data, 
             x='Pclass', 
             y='Count', 
             animation_frame='FamilySize', 
             range_y=[0, max(animated_data['Count'])],
             color='Pclass',
             title='Passenger Class Count by Family Size (Animated)')

In [None]:
# Update the layout for better readability
fig.update_layout(
    xaxis_title="Passenger Class",
    yaxis_title="Number of Passengers",
    xaxis={'categoryorder':'array', 'categoryarray':[1, 2, 3]} 
)

# --- CODE FOR ANIMATION SPEED CONTROL ---
# This part modifies the animation controls

fig.layout.updatemenus[0].buttons[0].args[1]['frame']['duration'] = 2000   # 2000 milliseconds = 2 seconds per frame
fig.layout.updatemenus[0].buttons[0].args[1]['transition']['duration'] = 500 # Smooth transition over 500 milliseconds

fig.show()