In [1]:
import pandas as pd
import plotly.express as px
# import plotly.graph_objects as go
# import plotly.io as pio

# Read the wrangled student-profile export into the working DataFrame.
df = pd.read_csv(filepath_or_buffer='Student Profiles Wrangled.csv')

In [2]:
def categorize_gpa(row):
    """Map a row's numeric GPA onto a coarse, human-readable GPA band.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide a ``'GPA'`` entry.

    Returns
    -------
    str
        One of ``'Less Than 1'``, ``'1 - 2'``, ``'2 - 3'``, ``'3 - 3.5'``,
        ``'More Than 3.5'``, or ``'Unknown'`` when the GPA is missing.
    """
    gpa = row['GPA']
    # A missing GPA fails every `<` comparison and would otherwise fall
    # through to the top band, so it is reported explicitly instead.
    if pd.isna(gpa):
        return 'Unknown'
    # Each branch is only reached when the previous upper bound failed, so
    # the lower bound of every band is already guaranteed.
    if gpa < 1:
        return 'Less Than 1'
    if gpa < 2:
        return '1 - 2'
    if gpa < 3:
        return '2 - 3'
    if gpa < 3.5:
        return '3 - 3.5'
    return 'More Than 3.5'

# Derive the GPA band for every student row, then store it as a new column.
gpa_categories = df.apply(categorize_gpa, axis=1)
df['GPA Category'] = gpa_categories

In [3]:
# Summarise the column names, non-null counts and dtypes of df.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 23 columns):
 #   Column                                 Non-Null Count  Dtype  
---  ------                                 --------------  -----  
 0   STUDENT ID                             244 non-null    object 
 1   SALUTATION                             244 non-null    object 
 2   GENDER                                 244 non-null    object 
 3   NATIONALITY                            244 non-null    object 
 4   DOB                                    244 non-null    object 
 5   HIGHEST QUALIFICATION                  244 non-null    object 
 6   NAME OF QUALIFICATION AND INSTITUTION  244 non-null    object 
 7   DATE ATTAINED HIGHEST QUALIFICATION    244 non-null    object 
 8   DESIGNATION                            244 non-null    object 
 9   INTAKE NO                              244 non-null    object 
 10  COMMENCEMENT DATE                      244 non-null    object 
 11  COMPLE

In [4]:
# Preview the first row to check column names and sample values.
# NOTE(review): the rendered row contains per-student details; consider
# clearing this output before sharing the notebook.
df.head(1)

Unnamed: 0,STUDENT ID,SALUTATION,GENDER,NATIONALITY,DOB,HIGHEST QUALIFICATION,NAME OF QUALIFICATION AND INSTITUTION,DATE ATTAINED HIGHEST QUALIFICATION,DESIGNATION,INTAKE NO,...,COURSE FUNDING,REGISTRATION FEE,PAYMENT MODE,COURSE FEE,GPA,CITIZENSHIP_STATUS,date_diff,COURSE,Age,GPA Category
0,2020/1101-013/001,Ms,F,Singaporean,1978-04-03,Certificate,"Certificate in Office Skills, ITE",2016-11-06,"Snr Associate, Client Services",13th,...,Subsidiesed,107,NETS,1712,3.0,L,456,1101,46,3 - 3.5


In [7]:
# Three-level sunburst: gender -> citizenship status -> GPA category.
sunburst_levels = ['GENDER', 'CITIZENSHIP_STATUS', 'GPA Category']
fig = px.sunburst(
    df,
    path=sunburst_levels,
    title='Testing Sunburst',
    template='plotly_dark',
)
fig.show()

In [4]:
# Sorted copy of df used by the overview charts; ordering follows the
# 'DATE ATTAINED HIGHEST QUALIFICATION' column (stored as object dtype).
data = df.sort_values(by='DATE ATTAINED HIGHEST QUALIFICATION')

In [5]:
# Using plotly.express
import plotly.express as px

# px.line connects points in row order, so the frame is sorted by DOB first;
# plotting the unsorted frame produces a criss-crossing line, not a trend.
# NOTE(review): assumes DOB strings are ISO formatted (YYYY-MM-DD), as in the
# sample row, so string order equals chronological order.
fig = px.line(df.sort_values('DOB'), x='DOB', y='COURSE FEE',
              title='Course Fee by Date of Birth')
fig.show()

In [6]:
import plotly.express as px

# Overview charts built from the sorted frame `data`.

# 1. Gender Distribution
gender_dist = px.bar(data, x='GENDER', title='Gender Distribution')

# 2. Age Distribution
age_dist = px.histogram(data, x='Age', title='Age Distribution')

# 3. Highest Qualification Distribution
qualification_dist = px.pie(data, names='HIGHEST QUALIFICATION', title='Highest Qualification Distribution')

# 4. Course Intake Over Time
# Plot the number of students per intake; the raw STUDENT ID strings are
# identifiers and do not form a meaningful y-axis. Intakes are ordered by
# their earliest commencement date so the x-axis runs chronologically.
# NOTE(review): assumes one 'INTAKE NO' label denotes one cohort; if labels
# repeat across courses, group by ['COURSE', 'INTAKE NO'] instead.
intake_counts = (
    data.assign(**{'COMMENCEMENT DATE': pd.to_datetime(data['COMMENCEMENT DATE'])})
        .groupby('INTAKE NO', as_index=False)
        .agg(Students=('STUDENT ID', 'count'),
             first_commencement=('COMMENCEMENT DATE', 'min'))
        .sort_values('first_commencement')
)
intake_over_time = px.line(intake_counts, x='INTAKE NO', y='Students',
                           title='Course Intake Over Time', markers=True)

# 5. Nationality Breakdown
nationality_breakdown = px.bar(data, x='NATIONALITY', title='Nationality Breakdown')

# 6. GPA Distribution
gpa_dist = px.histogram(data, x='GPA', title='GPA Distribution')

# 7. Course Fee vs GPA
course_fee_vs_gpa = px.scatter(data, x='COURSE FEE', y='GPA', title='Course Fee vs GPA')

# 8. Age vs GPA
age_vs_gpa = px.scatter(data, x='Age', y='GPA', title='Age vs GPA')

# 9. Funding Type Distribution
funding_type_dist = px.bar(data, x='COURSE FUNDING', title='Funding Type Distribution')

# 10. Designation Distribution
designation_dist = px.bar(data, y='DESIGNATION', title='Designation Distribution')

# Display the plots in the order they were defined
for figure in (gender_dist, age_dist, qualification_dist, intake_over_time,
               nationality_breakdown, gpa_dist, course_fee_vs_gpa, age_vs_gpa,
               funding_type_dist, designation_dist):
    figure.show()


In [7]:
# Multivariate charts built from the sorted frame `data`.

# 1. Age and GPA by Gender: Bubble Chart (bubble size encodes the course fee)
bubble_chart = px.scatter(data, x='Age', y='GPA', size='COURSE FEE', color='GENDER',
                          title='Age and GPA by Gender', hover_name='DESIGNATION',
                          size_max=60)

# 2. Course Fee vs GPA by Funding Type: Scatter Plot with Trendlines
# trendline='ols' relies on the statsmodels package being installed.
scatter_trendline = px.scatter(data, x='COURSE FEE', y='GPA', color='COURSE FUNDING',
                               title='Course Fee vs GPA by Funding Type', trendline='ols')

# 3. Distribution of Highest Qualification by Nationality: Stacked Bar Chart
# px.histogram counts rows per nationality; using the STUDENT ID strings as
# the y value would put identifiers on the axis instead of a count.
stacked_bar = px.histogram(data, x='NATIONALITY', color='HIGHEST QUALIFICATION',
                           title='Distribution of Highest Qualification by Nationality',
                           barmode='stack')

# 4. GPA Distribution by Designation: Box Plot
box_plot = px.box(data, x='DESIGNATION', y='GPA', color='DESIGNATION', title='GPA Distribution by Designation')

# 5. Age vs Course Fee by Gender and Nationality: 3D Scatter Plot
scatter_3d = px.scatter_3d(data, x='Age', y='COURSE FEE', z='GPA', color='GENDER', symbol='NATIONALITY',
                           title='Age vs Course Fee by Gender and Nationality')

# Display the plots
bubble_chart.show()
scatter_trendline.show()
stacked_bar.show()
box_plot.show()
scatter_3d.show()

In [5]:
# Re-inspect df's columns, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 23 columns):
 #   Column                                 Non-Null Count  Dtype   
---  ------                                 --------------  -----   
 0   STUDENT ID                             244 non-null    object  
 1   SALUTATION                             244 non-null    object  
 2   GENDER                                 244 non-null    object  
 3   NATIONALITY                            244 non-null    object  
 4   DOB                                    244 non-null    object  
 5   HIGHEST QUALIFICATION                  244 non-null    object  
 6   NAME OF QUALIFICATION AND INSTITUTION  244 non-null    object  
 7   DATE ATTAINED HIGHEST QUALIFICATION    244 non-null    object  
 8   DESIGNATION                            244 non-null    object  
 9   INTAKE NO                              244 non-null    object  
 10  COMMENCEMENT DATE                      244 non-null    object 

In [8]:
# Sunburst of gender -> citizenship status -> GPA category; the title names
# all three levels of the plotted hierarchy.
fig1 = px.sunburst(df, path=['GENDER', 'CITIZENSHIP_STATUS', 'GPA Category'])
fig1.update_layout(title='Distribution of Gender, Citizenship Status, and GPA Category')

In [9]:
import pandas as pd
import plotly.express as px

# Two-level sunburst charts built from df. Each figure is kept as fig1..fig10;
# only fig10 is rendered by this cell (it is the final expression).
# Side effect: adds 'Age Group', 'GPA Range' and 'Completion Year' to df.

# 1. Gender and Nationality
fig1 = px.sunburst(df, path=['GENDER', 'NATIONALITY']).update_layout(
    title='Distribution of Gender and Nationality')

# 2. Highest Qualification and Course
fig2 = px.sunburst(df, path=['HIGHEST QUALIFICATION', 'COURSE']).update_layout(
    title='Distribution of Highest Qualification and Course')

# 3. Full-Time or Part-Time and Course Funding
fig3 = px.sunburst(df, path=['FULL-TIME OR PART-TIME', 'COURSE FUNDING']).update_layout(
    title='Distribution of Study Mode and Course Funding')

# 4. Citizenship Status and Nationality
fig4 = px.sunburst(df, path=['CITIZENSHIP_STATUS', 'NATIONALITY']).update_layout(
    title='Distribution of Citizenship Status and Nationality')

# 5. Payment Mode and Course Fee
fig5 = px.sunburst(df, path=['PAYMENT MODE', 'COURSE FEE']).update_layout(
    title='Distribution of Payment Mode and Course Fee')

# 6. Age Groups and Gender
df['Age Group'] = pd.cut(df['Age'], bins=[0, 20, 30, 40, 50, 60, 100],
                         labels=['0-20', '21-30', '31-40', '41-50', '51-60', '60+'])
fig6 = px.sunburst(df, path=['Age Group', 'GENDER']).update_layout(
    title='Distribution of Age Groups and Gender')

# 7. Course and GPA Range
# include_lowest=True keeps a GPA of exactly 0 in the '0-2' band; pd.cut's
# first interval is otherwise left-open and would turn it into NaN.
df['GPA Range'] = pd.cut(df['GPA'], bins=[0, 2, 2.5, 3, 3.5, 4],
                         labels=['0-2', '2-2.5', '2.5-3', '3-3.5', '3.5-4'],
                         include_lowest=True)
fig7 = px.sunburst(df, path=['COURSE', 'GPA Range']).update_layout(
    title='Distribution of Course and GPA Range')

# 8. Intake No and Completion Date Year
df['Completion Year'] = pd.to_datetime(df['COMPLETION DATE']).dt.year
fig8 = px.sunburst(df, path=['INTAKE NO', 'Completion Year']).update_layout(
    title='Distribution of Intake Number and Completion Year')

# 9. Designation and Highest Qualification
fig9 = px.sunburst(df, path=['DESIGNATION', 'HIGHEST QUALIFICATION']).update_layout(
    title='Distribution of Designation and Highest Qualification')

# 10. Salutation, Gender, and Nationality
fig10 = px.sunburst(df, path=['SALUTATION', 'GENDER', 'NATIONALITY']).update_layout(
    title='Distribution of Salutation, Gender, and Nationality')

# Render the last figure as the cell output.
fig10

In [10]:
# Render fig2.
# NOTE(review): fig2 is assigned in more than one cell of this notebook, so
# the chart shown here depends on which of those cells ran most recently.
fig2.show()

In [11]:
# Render fig10.
# NOTE(review): fig10 is assigned in more than one cell of this notebook, so
# the chart shown here depends on which of those cells ran most recently.
fig10.show()

In [12]:
import pandas as pd
import plotly.express as px
import numpy as np

# Four-level sunburst charts built from df. Figures are kept as fig1..fig10
# (fig7 is disabled below); only fig10 is rendered by this cell.
# Side effect: adds several derived range/group columns to df.

# 1. Course, GPA Range, Age Group, and Gender
# include_lowest=True keeps a GPA of exactly 0 in the first band instead of
# turning it into NaN (pd.cut's first interval is otherwise left-open).
df['GPA Range'] = pd.cut(df['GPA'], bins=[0, 2, 2.5, 3, 3.5, 4],
                         labels=['0-2', '2-2.5', '2.5-3', '3-3.5', '3.5-4'],
                         include_lowest=True)
df['Age Group'] = pd.cut(df['Age'], bins=[0, 20, 30, 40, 50, 60, 100],
                         labels=['0-20', '21-30', '31-40', '41-50', '51-60', '60+'])
fig1 = px.sunburst(df, path=['COURSE', 'GPA Range', 'Age Group', 'GENDER']).update_layout(
    title='Distribution of Course, GPA Range, Age Group, and Gender')

# 2. Nationality, Citizenship Status, Course Funding, and Payment Mode
fig2 = px.sunburst(df, path=['NATIONALITY', 'CITIZENSHIP_STATUS', 'COURSE FUNDING', 'PAYMENT MODE']).update_layout(
    title='Distribution of Nationality, Citizenship Status, Course Funding, and Payment Mode')

# 3. Highest Qualification, Designation, Full-Time or Part-Time, and Course Fee Range
# include_lowest=True keeps a course fee of exactly 0 in the '0-5k' band.
df['Course Fee Range'] = pd.cut(df['COURSE FEE'], bins=[0, 5000, 10000, 15000, 20000, np.inf],
                                labels=['0-5k', '5k-10k', '10k-15k', '15k-20k', '20k+'],
                                include_lowest=True)
fig3 = px.sunburst(df, path=['HIGHEST QUALIFICATION', 'DESIGNATION', 'FULL-TIME OR PART-TIME', 'Course Fee Range']).update_layout(
    title='Distribution of Qualification, Designation, Study Mode, and Course Fee Range')

# 4. Intake Year, Completion Year, Study Duration, and Course
df['Intake Year'] = pd.to_datetime(df['COMMENCEMENT DATE']).dt.year
df['Completion Year'] = pd.to_datetime(df['COMPLETION DATE']).dt.year
df['Study Duration'] = df['date_diff'] // 365  # whole years
# Bin the fractional duration with left-closed intervals so that e.g.
# 456 days (about 1.25 years) lands in '1-2 years'. Binning the floored
# value with (-1, 1] put both 0 and 1 whole years into '0-1 year'.
# NOTE(review): date_diff is presumed to be in days, as the // 365 implies.
df['Study Duration Group'] = pd.cut(df['date_diff'] / 365, bins=[0, 1, 2, 3, 4, np.inf],
                                    labels=['0-1 year', '1-2 years', '2-3 years', '3-4 years', '4+ years'],
                                    right=False)
fig4 = px.sunburst(df, path=['Intake Year', 'Completion Year', 'Study Duration Group', 'COURSE']).update_layout(
    title='Distribution of Intake Year, Completion Year, Study Duration, and Course')

# 5. Age Group, Highest Qualification, Designation, and GPA Range
fig5 = px.sunburst(df, path=['Age Group', 'HIGHEST QUALIFICATION', 'DESIGNATION', 'GPA Range']).update_layout(
    title='Distribution of Age Group, Highest Qualification, Designation, and GPA Range')

# 6. Course, Gender, Nationality, and Course Funding
fig6 = px.sunburst(df, path=['COURSE', 'GENDER', 'NATIONALITY', 'COURSE FUNDING']).update_layout(
    title='Distribution of Course, Gender, Nationality, and Course Funding')

# 7. Payment Mode, Course Fee Range, Registration Fee Range, and Full-Time or Part-Time
# (disabled in the original notebook; fig7 is NOT reassigned by this cell)
# df['Registration Fee Range'] = pd.cut(df['REGISTRATION FEE'], bins=[0, 100, 200, 300, 400, np.inf], labels=['0-100', '101-200', '201-300', '301-400', '400+'])
# fig7 = px.sunburst(df, path=['PAYMENT MODE', 'Course Fee Range', 'Registration Fee Range', 'FULL-TIME OR PART-TIME'])
# fig7.update_layout(title='Distribution of Payment Mode, Course Fee, Registration Fee, and Study Mode')

# 8. Highest Qualification, Age Group, GPA Range, and Citizenship Status
fig8 = px.sunburst(df, path=['HIGHEST QUALIFICATION', 'Age Group', 'GPA Range', 'CITIZENSHIP_STATUS']).update_layout(
    title='Distribution of Highest Qualification, Age Group, GPA Range, and Citizenship Status')

# 9. Course, Study Duration Group, Gender, and Course Fee Range
fig9 = px.sunburst(df, path=['COURSE', 'Study Duration Group', 'GENDER', 'Course Fee Range']).update_layout(
    title='Distribution of Course, Study Duration, Gender, and Course Fee Range')

# 10. Nationality, Highest Qualification, Designation, and Age Group
fig10 = px.sunburst(df, path=['NATIONALITY', 'HIGHEST QUALIFICATION', 'DESIGNATION', 'Age Group']).update_layout(
    title='Distribution of Nationality, Highest Qualification, Designation, and Age Group')

# Render the last figure as the cell output.
fig10

In [10]:
# List the column labels currently present on df.
df.columns

Index(['STUDENT ID', 'SALUTATION', 'GENDER', 'NATIONALITY', 'DOB',
       'HIGHEST QUALIFICATION', 'NAME OF QUALIFICATION AND INSTITUTION',
       'DATE ATTAINED HIGHEST QUALIFICATION', 'DESIGNATION', 'INTAKE NO',
       'COMMENCEMENT DATE', 'COMPLETION DATE', 'FULL-TIME OR PART-TIME',
       'COURSE FUNDING', 'REGISTRATION FEE', 'PAYMENT MODE', 'COURSE FEE',
       'GPA', 'CITIZENSHIP_STATUS', 'date_diff', 'COURSE', 'Age',
       'GPA Category'],
      dtype='object')

In [9]:
# Preview the first row of df again.
# NOTE(review): the rendered row contains per-student details; consider
# clearing this output before sharing the notebook.
df.head(1)

Unnamed: 0,STUDENT ID,SALUTATION,GENDER,NATIONALITY,DOB,HIGHEST QUALIFICATION,NAME OF QUALIFICATION AND INSTITUTION,DATE ATTAINED HIGHEST QUALIFICATION,DESIGNATION,INTAKE NO,...,COURSE FUNDING,REGISTRATION FEE,PAYMENT MODE,COURSE FEE,GPA,CITIZENSHIP_STATUS,date_diff,COURSE,Age,GPA Category
0,2020/1101-013/001,Ms,F,Singaporean,1978-04-03,Certificate,"Certificate in Office Skills, ITE",2016-11-06,"Snr Associate, Client Services",13th,...,Subsidiesed,107,NETS,1712,3.0,L,456,1101,46,3 - 3.5


In [11]:
import pandas as pd
import plotly.express as px

# Sunburst of GPA category -> age group -> nationality, sized by course fee.

# Feature engineering
# The last bin is open-ended so it matches its '>55' label. With a finite
# upper edge of 65, older students would get a NaN age group, and a missing
# value on a non-leaf path level is rejected by px.sunburst.
df['Age Group'] = pd.cut(df['Age'], bins=[0, 25, 35, 45, 55, float('inf')],
                         labels=['<25', '25-35', '35-45', '45-55', '>55'])

# Create the sunburst plot
fig = px.sunburst(df,
                  path=['GPA Category', 'Age Group', 'NATIONALITY'],
                  values='COURSE FEE',
                  title='Sunburst Chart of GPA Category, Age Group, and Nationality',
                  color='GPA Category')

fig.show()

In [18]:
# Re-inspect df's columns, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 23 columns):
 #   Column                                 Non-Null Count  Dtype   
---  ------                                 --------------  -----   
 0   STUDENT ID                             244 non-null    object  
 1   SALUTATION                             244 non-null    object  
 2   GENDER                                 244 non-null    object  
 3   NATIONALITY                            244 non-null    object  
 4   DOB                                    244 non-null    object  
 5   HIGHEST QUALIFICATION                  244 non-null    object  
 6   NAME OF QUALIFICATION AND INSTITUTION  244 non-null    object  
 7   DATE ATTAINED HIGHEST QUALIFICATION    244 non-null    object  
 8   DESIGNATION                            244 non-null    object  
 9   INTAKE NO                              244 non-null    object  
 10  COMMENCEMENT DATE                      244 non-null    object 

In [32]:
# Sunburst of course -> gender -> age group, sized by student count.
# NOTE(review): matplotlib is no longer used in this cell; the import is kept
# in case cells outside this view rely on `plt`.
import matplotlib.pyplot as plt

# Create age groups. include_lowest=True keeps students aged exactly 18 in
# the first group (pd.cut's first interval is otherwise left-open).
df['Age Group'] = pd.cut(df['Age'], bins=[18, 25, 35, 45, 60],
                         labels=['Young adulthood', 'Millennial', 'Middle Age', 'Seniors'],
                         include_lowest=True)

# Build a plotting frame without missing values and with a Count column for
# aggregation. This is a new frame: dropping rows from df in place would
# silently shrink the data seen by every other cell.
df_plot = df.dropna(subset=['COURSE', 'GENDER', 'Age Group']).assign(Count=1)

# Create the sunburst plot. (The previous empty plt.figure() call was removed:
# it only emitted a blank matplotlib figure and was overwritten right away.)
fig = px.sunburst(df_plot, path=['COURSE', 'GENDER', 'Age Group'], values='Count',
                  color='GENDER', hover_data=['GPA'],
                  color_discrete_map={'M': 'blue', 'F': 'pink'})

# Update layout
fig.update_layout(title='Student Profiles Sunburst Plot',
                  margin=dict(t=40, l=0, r=0, b=5))

# Show the plot
fig.show()

<Figure size 1500x1500 with 0 Axes>

In [35]:
# Sunburst of the five largest courses -> gender -> age group.

# Create age groups. include_lowest=True keeps students aged exactly 18 in
# the '18-25' group (pd.cut's first interval is otherwise left-open).
df['Age Group'] = pd.cut(df['Age'], bins=[18, 25, 35, 45, 60],
                         labels=['18-25', '26-35', '36-45', '46-60'],
                         include_lowest=True)

# Rows usable for this chart, with a Count column for aggregation. A new
# frame is built so df itself keeps all of its rows for other cells.
df_valid = df.dropna(subset=['COURSE', 'GENDER', 'Age Group']).assign(Count=1)

# Filter data to include only the top 5 courses by student count
top_courses = df_valid['COURSE'].value_counts().nlargest(5).index
df_filtered = df_valid[df_valid['COURSE'].isin(top_courses)]

# Create the sunburst plot
fig = px.sunburst(df_filtered, path=['COURSE', 'GENDER', 'Age Group'], values='Count',
                  color='GENDER', hover_data=['GPA'],
                  color_discrete_map={'M': 'blue', 'F': 'pink'})

# Update layout for better aesthetics and readability
fig.update_layout(
    title={
        'text': 'Student Profiles Sunburst Plot',
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    },
    title_font=dict(size=24, family='Arial, sans-serif', color='black'),
    font=dict(family='Arial, sans-serif', size=16, color='black'),
    margin=dict(t=40, l=0, r=0, b=0),
    paper_bgcolor='white',
    plot_bgcolor='white'
)

# Show the plot
fig.show()


In [36]:
# Sunburst of the five largest courses -> gender -> age group, built from
# pre-aggregated counts with the average GPA shown on hover.

# Create age groups. include_lowest=True keeps students aged exactly 18 in
# the '18-25' group (pd.cut's first interval is otherwise left-open).
df['Age Group'] = pd.cut(df['Age'], bins=[18, 25, 35, 45, 60],
                         labels=['18-25', '26-35', '36-45', '46-60'],
                         include_lowest=True)

# Rows usable for this chart. A new frame is built so df itself keeps all of
# its rows for other cells.
df_valid = df.dropna(subset=['COURSE', 'GENDER', 'Age Group', 'GPA'])

# Filter data to include only the top 5 courses by student count
top_courses = df_valid['COURSE'].value_counts().nlargest(5).index

# .assign returns a new frame, so adding Count does not write into a slice
# (which would trigger pandas' SettingWithCopyWarning).
df_filtered = df_valid[df_valid['COURSE'].isin(top_courses)].assign(Count=1)

# Group by course, gender, and age group and calculate average GPA and count.
# observed=True: 'Age Group' is categorical, and without it the result also
# contains every unobserved combination (Count 0, Avg_GPA NaN).
df_grouped = df_filtered.groupby(['COURSE', 'GENDER', 'Age Group'], observed=True).agg(
    Count=('Count', 'sum'),
    Avg_GPA=('GPA', 'mean')
).reset_index()

# Create the sunburst plot
fig = px.sunburst(df_grouped, path=['COURSE', 'GENDER', 'Age Group'], values='Count',
                  color='GENDER', hover_data={'Avg_GPA': ':.2f'},
                  color_discrete_map={'M': 'blue', 'F': 'pink'})

# Update layout for better aesthetics and readability
fig.update_layout(
    title={
        'text': 'Student Profiles Sunburst Plot',
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    },
    title_font=dict(size=24, family='Arial, sans-serif', color='black'),
    font=dict(family='Arial, sans-serif', size=16, color='black'),
    margin=dict(t=40, l=0, r=0, b=0),
    paper_bgcolor='white',
    plot_bgcolor='white'
)

# Show the plot
fig.show()


In [37]:
# Sunburst of the five largest courses -> gender -> age group, built from
# pre-aggregated counts with both average GPA and count shown on hover.

# Create age groups. include_lowest=True keeps students aged exactly 18 in
# the '18-25' group (pd.cut's first interval is otherwise left-open).
df['Age Group'] = pd.cut(df['Age'], bins=[18, 25, 35, 45, 60],
                         labels=['18-25', '26-35', '36-45', '46-60'],
                         include_lowest=True)

# Rows usable for this chart. A new frame is built so df itself keeps all of
# its rows for other cells.
df_valid = df.dropna(subset=['COURSE', 'GENDER', 'Age Group', 'GPA'])

# Filter data to include only the top 5 courses by student count
top_courses = df_valid['COURSE'].value_counts().nlargest(5).index

# .assign returns a new frame, so adding Count does not write into a slice
# (which would trigger pandas' SettingWithCopyWarning).
df_filtered = df_valid[df_valid['COURSE'].isin(top_courses)].assign(Count=1)

# Group by course, gender, and age group and calculate average GPA and count.
# observed=True: 'Age Group' is categorical, and without it the result also
# contains every unobserved combination (Count 0, Avg_GPA NaN).
df_grouped = df_filtered.groupby(['COURSE', 'GENDER', 'Age Group'], observed=True).agg(
    Count=('Count', 'sum'),
    Avg_GPA=('GPA', 'mean')
).reset_index()

# Create the sunburst plot
fig = px.sunburst(df_grouped, path=['COURSE', 'GENDER', 'Age Group'], values='Count',
                  color='GENDER', hover_data={'Avg_GPA': ':.2f', 'Count': True},
                  color_discrete_map={'M': 'blue', 'F': 'pink'})

# Update layout for better aesthetics and readability
fig.update_layout(
    title={
        'text': 'Student Profiles Sunburst Plot',
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    },
    title_font=dict(size=24, family='Arial, sans-serif', color='black'),
    font=dict(family='Arial, sans-serif', size=16, color='black'),
    margin=dict(t=40, l=0, r=0, b=0),
    paper_bgcolor='white',
    plot_bgcolor='white'
)

# Show the plot
fig.show()


In [None]:
# Show only the first rows; rendering the whole frame bloats the notebook
# and exposes every student record in the saved output.
df.head()

In [82]:
# Sunburst of the five largest courses -> age group, coloured by average GPA.

# Create simplified age groups. include_lowest=True keeps students aged
# exactly 18 in the first group (pd.cut's first interval is otherwise
# left-open). The first label is spelled 'Millennial' (was 'Millenial').
df['Age Group'] = pd.cut(df['Age'], bins=[18, 35, 60],
                         labels=['Millennial', 'Middle Aged'],
                         include_lowest=True)

# Rows usable for this chart. A new frame is built so df itself keeps all of
# its rows for other cells.
df_valid = df.dropna(subset=['COURSE', 'Age Group', 'GPA'])

# Filter data to include only the top 5 courses by student count
top_courses = df_valid['COURSE'].value_counts().nlargest(5).index

# .assign returns a new frame, so adding Count does not write into a slice
# (which would trigger pandas' SettingWithCopyWarning).
df_filtered = df_valid[df_valid['COURSE'].isin(top_courses)].assign(Count=1)

# Group by course and age group and calculate average GPA and count.
# observed=True: 'Age Group' is categorical, and without it the result also
# contains unobserved combinations whose Avg_GPA is NaN; Avg_GPA drives the
# colour scale, so those rows must not reach the chart.
df_grouped = df_filtered.groupby(['COURSE', 'Age Group'], observed=True).agg(
    Count=('Count', 'sum'),
    Avg_GPA=('GPA', 'mean')
).reset_index()

# Calculate the average GPA per course
course_gpa = df_filtered.groupby('COURSE')['GPA'].mean().reset_index(name='GPA_course')

# Merge the per-course averages into the grouped dataframe
df_grouped = df_grouped.merge(course_gpa, on='COURSE')

# Create the sunburst plot. The hover_data key order fixes the customdata
# layout used below: [0] = Avg_GPA, [1] = GPA_course.
fig = px.sunburst(df_grouped, path=['COURSE', 'Age Group'], values='Count',
                  color='Avg_GPA',
                  hover_data={
                      'Avg_GPA': ':.2f',
                      'GPA_course': ':.2f'
                  })

# Update hover template to show average GPAs for each level
fig.update_traces(
    hovertemplate='<b>%{label}</b><br>' +
                  'Count: %{value}<br>' +
                  'Course GPA: %{customdata[1]:.2f}<br>' +
                  'Age Group GPA: %{customdata[0]:.2f}'
)

# Update layout for better aesthetics and readability
fig.update_layout(
    title={
        'text': 'Course and Age with Average GPAs',
        'y':0.975,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    },
    title_font=dict(size=24, family='Arial, sans-serif', color='black'),
    font=dict(family='Arial, sans-serif', size=16, color='black'),
    margin=dict(t=40, l=0, r=0, b=10),
    paper_bgcolor='white',
    plot_bgcolor='white'
)

# Show the plot
fig.show()