### Importing the required libraries

In [1]:
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from datetime import date, time, datetime
import plotly.graph_objs as go
import plotly.offline as pyo
import plotly.figure_factory as ff
import plotly.express as px
from plotly import tools
from plotly.subplots import make_subplots
from plotly.offline import iplot


Blowfish has been deprecated and will be removed in a future release



In [2]:
# Location of the students-performance CSV.
# NOTE(review): this is an absolute, machine-specific Windows path; the
# notebook will not run elsewhere until it is pointed at a local copy.
csv_path = r"C:\Users\erpra\Documents\ExploratoryDataAnalysis\Student_performance_in_Exam\StudentsPerformance.csv"

# Read the file into the working frame and preview the first rows.
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
3,male,group A,associate's degree,free/reduced,none,47,57,44
4,male,group C,some college,standard,none,76,78,75


In [3]:
# Gather the summary figures first, then report them in one place.
frame_shape = df.shape
missing_total = df.isna().sum().sum()   # nulls summed over every column
column_count = len(df.columns)
column_types = df.dtypes

print("Shape: ", frame_shape)
print("Total null enteries: ", missing_total)
print("Total number columns: ", column_count)
print("Data Types: \n \n", column_types)

Shape:  (1000, 8)
Total null enteries:  0
Total number columns:  8
Data Types: 
 
 gender                         object
race/ethnicity                 object
parental level of education    object
lunch                          object
test preparation course        object
math score                      int64
reading score                   int64
writing score                   int64
dtype: object


In [4]:
## Renaming the columns to snake_case identifiers

# Old label -> new label; columns not listed here keep their names.
column_aliases = {
    "race/ethnicity": "race_ethnicity",
    "parental level of education": "parental_level_of_education",
    "test preparation course": "test_preparation_course",
    "math score": "math_score",
    "reading score": "reading_score",
    "writing score": "writing_score",
}

df = df.rename(columns=column_aliases)

In [5]:
# Display the frame's column labels to confirm the rename took effect.
df.columns

Index(['gender', 'race_ethnicity', 'parental_level_of_education', 'lunch',
       'test_preparation_course', 'math_score', 'reading_score',
       'writing_score'],
      dtype='object')

### 1. Scatter Plot

In [6]:
# Minimal scatter trace: reading score on x, writing score on y, markers only.
scatter = go.Scatter(x=df['reading_score'], y=df['writing_score'], mode='markers')

# Wrap the single trace in a figure and render it inline.
fig = go.Figure(scatter)
iplot(fig)

In [7]:
## Styled scatter: custom markers plus a centred title and axis labels

# Marker appearance: semi-transparent blue diamonds with a black outline.
marker_style = {
    'size': 12,
    'color': 'rgb(0,189,255)',
    'symbol': 'diamond',
    'opacity': 0.75,
    'line': dict(color='black', width=1.5),
}

data = go.Scatter(
    x=df['reading_score'],
    y=df['writing_score'],
    mode='markers',
    marker=marker_style,
)

# Title anchored at its top-centre, placed at x=0.5 / y=0.9 of the figure.
title_style = {
    'text': 'Reading Score & Writing Score',
    'x': 0.5,
    'y': 0.9,
    'xanchor': 'center',
    'yanchor': 'top',
}

layout = go.Layout(
    title=title_style,
    xaxis=dict(title='Reading Score'),
    yaxis=dict(title='Writing Score'),
    hovermode='closest',
    template='plotly_white',
)

fig = go.Figure(data=data, layout=layout)
iplot(fig)

In [8]:
# Split the frame once per gender so each trace reads from its own subset.
male_rows = df[df['gender'] == 'male']
female_rows = df[df['gender'] == 'female']

# One marker trace per gender: maths score on x, writing score on y.
male = go.Scatter(
    x=male_rows['math_score'],
    y=male_rows['writing_score'],
    name='Male',
    mode='markers',
    showlegend=True,
    marker={'color': 'cornflowerblue', 'size': 9, 'opacity': 0.55},
)

female = go.Scatter(
    x=female_rows['math_score'],
    y=female_rows['writing_score'],
    name='Female',
    mode='markers',
    showlegend=True,
    marker={'color': 'darkorange', 'size': 9, 'opacity': 0.55},
)

data = [male, female]

layout = go.Layout(
    title='Maths & Writing score of male & female',
    xaxis={'title': 'Maths Score'},
    yaxis={'title': 'Writing Score'},
    width=700,
    height=450,
    template='simple_white',
)

fig = go.Figure(data=data, layout=layout)
iplot(fig)

In [9]:
# Reading vs writing score, with the maths score mapped onto marker colour
# (Cividis scale, colour bar shown) and exposed as hover text.
math_marker = {
    'size': 14,
    'color': df['math_score'],
    'colorscale': 'Cividis',
    'showscale': True,
    'colorbar': {'title': 'Maths Score'},
    'opacity': 0.6,
}

score_trace = go.Scatter(
    x=df['reading_score'],
    y=df['writing_score'],
    mode='markers',
    text=df['math_score'],
    marker=math_marker,
)
data = [score_trace]

layout = go.Layout(
    title={
        'text': 'Reading - Writting - Maths score',
        'x': 0.5,
        'y': 0.9,
        'xanchor': 'center',
        'yanchor': 'top',
    },
    xaxis={'title': "Reading Score"},
    yaxis={'title': 'Writting Score'},
    template='simple_white',
)

fig = go.Figure(data=data, layout=layout)
iplot(fig)

#### Scatter plot using a 'for' loop over unique entries

In [10]:
# One marker trace per parental education level: reading score on x,
# maths score on y.
#
# groupby(..., sort=False) yields the groups in order of first appearance,
# i.e. the same order as .unique(), while filtering the frame only once per
# level instead of twice.
data = []

for level, level_rows in df.groupby('parental_level_of_education', sort=False):
    data.append(go.Scatter(x=level_rows['reading_score'],
                           y=level_rows['math_score'],
                           mode='markers',
                           name=str(level),
                           showlegend=True,
                           marker=dict(size=12,
                                       opacity=0.65)))

# The y values are maths scores, so the axis is labelled accordingly
# (it was previously labelled as the writing score).
layout = go.Layout(title='Scores by level of Education',
                   xaxis=dict(title='Reading Score'),
                   yaxis=dict(title='Maths Score'),
                   template='simple_white')

fig = go.Figure(data=data, layout=layout)

iplot(fig)

## 2. Bubble Charts

- A bubble chart is a type of chart that displays **three dimensions** of data.
- Bubble charts can be considered a variation of the scatter plot, in which the data points are replaced with bubbles.
- Define the third variable with <code>size</code> in bubble charts.

In [11]:
# Bubble chart: reading vs writing score, bubble size driven by maths score
# (scaled by 0.5); the maths score is also shown as hover text.
bubble_marker = {
    'size': df['math_score'] * 0.5,
    'color': '#FFAE00',
    'showscale': False,
    'opacity': 0.5,
    'line': {'color': 'black', 'width': 0.5},
}

bubble_trace = go.Scatter(
    x=df['reading_score'],
    y=df['writing_score'],
    mode='markers',
    text=df['math_score'],
    marker=bubble_marker,
)
data = [bubble_trace]

layout = go.Layout(
    title={
        'text': 'Reading Score - Writting Score - Maths Score',
        'x': 0.5,
        'y': 0.9,
        'xanchor': 'center',
        'yanchor': 'top',
    },
    xaxis={'title': 'Reading Score'},
    yaxis={'title': 'Writting Score'},
    template='plotly_white',
)

fig = go.Figure(data=data, layout=layout)
iplot(fig)

## 3D Scatter Plots

In [12]:
## Add a column holding each student's combined score

# Row-wise sum of the three subject scores.
subject_columns = ['reading_score', 'writing_score', 'math_score']
df['total_score'] = df[subject_columns].sum(axis=1)

In [13]:
# 3D scatter: one axis per subject score, marker colour driven by the
# combined total_score column.
total_marker = {
    'color': df['total_score'],
    'colorscale': 'brwnyl',
    'showscale': True,
    'colorbar': {'title': 'Total Score'},
    'opacity': 0.7,
}

data = go.Scatter3d(
    x=df['reading_score'],
    y=df['writing_score'],
    z=df['math_score'],
    mode='markers',
    marker=total_marker,
)

# Axis titles for 3D traces live under layout.scene rather than layout.xaxis.
scene_axes = {
    'xaxis': {'title': 'Reading Score'},
    'yaxis': {'title': 'Writting Score'},
    'zaxis': {'title': 'Maths Score'},
}

layout = go.Layout(
    title={
        'text': 'Reading Score - Writting Score - Maths Score',
        'x': 0.5,
        'y': 0.9,
        'xanchor': 'center',
        'yanchor': 'top',
    },
    scene=scene_axes,
    font={'size': 12},
    template='plotly_white',
)

fig = go.Figure(data=data, layout=layout)
iplot(fig)

### 3. Line Charts

- A line chart displays a series of data points (markers) connected by line segments.
- It is similar to a scatter plot except that the measurement points are ordered (typically by their x-axis value) and joined with straight line segments.
- Often used to visualize a trend in data over intervals of time - known as a time series.
- To create a line chart, set the <code>mode</code> parameter to "lines".
- As with scatter plots, you can edit line qualities such as line color, line width, etc. through the <code>line</code> parameter.

In [14]:
# Synthetic yearly head-counts used by the line-chart examples below.
#
# A seeded generator is used so that Restart & Run All reproduces the same
# numbers (and therefore the same charts) on every run.
STUDENT_SEED = 42
student_rng = np.random.default_rng(STUDENT_SEED)

student_df = pd.DataFrame(
    {
        # 1 January of ten consecutive years: 2023 .. 2032.
        'date': [date(year=2023 + i, month=1, day=1) for i in range(10)],
        # integers() uses a half-open range like randint: students in
        # [25, 60), lecturers in [10, 20).
        'students': student_rng.integers(25, 60, 10),
        'lecturers': student_rng.integers(10, 20, 10)
    }
)

student_df.head()

Unnamed: 0,date,students,lecturers
0,2023-01-01,50,14
1,2024-01-01,30,13
2,2025-01-01,44,18
3,2026-01-01,52,17
4,2027-01-01,29,16


In [15]:
# Simplest line chart: students per year, points joined by straight segments.
students_line = go.Scatter(x=student_df['date'], y=student_df['students'], mode='lines')
line = [students_line]

fig = go.Figure(data=line)
iplot(fig)

In [16]:
# Students-per-year line, now with a named trace, a centred title and
# labelled axes.
data = go.Scatter(x=student_df['date'],
                  y=student_df['students'],
                  mode='lines',
                  name='students')

centred_title = dict(text="Number of students by years",
                     x=0.5,
                     y=0.9,
                     xanchor='center',
                     yanchor='top')

layout = go.Layout(title=centred_title,
                   xaxis={'title': 'Year'},
                   yaxis={'title': 'Students'},
                   template='plotly_white')

fig = go.Figure(data=data, layout=layout)
iplot(fig)

In [17]:
# Two line traces over the same yearly x values: students and lecturers.
years = student_df['date']

student = go.Scatter(x=years,
                     y=student_df['students'],
                     mode='lines',
                     name='students',
                     marker={'color': 'darkorange'})

lecturer = go.Scatter(x=years,
                      y=student_df['lecturers'],
                      mode='lines',
                      name='lecturers',
                      marker={'color': 'royalblue'})

# Only the x axis is titled here; the y axis is shared by both series.
layout = go.Layout(title=dict(text="Number of students & Lecturers by years",
                              x=0.5,
                              y=0.9,
                              xanchor='center',
                              yanchor='top'),
                   xaxis={'title': 'Year'},
                   template='plotly_dark')

fig = go.Figure(data=[student, lecturer], layout=layout)
iplot(fig)

In [18]:
# Side-by-side line charts sharing a y axis: the first five years on the
# left, the last five on the right.
fig = make_subplots(rows=1,
                    cols=2,
                    shared_yaxes=True,
                    subplot_titles=("2023 - 2027", "2028 - 2032"))

# (row slice, subplot column, trace name, line colour, dash style)
panel_specs = [
    (slice(0, 5), 1, 'student23-27', '#18FF01', 'dashdot'),
    (slice(5, 10), 2, 'student28-32', '#01AAFF', 'dash'),
]

for row_slice, panel_col, trace_name, line_colour, dash_style in panel_specs:
    panel_trace = go.Scatter(x=student_df['date'][row_slice],
                             y=student_df['students'][row_slice],
                             showlegend=False,
                             name=trace_name,
                             mode='lines',
                             line={'color': line_colour,
                                   'width': 3,
                                   'dash': dash_style})
    fig.add_traces(panel_trace, rows=1, cols=panel_col)

# Title the y axis of each panel.
for panel_col in (1, 2):
    fig.update_yaxes(title_text="Students", row=1, col=panel_col)

fig.update_layout(title={'text': "Number of students by Years",
                         'x': 0.5,
                         'y': 0.9,
                         'xanchor': 'center',
                         'yanchor': 'top'},
                  template='simple_white')

iplot(fig)

### 4. Bar Charts

- A bar chart presents **categorical data** with rectangular bars with heights (or lengths) proportional to the values that they represent.
- Build a bar chart with <code>go.Bar</code>
- Use <code>text</code> to display values on each bar.
- Bars can be edited with <code>marker</code>

In [19]:
# Mean reading score per gender, computed once and reused for both axes.
reading_by_gender = df.groupby('gender').agg({'reading_score': 'mean'}).reset_index()

bar = go.Figure(go.Bar(x=reading_by_gender['gender'],
                       y=reading_by_gender['reading_score']))

iplot(bar)

In [20]:
# Mean maths score per gender, aggregated a single time.
math_by_gender = df.groupby('gender').agg({'math_score': 'mean'}).reset_index()

# Bars 0.7 wide, semi-transparent blue with a black 2px outline.
bar_marker = dict(color='cornflowerblue',
                  opacity=0.7,
                  line_color='black',
                  line_width=2)

data = go.Bar(x=math_by_gender['gender'],
              y=math_by_gender['math_score'],
              width=[0.7, 0.7],
              marker=bar_marker)

layout = go.Layout(title='Average Maths Score by Gender',
                   xaxis={'title': 'Gender'},
                   yaxis={'title': 'Maths Score'},
                   width=700,
                   height=700,
                   template='plotly_white')

fig = go.Figure(data=data, layout=layout)
iplot(fig)

In [21]:
# Mean reading score per gender, aggregated once; the same values, rounded
# to two decimals, are printed above each bar.
reading_by_gender = df.groupby('gender').agg({'reading_score': 'mean'}).reset_index()
mean_reading = reading_by_gender['reading_score']

data = go.Bar(x=reading_by_gender['gender'],
              y=mean_reading,
              width=[0.5, 0.5],
              text=mean_reading.round(2),
              textposition='outside',
              textfont={'size': 25, 'color': 'darkred'},
              marker=dict(color='darkgreen',
                          line_color='yellow',
                          line_width=3,
                          opacity=0.65))

layout = go.Layout(title=dict(text="Average Reading Score by Gender",
                              x=0.5,
                              y=0.9,
                              xanchor='center',
                              yanchor='top'),
                   xaxis={'title': 'Gender'},
                   yaxis={'title': 'Reading Score'},
                   width=500,
                   height=500,
                   template='simple_white')

fig = go.Figure(data=data, layout=layout)
# Pin the y axis to the full 0-100 score range.
fig.update_yaxes(range=[0, 100])
iplot(fig)

#### Grouped Bar Charts

- A grouped bar chart extends the bar chart, plotting numeric values for levels of two categorical variables instead of one.
- Bars are grouped by position for levels of one categorical variable, with color indicating the secondary category level within each group.
- Use <code>barmode</code> to define the type of the bar chart.

In [22]:
# Mean of each subject score per gender, aggregated once and shared by all
# three bar traces (previously the same groupby was recomputed per argument).
gender_means = (
    df.groupby('gender')[['reading_score', 'writing_score', 'math_score']]
      .mean()
      .reset_index()
)


def _gender_mean_bar(column, label, colour):
    """Return a bar trace of the per-gender mean of `column`.

    column: name of the score column in `gender_means` to plot.
    label:  legend name for the trace.
    colour: bar fill colour.
    The rounded mean (two decimals) is drawn on each bar.
    """
    return go.Bar(x=gender_means['gender'],
                  y=gender_means[column],
                  text=gender_means[column].round(2),
                  textposition='auto',
                  name=label,
                  textfont=dict(size=16),
                  marker=dict(color=colour, opacity=0.65))


reading_score = _gender_mean_bar('reading_score', 'Reading Score', '#06F5E3')
writing_score = _gender_mean_bar('writing_score', 'Writing Score', '#FEAD00')
maths_score = _gender_mean_bar('math_score', 'Maths Score', '#CC00FE')

# Title centred near the top of the figure (x=0.5, y=0.9). The two values
# were previously swapped, which pushed the title to mid-height on the right.
layout = go.Layout(title=dict(text='Average Score by Gender',
                              x=0.5,
                              y=0.9,
                              xanchor='center',
                              yanchor='top'),
                   barmode='group',
                   # Transparent legend box placed inside the top-left of the plot.
                   legend=dict(x=0.05,
                               y=1.0,
                               bgcolor='rgba(255,255,255,0)',
                               bordercolor='rgba(255,255,255,0)'),
                   xaxis=dict(title='Gender'),
                   yaxis=dict(title='Scores'),
                   template='plotly_white')

fig = go.Figure(data=[reading_score, writing_score, maths_score], layout=layout)
# Pin the y axis to the full 0-100 score range.
fig.update_yaxes(range=[0, 100])
iplot(fig)


#### Stacked Bar Charts

In [23]:
# Mean writing / maths / reading score for each parental education level,
# with the level returned as an ordinary column rather than the index.
score_columns = ['writing_score', 'math_score', 'reading_score']
parental_avg = (
    df.groupby(['parental_level_of_education'])[score_columns]
      .mean()
      .reset_index()
)

In [24]:
# One bar trace per subject; each trace is named after its column and the
# traces are stacked per education level via barmode='stack'.
stack_specs = [
    ('writing_score', '#C70039'),
    ('math_score', '#900C3F'),
    ('reading_score', '#581845'),
]

t1, t2, t3 = [
    go.Bar(x=parental_avg['parental_level_of_education'],
           y=parental_avg[score_column],
           name=score_column,
           marker={'color': bar_colour, 'opacity': 0.7})
    for score_column, bar_colour in stack_specs
]

layout = go.Layout(title='Average Score by Level of Education',
                   barmode='stack',
                   xaxis={'title': 'Level Of Education'},
                   yaxis={'title': 'Scores'},
                   template='plotly_dark')

fig = go.Figure(data=[t1, t2, t3], layout=layout)
# Fixed y range of 0-250 for the stacked totals.
fig.update_yaxes(range=[0, 250])
iplot(fig)

### 5. Pie Charts

- A pie chart is a circular statistical graphic, which is divided into slices to illustrate numerical proportion.
- In a pie chart, the arc length of each slice, is proportional to the quantity it represents. While it is named for its resemblance to a pie which has been sliced, there are variations on the way it can be presented.
- Create a Pie Chart with <code>go.Pie</code>

In [25]:
# Number of students in each race/ethnicity group, counted once.
race_counts = df['race_ethnicity'].value_counts()

# Slice labels come from the count index, slice sizes from the counts.
pie = go.Figure(data=[go.Pie(labels=race_counts.index,
                             values=race_counts.values)])

iplot(pie)

In [26]:
# NOTE(review): four colours are supplied here; if race_ethnicity has more
# than four categories, the extra slices presumably fall back to plotly's
# default colours - confirm against the data.
colors = ['#28F20C', '#0CF2F2', '#F27F0C', '#F20C52']

race_counts = df['race_ethnicity'].value_counts()
fig = go.Figure(data=[go.Pie(labels=race_counts.index,
                             values=race_counts.values)])

# Show the group label on each slice and the raw count on hover; no legend.
fig.update_traces(hoverinfo='value',
                  textinfo='label',
                  textfont_size=16,
                  textposition='auto',
                  showlegend=False,
                  marker={'colors': colors})

fig.update_layout(title={'text': 'Race/Ethnicity Groups',
                         'x': 0.5,
                         'y': 0.9,
                         'xanchor': 'center',
                         'yanchor': 'top'},
                  template='simple_white')

iplot(fig)

In [27]:
colors = ['#14CFE8', '#E814C1']

lunch_counts = df['lunch'].value_counts()

# pull=[0, 0.25] leaves the first slice in place and pulls the second one
# outwards from the centre.
fig = go.Figure(data=[go.Pie(labels=lunch_counts.index,
                             values=lunch_counts.values,
                             pull=[0, 0.25])])

# Percentages on the slices, labels on hover, thin black slice borders.
fig.update_traces(hoverinfo='label',
                  textinfo='percent',
                  textfont_size=20,
                  textposition='auto',
                  marker={'colors': colors,
                          'line': {'width': 1.5, 'color': 'black'}})

fig.update_layout(title={'text': 'Percentage of Lunch Type',
                         'x': 0.5,
                         'y': 0.9,
                         'xanchor': 'center',
                         'yanchor': 'top'},
                  template='plotly_dark')

iplot(fig)

#### Donut Charts

- A donut chart is a pie chart with a hole in the center.
- Use <code>hole</code> to define a hole. In this parameter, **larger values** come up with **bigger holes**.

In [28]:
colors = ['#D7DD19', '#6FDD19', '#19DDA5', '#195ADD', '#A219DD', '#DD1984']

education_counts = df['parental_level_of_education'].value_counts()

fig = go.Figure(data=[go.Pie(labels=education_counts.index,
                             values=education_counts.values)])

# hole=0.65 turns the pie into a donut; raw counts are printed on the
# slices and wide white borders separate them.
fig.update_traces(hoverinfo='label',
                  textinfo='value',
                  hole=0.65,
                  textfont_size=12,
                  textposition='auto',
                  marker={'colors': colors,
                          'line': {'width': 5, 'color': 'white'}})

fig.update_layout(title={'text': 'Parental Level of Education',
                         'x': 0.5,
                         'y': 0.9,
                         'xanchor': 'center',
                         'yanchor': 'top'},
                  template='simple_white')

iplot(fig)

### 6. Histograms

- A histogram displays an accurate representation of the overall distribution of a **continuous feature**.
- In graphical objects, <code>go.Histogram</code> can be used to create a histogram.
- To create a histogram, we divide the entire range of values of the continuous feature into a series of intervals.
- This series of intervals are known as **"bins"**.
- Change the bin size with <code>size</code> to get either more or less detail.
- Determine the starting, ending and interval size with <code>xbins</code>

In [32]:
# Reading-score histogram with plotly's automatic binning, drawn in maroon.
reading_hist = go.Histogram(x=df['reading_score'], marker={'color': 'maroon'})
hist = go.Figure(reading_hist)
iplot(hist)

In [30]:
# Maths-score histogram with explicit bins: 0 to 100 in steps of 5.
# Note: in this cell `fig` holds the list of traces and `img` the figure.
math_bins = {'start': 0, 'end': 100, 'size': 5}
math_bar_style = {'color': '#FFE400', 'line': {'color': 'black', 'width': 2}}

fig = [go.Histogram(x=df['math_score'], xbins=math_bins, marker=math_bar_style)]

layout = go.Layout(title='Maths Score',
                   xaxis={'title': 'Score'},
                   yaxis={'title': 'Frequency'},
                   width=700,
                   height=450,
                   template='simple_white')

img = go.Figure(data=fig, layout=layout)

iplot(img)

In [31]:
# Overlaid reading-score histograms for male and female students, both
# binned from 0 to 100 in steps of 5 and drawn half-transparent.
fig = go.Figure()

# (value in the gender column, legend name, bar colour)
gender_specs = [
    ('male', 'Male', '#0891EF'),
    ('female', 'Female', '#FF00E0'),
]

for gender_value, legend_name, bar_colour in gender_specs:
    gender_scores = df[df['gender'] == gender_value]['reading_score']
    fig.add_trace(go.Histogram(x=gender_scores,
                               xbins={'start': 0, 'end': 100, 'size': 5},
                               name=legend_name,
                               marker={'color': bar_colour, 'opacity': 0.5}))

# barmode='overlay' draws the two histograms on top of each other.
fig.update_layout(title='Reading Score Distribution Male vs Female',
                  barmode='overlay',
                  xaxis={'title': 'Score'},
                  yaxis={'title': 'Frequency'},
                  width=700,
                  height=450)

iplot(fig)

### 7. Distplots

- Distribution Plots, or Distplots, typically layer three plots on top of one another.
- The first is a histogram, where each data point is placed inside a bin of similar values.
- The second is a rug plot - marks are placed along the x-axis for every data point, which lets you see the distribution of values inside each bin.
- Lastly, Distribution plots often include a "kernel density estimate", or KDE line that tries to describe the shape of the distribution.
- Use <code>create_distplot</code> to define a distplot.

In [36]:
# Build one maths-score sample and one label per race/ethnicity group.
# The unique groups are computed once up front (they were previously
# recomputed twice on every loop iteration) and iterated over directly.
race_groups = df['race_ethnicity'].unique()

hist_data = [df.loc[df['race_ethnicity'] == group, 'math_score']
             for group in race_groups]
group_labels = list(race_groups)
group_lable = group_labels  # original (misspelled) name kept as an alias

# Histogram + KDE curve + rug plot per group, using bins of width 5.
fig = ff.create_distplot(hist_data, group_labels, bin_size=5)

fig.update_layout(title=dict(text="Math Score Distplot",
                             y=0.9,
                             x=0.5,
                             xanchor='center',
                             yanchor='top'),
                  barmode='overlay',
                  template='plotly_white')

iplot(fig)

### Heatmaps

- Heatmaps allow the visualization of **3 features**.
- Categorical or continuous features along the x and y axis to make up a grid, and then a 3rd continuous feature displayed through color.
- The X and Y axes are separated into intervals to form a grid.
- **Categorical** features also can be defined on the **x** and **y** axis.
- Use <code>go.Heatmap</code> to define a heatmap.

In [43]:
# Aggregate before plotting: each heatmap cell must hold ONE value per
# (gender, test preparation course) pair. Passing the raw per-student
# columns gives plotly many values per cell with no aggregation, so the
# colour would not represent the group as a whole. Here each cell is the
# mean maths score of the students in that combination.
math_by_prep = df.pivot_table(index='test_preparation_course',
                              columns='gender',
                              values='math_score',
                              aggfunc='mean')

heatmap = go.Figure(go.Heatmap(x=math_by_prep.columns.tolist(),   # genders
                               y=math_by_prep.index.tolist(),     # course status
                               z=math_by_prep.values,             # rows follow y, columns follow x
                               colorbar=dict(title='Mean Maths Score')))

iplot(heatmap)

In [46]:
# Aggregate before plotting: each heatmap cell must hold ONE value per
# (gender, parental education level) pair. Passing the raw per-student
# columns gives plotly many values per cell with no aggregation, so here
# each cell is the mean maths score of the students in that combination.
math_by_education = df.pivot_table(index='parental_level_of_education',
                                   columns='gender',
                                   values='math_score',
                                   aggfunc='mean')

heatmap = [go.Heatmap(x=math_by_education.columns.tolist(),   # genders
                      y=math_by_education.index.tolist(),     # education levels
                      z=math_by_education.values,             # rows follow y, columns follow x
                      colorscale='Magma',
                      colorbar=dict(title='Mean Maths Score'))]

layout = go.Layout(title=dict(text="Gender & Level of Education",
                              y=0.9,
                              x=0.5,
                              xanchor='center',
                              yanchor='top'),
                   xaxis=dict(title='Gender'),
                   yaxis=dict(title='Level of Education'),
                   width=600,
                   height=450,
                   template='plotly_white')

fig = go.Figure(data=heatmap, layout=layout)

iplot(fig)

### 8. Box Plots
- Box Plots visualize the variation of a feature by depicting the **continuous numerical** data through quartiles.
- Separate the data based on a categorical feature to compare the continuous feature across categories.
- Create a Box Plot with <code>go.Box</code>
- The Box Plot is a way of visually displaying the data distribution through their quartiles.
- Quartiles separate the data into four equal parts.
- **Q1** represents the **first quartile** and is the 25th percentile.
- **Q2 (the median)** is the 50th percentile: 50% of the scores lie below it.
- Finally, **Q3**, the 75th percentile, is the central point that lies between the median (Q2) and the highest number of the distribution.
- If we hover over the plot, we can display the median, max, min values and quartiles.

In [52]:
# One horizontal box per subject (values passed as x), legend hidden.
box_specs = [
    ('reading_score', 'Reading Score'),
    ('writing_score', 'Writing Score'),
    ('math_score', 'Maths Score'),
]

data = [go.Box(x=df[score_column], showlegend=False, name=box_name)
        for score_column, box_name in box_specs]

layout = go.Layout(title={'text': "Scores",
                          'x': 0.5,
                          'y': 0.9,
                          'xanchor': 'center',
                          'yanchor': 'top'},
                   width=700,
                   height=450,
                   template='plotly_dark')

fig = go.Figure(data=data, layout=layout)

iplot(fig)

### 9. Subplots
- Subplots method provides a way to plot **multiple plots** on a **single figure**.
- Use <code>make_subplots</code> to create a subplot, then define the size of the subplot with <code>rows</code> and <code>cols</code>
- Set the title of each plot with <code>subplot_titles</code>

In [58]:
# Two panels sharing a y axis: box plots of the three subject scores for
# males (left) and females (right).
figure = make_subplots(rows=1, cols=2, shared_yaxes=True,
                       subplot_titles=("Males", "Females"))

# (score column, box name, box colour) - same trio in both panels.
subject_specs = [
    ('reading_score', 'Reading Score', 'blue'),
    ('writing_score', 'Writing Score', 'green'),
    ('math_score', 'Maths Score', 'red'),
]

# Traces are added panel by panel, subject by subject: all male boxes go
# into column 1 first, then all female boxes into column 2.
for panel_col, gender_value in enumerate(('male', 'female'), start=1):
    gender_rows = df[df['gender'] == gender_value]
    for score_column, box_name, box_colour in subject_specs:
        figure.add_trace(go.Box(y=gender_rows[score_column],
                                showlegend=False,
                                name=box_name,
                                marker_color=box_colour),
                         row=1, col=panel_col)

figure.update_layout(title={'text': "Scores by Gender",
                            'x': 0.5,
                            'y': 0.9,
                            'xanchor': 'center',
                            'yanchor': 'bottom'},
                     width=800,
                     height=450,
                     template='plotly')

iplot(figure)

In [69]:
# Alternative hex palette considered for this figure:
#   '#4BA7CF', '#CF5B4B', '#B764D6', '#E3885B', '#5BE3E1'
colors = ['yellowgreen', 'maroon', 'violet', 'orange', 'cyan']

# Students per race/ethnicity group, counted once and shared by both panels.
race_counts = df['race_ethnicity'].value_counts()
count_values = race_counts.values.tolist()

# Left panel is a cartesian ("xy") plot for the bars; the right panel is a
# "domain" cell, which is what pie traces require.
fig = make_subplots(rows=1, cols=2,
                    subplot_titles=('Countplot', 'Percentages'),
                    specs=[[{"type": "xy"}, {'type': 'domain'}]])

count_bars = go.Bar(x=race_counts.index,
                    y=count_values,
                    text=count_values,
                    textfont={'size': 12, 'color': 'white'},
                    name='race_ethnicity',
                    textposition='auto',
                    showlegend=False,
                    marker={'color': colors})
fig.add_trace(count_bars, row=1, col=1)

share_donut = go.Pie(labels=race_counts.index,
                     values=race_counts.values,
                     textfont={'size': 12, 'color': 'white'},
                     hole=0.55,
                     textposition='auto',
                     showlegend=False,
                     name='race_ethnicity',
                     marker={'colors': colors,
                             'line': {'width': 3, 'color': 'white'}})
fig.add_trace(share_donut, row=1, col=2)

fig.update_layout(title=dict(text='Race/Ethnicity',
                             x=0.5,
                             y=0.9,
                             xanchor='center',
                             yanchor='top'),
                  template='plotly_dark')

iplot(fig)