In [1]:
import pandas as pd 
import  numpy as np 
import plotly.express as px 

In [2]:
dataset = [45, 50, 55, 60, 60, 65, 70, 75, 80, 85, 90, 95, 100, 100, 45, 50, 55, 60, 60, 65, 70, 100]

scores = pd.Series(dataset)

scores

0      45
1      50
2      55
3      60
4      60
5      65
6      70
7      75
8      80
9      85
10     90
11     95
12    100
13    100
14     45
15     50
16     55
17     60
18     60
19     65
20     70
21    100
dtype: int64

In [4]:
mean_vl = scores.mean()
median_vl = scores.median()
mode_vl = scores.mode()


print(f'Mean value : {mean_vl:.2f}')
print(f'Median value : {median_vl}')
print(f'Mode Value : {mode_vl}')

Mean value : 69.77
Median value : 65.0
Mode Value : 0    60
dtype: int64


In [6]:
fig = px.histogram(scores ,nbins= 10, title= "Distribution of scores with median and mode")

fig.add_vline(x=median_vl,line_dash = 'dash', line_color = 'red', annotation_text = 'Median', annotation_position = "top left")

fig.update_traces(marker=(dict(line=dict(color = "black", width = 1))))

# If multiple modes, mark all
for m in mode_vl:
    fig.add_vline(x=m, line_dash="dot", line_color="green", annotation_text=f"Mode: {m}", annotation_position="top right")

fig.show()

# Quartile and IQR 

In [7]:
q1 = scores.quantile(0.25)
q2 = scores.quantile(0.50)
q3 = scores.quantile(0.75)

iqr = q3 - q1

print(f'25th percentaile : {q1}')
print(f'50th percentaile : {q2}')
print(f'75th percentaile : {q3}')
print(f'IQR  : {iqr}')

25th percentaile : 56.25
50th percentaile : 65.0
75th percentaile : 83.75
IQR  : 27.5


In [10]:
fig = px.box(scores, title= 'Boxplot scores of (quantile and IQR)')

fig.add_hline(y=q1 , line_dash = 'dash', line_color = "pink", annotation_text = 'Q1', annotation_position = 'top right')
fig.add_hline(y=q3, line_dash = 'dash', line_color = 'orange', annotation_text = "Q2", annotation_position = 'top right')

fig.show()

# Skewness and Kurtosis

In [11]:
from scipy.stats import skew , kurtosis

data_skewness = skew(scores)
data_kurtosis = kurtosis(scores)

print(f'Data skewness : {data_skewness}')
print(f'Data kurtosis : {data_kurtosis}')

Data skewness : 0.44353065672197983
Data kurtosis : -1.0672990730134377


In [13]:
fig = px.histogram(scores ,nbins = 10, title= f'Distribution of scores\nSkewness : {data_skewness:.2f}, kurtosis : {data_kurtosis:.2f}')
fig.update_traces(marker=dict(line=dict(color = 'black', width = 1)))
fig.show()

In [17]:
fig = px.line(scores, title = 'Distribution of Score')
fig.add_hline(
    y=data_skewness, 
    line_dash="dash", 
    line_color="red",
    annotation_text=f"Skewness: {data_skewness:.2f}",
    annotation_position="top"
)
fig.show()

# Outliear detection

In [29]:
newdata = scores.to_list()
newdata.append(150)
data = pd.Series(newdata)

In [30]:
dq1 = data.quantile(0.25)
dq2 = data.quantile(0.50)
dq3 = data.quantile(0.75)
diqr = dq3 - dq1




lower_bound = q1 - 1.5 * iqr
upper_bound = q3 + 1.5 * iqr 

z_scores = (data - data.mean())/ data.std()
outlier_z = data[abs(z_scores) > 3]
print('Z_scores outlier : ', outlier_z.to_list())

Z_scores outlier :  [150]


In [31]:
fig = px.box(data, title="Boxplot Showing Outliers")
fig.update_traces(marker=dict(color="red", size=8))  # highlight outliers
fig.show()