# Dataset

## First exploratory analysis

In [1]:
import os

import pandas as pd
import seaborn as sns
import matplotlib as plt  # NOTE(review): `plt` conventionally aliases matplotlib.pyplot — confirm intent
import plotly.express as px

In [2]:
# Location of the raw CSV. Set the SNAKES_CSV environment variable to point at
# another copy of the file; when it is unset the original local path is used,
# so existing behaviour is unchanged.
# NOTE(review): the fallback is an absolute, machine-specific path — consider
# replacing it with a path relative to the project root.
SNAKES_CSV = os.environ.get(
    'SNAKES_CSV',
    '/Users/alessandrasellini/code/projects/snake_conservation_status/raw_data/ConservationStatusofWorldSnakes.csv',
)
df = pd.read_csv(SNAKES_CSV)
df

Unnamed: 0,Family,Scientific name,Common name(s),Red List status,Population trend
0,BOLYERIDAE,Bolyeria multocarinata,Round Island Burrowing Boa,EX,
1,DIPSADIDAE,Borikenophis sanctaecrucis,Saint Croix Racer,EX,
2,TYPHLOPIDAE,Typhlops cariei,,EX,
3,ELAPIDAE,Aipysurus apraefrontalis,"Sahul Reef Snake, Short-nosed Sea Snake",CR,decreasing
4,ELAPIDAE,Aipysurus foliosquama,Leaf-scaled Sea Snake,CR,decreasing
...,...,...,...,...,...
244,TYPHLOPIDAE,Typhlops syntherus,Barahona Peninsula Blindsnake,NT,unknown
245,VIPERIDAE,Vipera barani,,NT,decreasing
246,VIPERIDAE,Vipera lotievi,Lotiev's Viper,NT,decreasing
247,VIPERIDAE,Vipera monticola,Atlas Dwarf Viper,NT,decreasing


In [3]:
# Column dtypes and non-null counts, to see which columns have missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 249 entries, 0 to 248
Data columns (total 5 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   Family            249 non-null    object
 1   Scientific name   249 non-null    object
 2   Common name(s)    174 non-null    object
 3   Red List status   249 non-null    object
 4   Population trend  219 non-null    object
dtypes: object(5)
memory usage: 9.9+ KB


In [4]:
# Summary of each column; every column is object dtype, so describe() reports
# count / unique / top / freq rather than numeric statistics.
df.describe()

Unnamed: 0,Family,Scientific name,Common name(s),Red List status,Population trend
count,249,249,174,249,219
unique,18,249,174,5,4
top,VIPERIDAE,Bolyeria multocarinata,Round Island Burrowing Boa,VU,decreasing
freq,56,1,1,86,118


### Analysis for families

In [5]:
# Family-level view: drop the species name columns. Missing population trends
# are filled with '(possibly) extinct' in the next cell.

families = df.drop(columns=['Common name(s)', 'Scientific name'])
families

Unnamed: 0,Family,Red List status,Population trend
0,BOLYERIDAE,EX,
1,DIPSADIDAE,EX,
2,TYPHLOPIDAE,EX,
3,ELAPIDAE,CR,decreasing
4,ELAPIDAE,CR,decreasing
...,...,...,...
244,TYPHLOPIDAE,NT,unknown
245,VIPERIDAE,NT,decreasing
246,VIPERIDAE,NT,decreasing
247,VIPERIDAE,NT,decreasing


In [6]:
# Label missing population trends as '(possibly) extinct'.
# The fill is restricted to the 'Population trend' column so that a missing
# value in any other column can never receive this label, and the result is
# assigned back instead of mutating the frame in place.
# NOTE(review): this also labels non-EX species whose trend is missing
# (e.g. some VU and CR rows) — confirm that is intended.
families = families.fillna({'Population trend': '(possibly) extinct'})
families

Unnamed: 0,Family,Red List status,Population trend
0,BOLYERIDAE,EX,(possibly) extinct
1,DIPSADIDAE,EX,(possibly) extinct
2,TYPHLOPIDAE,EX,(possibly) extinct
3,ELAPIDAE,CR,decreasing
4,ELAPIDAE,CR,decreasing
...,...,...,...
244,TYPHLOPIDAE,NT,unknown
245,VIPERIDAE,NT,decreasing
246,VIPERIDAE,NT,decreasing
247,VIPERIDAE,NT,decreasing


In [7]:
# Check non-null counts again now that missing population trends are filled.
families.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 249 entries, 0 to 248
Data columns (total 3 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   Family            249 non-null    object
 1   Red List status   249 non-null    object
 2   Population trend  249 non-null    object
dtypes: object(3)
memory usage: 6.0+ KB


In [8]:
# count / unique / top / freq for the three remaining (object dtype) columns.
families.describe()

Unnamed: 0,Family,Red List status,Population trend
count,249,249,249
unique,18,5,5
top,VIPERIDAE,VU,decreasing
freq,56,86,118


In [9]:
# Number of rows (one per species) in each family, most frequent first.
families.value_counts('Family')

Family
VIPERIDAE             56
COLUBRIDAE            40
PSEUDOXYRHOPHIIDAE    31
DIPSADIDAE            30
ELAPIDAE              29
NATRICIDAE            22
TYPHLOPIDAE           14
CALAMARIIDAE           6
PYTHONIDAE             3
XENOSAURIDAE           3
BOIDAE                 3
HOMALOPSIDAE           3
LAMPROPHIIDAE          2
BOLYERIDAE             2
AMPHISBAENIDAE         2
PROSYMNIDAE            1
UROPELTIDAE            1
XENODERMATIDAE         1
dtype: int64

In [10]:
# Frequency of each (Family, Red List status, Population trend) combination.
families.value_counts()

Family              Red List status  Population trend  
VIPERIDAE           VU               decreasing            12
COLUBRIDAE          NT               decreasing             8
VIPERIDAE           EN               decreasing             8
                                     unknown                8
                    NT               decreasing             8
                                                           ..
LAMPROPHIIDAE       VU               unknown                1
NATRICIDAE          CR               (possibly) extinct     1
PROSYMNIDAE         CR               unknown                1
PSEUDOXYRHOPHIIDAE  CR               decreasing             1
XENOSAURIDAE        VU               decreasing             1
Length: 91, dtype: int64

In [11]:
# Per family, count how often each (Red List status, Population trend) pair
# occurs, and turn the resulting Series into a one-column DataFrame.
status = (
    families
    .groupby(['Family'])
    .value_counts()
    .to_frame()
)
status

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,0
Family,Red List status,Population trend,Unnamed: 3_level_1
AMPHISBAENIDAE,NT,decreasing,1
AMPHISBAENIDAE,NT,unknown,1
BOIDAE,EN,unknown,2
BOIDAE,VU,(possibly) extinct,1
BOLYERIDAE,EN,(possibly) extinct,1
...,...,...,...
VIPERIDAE,VU,stable,1
VIPERIDAE,CR,(possibly) extinct,1
XENODERMATIDAE,VU,(possibly) extinct,1
XENOSAURIDAE,EN,decreasing,2


In [12]:
# Move the index levels (Family, Red List status, Population trend) back into
# ordinary columns so they can be referenced by name.
status = status.reset_index()

In [13]:
# Inspect the flattened frame after reset_index.
status

Unnamed: 0,Family,Red List status,Population trend,0
0,AMPHISBAENIDAE,NT,decreasing,1
1,AMPHISBAENIDAE,NT,unknown,1
2,BOIDAE,EN,unknown,2
3,BOIDAE,VU,(possibly) extinct,1
4,BOLYERIDAE,EN,(possibly) extinct,1
...,...,...,...,...
86,VIPERIDAE,VU,stable,1
87,VIPERIDAE,CR,(possibly) extinct,1
88,XENODERMATIDAE,VU,(possibly) extinct,1
89,XENOSAURIDAE,EN,decreasing,2


In [17]:
# Give the counts column a readable name for plotting.
# to_frame() on an unnamed value_counts() result yields a column labelled 0;
# pandas >= 2.0 names that result 'count' instead, so both labels are mapped
# (labels that are absent are ignored by rename). The result is assigned back
# rather than mutated in place.
status = status.rename(columns={0: 'Count', 'count': 'Count'})
status

Unnamed: 0,Family,Red List status,Population trend,Count
0,AMPHISBAENIDAE,NT,decreasing,1
1,AMPHISBAENIDAE,NT,unknown,1
2,BOIDAE,EN,unknown,2
3,BOIDAE,VU,(possibly) extinct,1
4,BOLYERIDAE,EN,(possibly) extinct,1
...,...,...,...,...
86,VIPERIDAE,VU,stable,1
87,VIPERIDAE,CR,(possibly) extinct,1
88,XENODERMATIDAE,VU,(possibly) extinct,1
89,XENOSAURIDAE,EN,decreasing,2


### Visualisation

In [19]:
# Bubble chart: one bubble per (family, Red List status, population trend)
# combination, sized by the number of species in that combination.
fig = px.scatter(
    data_frame=status,
    x="Family",
    y="Red List status",
    size="Count",
    color="Population trend",
    size_max=60,
    opacity=0.5,
    # Fix the Red List axis to severity order (most to least severe) instead of
    # the order in which the categories happen to appear in the data.
    category_orders={"Red List status": ["EX", "CR", "EN", "VU", "NT"]},
    title="Snake species by family, Red List status and population trend",
)
fig.show()